From f8acb63a63ad50566dda21cd24f1943e40f0eb85 Mon Sep 17 00:00:00 2001
From: Milo Hyben
Date: Mon, 20 Nov 2023 11:47:43 +1100
Subject: [PATCH 01/34] Billing pre-release (#605) (#617)

* Bill 112 (#583)
* START: add billing home page and data page
* FIX: fixed style in the nav. Everything but the search bar
* DONE: menu and billing pages done
* REMOVE: remove redundant code
* User Story 1.3: As a user, I want to sort costs by budget and by the percentage of budget used
* First version of Current Billing Cost page.
* Added dropdown to select grouping by Project, Topic or Dataset.
* [BIL-39] User Story 2.3: As a user, I want to view costs in a time series with daily granularity in the billing dashboard (#588)
* Added gcp-projects API to billing, new Billing CostByTime page
* Updated StackedAreaByDateChart with more custom properties.
* Bill 150 (#589)
* API: add API changes to allow the get running cost query to filter using invoice month
* IN PROGRESS: trying to cache the call to the seqr prop map API endpoint. UI cleaned up.
* RM: console.log
* Extended BQ look-back time to 300 days as we do not have much data loaded in the dev table. (#591)
* REFACTOR: move data loading logic into the main page to be used by all charts
* DONE: data table on time view page complete
* Added gcp-projects API to billing, new Billing CostByTime page
* Updated StackedAreaByDateChart with more custom properties.
* First version of Bar and Donut charts.
* Upgrading babel / vulnerability.
* Bil 242 - Hide billing pages when env variables aren't set (#600)
* DONE: menu and billing pages done
* REMOVE: remove redundant code
* API: add API changes to allow the get running cost query to filter using invoice month
* FIX: Dropdown to FieldSelector in BillingInvoiceMonthCost
* FIX: all pages and navigation/links working
* FIX: fixed all of the navigate() issues; now all links work
* IN PROGRESS: trying to cache the call to the seqr prop map API endpoint. UI cleaned up.
* LINT: fix typing issues in API
* RM: console.log
* Extended BQ look-back time to 300 days as we do not have much data loaded in the dev table. (#591)
* Billing cost by time data refactor (#592)
* IN PROGRESS: data table on over time cost page
* DONE: data table on time view page complete
* UPDATE: frontend now checks if the billing endpoint is returning an OK status or not. Hides billing pages on any failed (not 200) status

---------

* Added gcp-projects API to billing, new Billing CostByTime page
* Updated StackedAreaByDateChart with more custom properties.
* First version of Bar and Donut charts.
* Upgrading babel / vulnerability.
* Billing - show 24H table fields only for the current invoice month (#604)
* Fixing last 24H billing calculations.
* Limit Budget % and 24H to only the latest month.
* Pick only the latest monthly budget row per gcp_project.
* Added Last 24H UTC day to the table header.
* Removing unused job_config.
* Fixing docker image building issues.
* Getting gcp_project data from gcp_billing view instead of aggregated view. (#606)
* Small fix to enable query cost by ar-guid, added example to API docs.
(#611) --------- Co-authored-by: Sabrina Yan <9669990+violetbrina@users.noreply.github.com> --- api/routes/billing.py | 92 ++- api/settings.py | 12 +- api/utils/dates.py | 44 +- db/python/layers/billing.py | 579 +++++++++++++++++- models/models/__init__.py | 2 + models/models/billing.py | 72 ++- web/package-lock.json | 213 +++++-- web/package.json | 3 + web/src/Routes.tsx | 53 +- web/src/index.css | 3 + web/src/pages/billing/Billing.css | 15 + web/src/pages/billing/BillingCostByTime.tsx | 317 ++++++++++ web/src/pages/billing/BillingDashboard.tsx | 42 -- web/src/pages/billing/BillingHome.tsx | 19 + .../pages/billing/BillingInvoiceMonthCost.tsx | 377 ++++++++++++ web/src/pages/billing/BillingSeqrProp.tsx | 65 ++ .../components/BillingCostByTimeTable.tsx | 248 ++++++++ .../billing/components/CostByTimeChart.tsx | 54 ++ .../billing/components/FieldSelector.tsx | 143 +++++ .../SeqrProportionalMapGraph.tsx | 83 ++- web/src/pages/billing/index.ts | 4 + web/src/shared/components/Graphs/BarChart.tsx | 126 ++++ .../shared/components/Graphs/DonutChart.tsx | 214 +++++++ .../Graphs/StackedAreaByDateChart.tsx | 61 +- .../DarkModeTriButton/DarkModeTriButton.css | 5 +- web/src/shared/components/Header/NavBar.css | 80 ++- web/src/shared/components/Header/NavBar.tsx | 230 +++++-- web/src/shared/components/Header/Search.css | 3 +- .../shared/components/ResponsiveContainer.tsx | 198 ++++++ web/src/shared/utilities/fieldName.ts | 10 + web/src/shared/utilities/formatMoney.ts | 3 + web/src/shared/utilities/orderBy.ts | 19 + 32 files changed, 3110 insertions(+), 279 deletions(-) create mode 100644 web/src/pages/billing/Billing.css create mode 100644 web/src/pages/billing/BillingCostByTime.tsx delete mode 100644 web/src/pages/billing/BillingDashboard.tsx create mode 100644 web/src/pages/billing/BillingHome.tsx create mode 100644 web/src/pages/billing/BillingInvoiceMonthCost.tsx create mode 100644 web/src/pages/billing/BillingSeqrProp.tsx create mode 100644 web/src/pages/billing/components/BillingCostByTimeTable.tsx create mode 100644 web/src/pages/billing/components/CostByTimeChart.tsx create mode 100644 web/src/pages/billing/components/FieldSelector.tsx rename web/src/pages/billing/{ => components}/SeqrProportionalMapGraph.tsx (76%) create mode 100644 web/src/pages/billing/index.ts create mode 100644 web/src/shared/components/Graphs/BarChart.tsx create mode 100644 web/src/shared/components/Graphs/DonutChart.tsx create mode 100644 web/src/shared/components/ResponsiveContainer.tsx create mode 100644 web/src/shared/utilities/fieldName.ts create mode 100644 web/src/shared/utilities/formatMoney.ts create mode 100644 web/src/shared/utilities/orderBy.ts diff --git a/api/routes/billing.py b/api/routes/billing.py index 9a7b3ea35..bdc0d8b52 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -11,6 +11,8 @@ ) from db.python.layers.billing import BillingLayer from models.models.billing import ( + BillingColumn, + BillingCostBudgetRecord, BillingQueryModel, BillingRowRecord, BillingTotalCostRecord, @@ -21,6 +23,22 @@ router = APIRouter(prefix='/billing', tags=['billing']) +@router.get( + '/gcp-projects', + response_model=list[str], + operation_id='getGcpProjects', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_gcp_projects( + author: str = get_author, +) -> list[str]: + """Get list of all GCP projects in database""" + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + records = await billing_layer.get_gcp_projects() + return records + + @router.get( '/topics', 
response_model=list[str], @@ -95,7 +113,7 @@ async def get_datasets( @router.get( - '/sequencing_types', + '/sequencing-types', response_model=list[str], operation_id='getSequencingTypes', ) @@ -133,7 +151,7 @@ async def get_stages( @router.get( - '/sequencing_groups', + '/sequencing-groups', response_model=list[str], operation_id='getSequencingGroups', ) @@ -151,6 +169,25 @@ async def get_sequencing_groups( return records +@router.get( + '/invoice-months', + response_model=list[str], + operation_id='getInvoiceMonths', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_invoice_months( + author: str = get_author, +) -> list[str]: + """ + Get list of all invoice months in database + Results are sorted DESC + """ + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + records = await billing_layer.get_invoice_months() + return records + + @router.post( '/query', response_model=list[BillingRowRecord], operation_id='queryBilling' ) @@ -198,7 +235,8 @@ async def get_total_cost( "fields": ["topic"], "start_date": "2023-03-01", "end_date": "2023-03-31", - "order_by": {"cost": true} + "order_by": {"cost": true}, + "source": "aggregate" } 2. Get total cost by day and topic for March 2023, order by day ASC and topic DESC: @@ -283,9 +321,57 @@ async def get_total_cost( "order_by": {"cost": true} } + 10. Get total gcp_project for month of March 2023, ordered by cost DESC: + + { + "fields": ["gcp_project"], + "start_date": "2023-03-01", + "end_date": "2023-03-31", + "order_by": {"cost": true}, + "source": "gcp_billing" + } + + 11. Get total cost by sku for given ar_guid, order by cost DESC: + + { + "fields": ["sku"], + "start_date": "2023-10-23", + "end_date": "2023-10-23", + "filters": { "ar_guid": "4e53702e-8b6c-48ea-857f-c5d33b7e72d7"}, + "order_by": {"cost": true} + } + """ connection = BqConnection(author) billing_layer = BillingLayer(connection) records = await billing_layer.get_total_cost(query) return records + + +@router.get( + '/running-cost/{field}', + response_model=list[BillingCostBudgetRecord], + operation_id='getRunningCost', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_running_costs( + field: BillingColumn, + invoice_month: str | None = None, + source: str | None = None, + author: str = get_author, +) -> list[BillingCostBudgetRecord]: + """ + Get running cost for specified fields in database + e.g. fields = ['gcp_project', 'topic'] + """ + + # TODO replace alru_cache with async-cache? + # so we can skip author for caching? 
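+    # (note: alru_cache keys its cache entries on every call argument,
+    # including `author`, so identical queries issued by different users
+    # are currently cached separately)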
+    # pip install async-cache
+    # @AsyncTTL(time_to_live=BILLING_CACHE_RESPONSE_TTL, maxsize=1024, skip_args=2)
+
+    connection = BqConnection(author)
+    billing_layer = BillingLayer(connection)
+    records = await billing_layer.get_running_cost(field, invoice_month, source)
+    return records
diff --git a/api/settings.py b/api/settings.py
index 949824b18..ab92c717c 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -37,11 +37,15 @@
 SEQUENCING_GROUP_CHECKSUM_OFFSET = int(os.getenv('SM_SEQUENCINGGROUPCHECKOFFSET', '9'))
 
 # billing settings
-BQ_GCP_BILLING_PROJECT = os.getenv('SM_GCP_BILLING_PROJECT')
+BQ_AGGREG_VIEW = os.getenv('SM_GCP_BQ_AGGREG_VIEW')
+BQ_AGGREG_RAW = os.getenv('SM_GCP_BQ_AGGREG_RAW')
+BQ_AGGREG_EXT_VIEW = os.getenv('SM_GCP_BQ_AGGREG_EXT_VIEW')
+BQ_BUDGET_VIEW = os.getenv('SM_GCP_BQ_BUDGET_VIEW')
+BQ_GCP_BILLING_VIEW = os.getenv('SM_GCP_BQ_BILLING_VIEW')
+
 # This is to optimise BQ queries, DEV table has data only for Mar 2023
-# TODO change to 7 days or similar before merging into DEV
-BQ_DAYS_BACK_OPTIMAL = 210
-BILLING_CACHE_RESPONSE_TTL = 1800  # 30 minutes
+BQ_DAYS_BACK_OPTIMAL = 30  # Look back 30 days for optimal query
+BILLING_CACHE_RESPONSE_TTL = 3600  # 1 Hour
 
 
 def get_default_user() -> str | None:
diff --git a/api/utils/dates.py b/api/utils/dates.py
index 7f26704e0..2ef961f01 100644
--- a/api/utils/dates.py
+++ b/api/utils/dates.py
@@ -1,4 +1,6 @@
-from datetime import datetime, date
+from datetime import datetime, date, timedelta
+
+INVOICE_DAY_DIFF = 3
 
 def parse_date_only_string(d: str | None) -> date | None:
@@ -10,3 +12,43 @@ def parse_date_only_string(d: str | None) -> date | None:
         return datetime.strptime(d, '%Y-%m-%d').date()
     except Exception as excep:
         raise ValueError(f'Date could not be converted: {d}') from excep
+
+
+def get_invoice_month_range(convert_month: date) -> tuple[date, date]:
+    """
+    Get the start and end date of the invoice month for a given date
+    Start and end date are used mostly for optimising BQ queries
+    All our BQ tables/views are partitioned by day
+    """
+    first_day = convert_month.replace(day=1)
+
+    # Grab the first day of invoice month then subtract INVOICE_DAY_DIFF days
+    start_day = first_day + timedelta(days=-INVOICE_DAY_DIFF)
+
+    if convert_month.month == 12:
+        next_month = first_day.replace(month=1, year=convert_month.year + 1)
+    else:
+        next_month = first_day.replace(month=convert_month.month + 1)
+
+    # Grab the last day of invoice month then add INVOICE_DAY_DIFF days
+    last_day = next_month + timedelta(days=-1) + timedelta(days=INVOICE_DAY_DIFF)
+
+    return start_day, last_day
+
+
+def reformat_datetime(
+    in_date: str | None, in_format: str, out_format: str
+) -> str | None:
+    """
+    Reformat a datetime string from one format to another
+    This function takes a string as input and returns a string as output
+    """
+    if not in_date:
+        return None
+
+    try:
+        result = datetime.strptime(in_date, in_format)
+        return result.strftime(out_format)
+
+    except Exception as excep:
+        raise ValueError(f'Date could not be converted: {in_date}') from excep
diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py
index f38c23228..93ce3cfc7 100644
--- a/db/python/layers/billing.py
+++ b/db/python/layers/billing.py
@@ -1,3 +1,8 @@
+import re
+
+from typing import Any
+from datetime import datetime
+from collections import Counter, defaultdict
 from google.cloud import bigquery
 
 from models.models import (
@@ -5,25 +10,41 @@
     BillingTotalCostRecord,
     BillingTotalCostQueryModel,
     BillingColumn,
+    BillingCostBudgetRecord,
 )
 from db.python.gcp_connect
import BqDbBase from db.python.layers.bq_base import BqBaseLayer from db.python.tables.billing import BillingFilter -from api.settings import BQ_GCP_BILLING_PROJECT, BQ_DAYS_BACK_OPTIMAL - -# TODO update beore merging into DEV -BQ_AGGREG_VIEW = f'{BQ_GCP_BILLING_PROJECT}.billing_aggregate.aggregate_daily_cost-dev' -BQ_AGGREG_RAW = f'{BQ_GCP_BILLING_PROJECT}.billing_aggregate.aggregate-dev' -BQ_AGGREG_EXT_VIEW = ( - f'{BQ_GCP_BILLING_PROJECT}.billing_aggregate.aggregate_daily_cost_extended-dev' +from api.settings import ( + BQ_DAYS_BACK_OPTIMAL, + BQ_AGGREG_VIEW, + BQ_AGGREG_RAW, + BQ_AGGREG_EXT_VIEW, + BQ_BUDGET_VIEW, + BQ_GCP_BILLING_VIEW, ) +from api.utils.dates import get_invoice_month_range, reformat_datetime + + +def abbrev_cost_category(cost_category: str) -> str: + """abbreviate cost category""" + return 'S' if cost_category == 'Cloud Storage' else 'C' class BillingLayer(BqBaseLayer): """Billing layer""" + async def get_gcp_projects( + self, + ) -> list[str] | None: + """ + Get All GCP projects in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_gcp_projects() + async def get_topics( self, ) -> list[str] | None: @@ -89,6 +110,15 @@ async def get_sequencing_groups( billing_db = BillingDb(self.connection) return await billing_db.get_extended_values('sequencing_group') + async def get_invoice_months( + self, + ) -> list[str] | None: + """ + Get All invoice months in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_invoice_months() + async def query( self, _filter: BillingFilter, @@ -110,14 +140,64 @@ async def get_total_cost( billing_db = BillingDb(self.connection) return await billing_db.get_total_cost(query) + async def get_running_cost( + self, + field: BillingColumn, + invoice_month: str | None = None, + source: str | None = None, + ) -> list[BillingCostBudgetRecord]: + """ + Get Running costs including monthly budget + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_running_cost(field, invoice_month, source) + class BillingDb(BqDbBase): """Db layer for billing related routes""" + async def get_gcp_projects(self): + """Get all GCP projects in database""" + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this part_time > filter is to limit the amount of data scanned, + # saving cost for running BQ + _query = f""" + SELECT DISTINCT gcp_project + FROM `{BQ_GCP_BILLING_VIEW}` + WHERE part_time > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + AND gcp_project IS NOT NULL + ORDER BY gcp_project ASC; + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) + ), + ] + ) + + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() + ) + if query_job_result: + return [str(dict(row)['gcp_project']) for row in query_job_result] + + # return empty list if no record found + return [] + async def get_topics(self): """Get all topics in database""" # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day _query = f""" SELECT DISTINCT topic FROM `{BQ_AGGREG_VIEW}` @@ -144,10 +224,31 @@ async def get_topics(self): # return empty list if no record found return [] + 
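The lookup queries above all follow the same pattern: a `SELECT DISTINCT` guarded by a filter on the partition column, with values passed through `QueryJobConfig` query parameters so only the most recent day-partitions are scanned. A minimal standalone sketch of that pattern is below; the view name is a placeholder, not one of the real views configured in `api/settings.py`:

```python
from google.cloud import bigquery

client = bigquery.Client()  # assumes application-default credentials

# Placeholder view name; the real views are read from env vars in api/settings.py
query = """
    SELECT DISTINCT topic
    FROM `my-project.billing_aggregate.aggregate_daily_cost`
    WHERE day > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL @days DAY)
    ORDER BY topic ASC
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        # Negative offset: scan only the last N day-partitions,
        # which bounds the bytes scanned and therefore the query cost
        bigquery.ScalarQueryParameter('days', 'INT64', -30),
    ]
)

topics = [row.topic for row in client.query(query, job_config=job_config).result()]
```

Because the aggregate views are partitioned by day, the `day >` predicate is what keeps the scanned data, and therefore the per-query cost, small.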
async def get_invoice_months(self): + """Get all invoice months in database""" + + _query = f""" + SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month + FROM `{BQ_AGGREG_VIEW}` + WHERE EXTRACT(day from day) = 1 + ORDER BY invoice_month DESC; + """ + + query_job_result = list(self._connection.connection.query(_query).result()) + if query_job_result: + return [str(dict(row)['invoice_month']) for row in query_job_result] + + # return empty list if no record found + return [] + async def get_cost_categories(self): """Get all service description in database""" # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day _query = f""" SELECT DISTINCT cost_category FROM `{BQ_AGGREG_VIEW}` @@ -182,6 +283,10 @@ async def get_skus( """Get all SKUs in database""" # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day _query = f""" SELECT DISTINCT sku FROM `{BQ_AGGREG_VIEW}` @@ -223,6 +328,10 @@ async def get_extended_values(self, field: str): """ # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day _query = f""" SELECT DISTINCT {field} FROM `{BQ_AGGREG_EXT_VIEW}` @@ -332,7 +441,10 @@ async def get_total_cost( extended_cols = BillingColumn.extended_cols() # by default look at the normal view - view_to_use = BQ_AGGREG_VIEW + if query.source == 'gcp_billing': + view_to_use = BQ_GCP_BILLING_VIEW + else: + view_to_use = BQ_AGGREG_VIEW columns = [] for field in query.fields: @@ -363,6 +475,18 @@ async def get_total_cost( bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date) ) + if query.source == 'gcp_billing': + # BQ_GCP_BILLING_VIEW view is partitioned by different field + # BQ has limitation, materialized view can only by partition by base table + # partition or its subset, in our case _PARTITIONTIME + # (part_time field in the view) + # We are querying by day, + # which can be up to a week behind regarding _PARTITIONTIME + filters.append('part_time >= TIMESTAMP(@start_date)') + filters.append( + 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' + ) + if query.filters: for filter_key, filter_value in query.filters.items(): col_name = str(filter_key.value) @@ -388,15 +512,10 @@ async def get_total_cost( order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' _query = f""" - SELECT * FROM - ( - SELECT {fields_selected}, SUM(cost) as cost - FROM `{view_to_use}` - {filter_str} - GROUP BY {fields_selected} - - ) - WHERE cost > 0.01 + SELECT {fields_selected}, SUM(cost) as cost + FROM `{view_to_use}` + {filter_str} + GROUP BY {fields_selected} {order_by_str} """ @@ -424,3 +543,429 @@ async def get_total_cost( # return empty list if no record found return [] + + async def get_budgets_by_gcp_project( + self, field: BillingColumn, is_current_month: bool + ) -> dict[str, float]: + """ + Get budget for gcp-projects + """ + if field != BillingColumn.PROJECT or not is_current_month: + # only projects have budget and only for current month + return {} 
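+        # The self-join below keeps only the newest budget row per gcp_project
+        # (the row whose created_at matches MAX(created_at)), so re-issued
+        # budgets supersede older ones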
+ + _query = f""" + WITH t AS ( + SELECT gcp_project, MAX(created_at) as last_created_at + FROM `{BQ_BUDGET_VIEW}` + GROUP BY 1 + ) + SELECT t.gcp_project, d.budget + FROM t inner join `{BQ_BUDGET_VIEW}` d + ON d.gcp_project = t.gcp_project AND d.created_at = t.last_created_at + """ + + query_job_result = list(self._connection.connection.query(_query).result()) + + if query_job_result: + return {row.gcp_project: row.budget for row in query_job_result} + + return {} + + async def get_last_loaded_day(self): + """Get the most recent fully loaded day in db + Go 2 days back as the data is not always available for the current day + 1 day back is not enough + """ + + _query = f""" + SELECT TIMESTAMP_ADD(MAX(day), INTERVAL -2 DAY) as last_loaded_day + FROM `{BQ_AGGREG_VIEW}` + WHERE day > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) + ), + ] + ) + + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() + ) + if query_job_result: + return str(query_job_result[0].last_loaded_day) + + return None + + async def prepare_daily_cost_subquery( + self, field, view_to_use, source, query_params + ): + """prepare daily cost subquery""" + + if source == 'gcp_billing': + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + gcp_billing_optimise_filter = """ + AND part_time >= TIMESTAMP(@last_loaded_day) + AND part_time <= TIMESTAMP_ADD( + TIMESTAMP(@last_loaded_day), INTERVAL 7 DAY + ) + """ + else: + gcp_billing_optimise_filter = '' + + # Find the last fully loaded day in the view + last_loaded_day = await self.get_last_loaded_day() + + daily_cost_field = ', day.cost as daily_cost' + daily_cost_join = f"""LEFT JOIN ( + SELECT + {field.value} as field, + cost_category, + SUM(cost) as cost + FROM + `{view_to_use}` + WHERE day = TIMESTAMP(@last_loaded_day) + {gcp_billing_optimise_filter} + GROUP BY + field, + cost_category + ) day + ON month.field = day.field + AND month.cost_category = day.cost_category + """ + + query_params.append( + bigquery.ScalarQueryParameter('last_loaded_day', 'STRING', last_loaded_day), + ) + return (last_loaded_day, query_params, daily_cost_field, daily_cost_join) + + async def execute_running_cost_query( + self, + field: BillingColumn, + invoice_month: str | None = None, + source: str | None = None, + ): + """ + Run query to get running cost of selected field + """ + # check if invoice month is valid first + if not invoice_month or not re.match(r'^\d{6}$', invoice_month): + raise ValueError('Invalid invoice month') + + invoice_month_date = datetime.strptime(invoice_month, '%Y%m') + if invoice_month != invoice_month_date.strftime('%Y%m'): + raise ValueError('Invalid invoice month') + + # get start day and current day for given invoice month + # This is to optimise the query, BQ view is partitioned by day + # and not by invoice month + start_day_date, last_day_date = get_invoice_month_range(invoice_month_date) + start_day = start_day_date.strftime('%Y-%m-%d') + last_day = last_day_date.strftime('%Y-%m-%d') + + # by default look at the normal view + if field in BillingColumn.extended_cols(): + # if any of the extendeid fields are needed use the extended view 
+ view_to_use = BQ_AGGREG_EXT_VIEW + elif source == 'gcp_billing': + # if source is gcp_billing, + # use the view on top of the raw billing table + view_to_use = BQ_GCP_BILLING_VIEW + else: + # otherwise use the normal view + view_to_use = BQ_AGGREG_VIEW + + if source == 'gcp_billing': + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + filter_to_optimise_query = """ + part_time >= TIMESTAMP(@start_day) + AND part_time <= TIMESTAMP_ADD( + TIMESTAMP(@last_day), INTERVAL 7 DAY + ) + """ + else: + # add extra filter to limit materialized view partition + filter_to_optimise_query = """ + day >= TIMESTAMP(@start_day) + AND day <= TIMESTAMP(@last_day) + """ + + # start_day and last_day are in to optimise the query + query_params = [ + bigquery.ScalarQueryParameter('start_day', 'STRING', start_day), + bigquery.ScalarQueryParameter('last_day', 'STRING', last_day), + ] + + current_day = datetime.now().strftime('%Y-%m-%d') + is_current_month = last_day >= current_day + last_loaded_day = None + + if is_current_month: + # Only current month can have last 24 hours cost + # Last 24H in UTC time + ( + last_loaded_day, + query_params, + daily_cost_field, + daily_cost_join, + ) = await self.prepare_daily_cost_subquery( + field, view_to_use, source, query_params + ) + else: + # Do not calculate last 24H cost + daily_cost_field = ', NULL as daily_cost' + daily_cost_join = '' + + _query = f""" + SELECT + CASE WHEN month.field IS NULL THEN 'N/A' ELSE month.field END as field, + month.cost_category, + month.cost as monthly_cost + {daily_cost_field} + FROM + ( + SELECT + {field.value} as field, + cost_category, + SUM(cost) as cost + FROM + `{view_to_use}` + WHERE {filter_to_optimise_query} + AND invoice_month = @invoice_month + GROUP BY + field, + cost_category + HAVING cost > 0.1 + ) month + {daily_cost_join} + ORDER BY field ASC, daily_cost DESC, monthly_cost DESC; + """ + + query_params.append( + bigquery.ScalarQueryParameter('invoice_month', 'STRING', invoice_month) + ) + + return ( + is_current_month, + last_loaded_day, + list( + self._connection.connection.query( + _query, + job_config=bigquery.QueryJobConfig(query_parameters=query_params), + ).result() + ), + ) + + async def append_total_running_cost( + self, + field: BillingColumn, + is_current_month: bool, + last_loaded_day: str | None, + total_monthly: dict, + total_daily: dict, + total_monthly_category: dict, + total_daily_category: dict, + results: list[BillingCostBudgetRecord], + ) -> list[BillingCostBudgetRecord]: + """ + Add total row: compute + storage to the results + """ + # construct ALL fields details + all_details = [] + for cat, mth_cost in total_monthly_category.items(): + all_details.append( + { + 'cost_group': abbrev_cost_category(cat), + 'cost_category': cat, + 'daily_cost': total_daily_category[cat] + if is_current_month + else None, + 'monthly_cost': mth_cost, + } + ) + + # add total row: compute + storage + results.append( + BillingCostBudgetRecord.from_json( + { + 'field': f'{BillingColumn.generate_all_title(field)}', + 'total_monthly': ( + total_monthly['C']['ALL'] + total_monthly['S']['ALL'] + ), + 'total_daily': (total_daily['C']['ALL'] + total_daily['S']['ALL']) + if is_current_month + else None, + 'compute_monthly': total_monthly['C']['ALL'], + 'compute_daily': (total_daily['C']['ALL']) + if 
is_current_month + else None, + 'storage_monthly': total_monthly['S']['ALL'], + 'storage_daily': (total_daily['S']['ALL']) + if is_current_month + else None, + 'details': all_details, + 'last_loaded_day': last_loaded_day, + } + ) + ) + + return results + + async def append_running_cost_records( + self, + field: BillingColumn, + is_current_month: bool, + last_loaded_day: str | None, + total_monthly: dict, + total_daily: dict, + field_details: dict, + results: list[BillingCostBudgetRecord], + ) -> list[BillingCostBudgetRecord]: + """ + Add all the selected field rows: compute + storage to the results + """ + # get budget map per gcp project + budgets_per_gcp_project = await self.get_budgets_by_gcp_project( + field, is_current_month + ) + + # add rows by field + for key, details in field_details.items(): + compute_daily = total_daily['C'][key] if key in total_daily['C'] else 0 + storage_daily = total_daily['S'][key] if key in total_daily['S'] else 0 + compute_monthly = ( + total_monthly['C'][key] if key in total_monthly['C'] else 0 + ) + storage_monthly = ( + total_monthly['S'][key] if key in total_monthly['S'] else 0 + ) + monthly = compute_monthly + storage_monthly + budget_monthly = budgets_per_gcp_project.get(key) + + results.append( + BillingCostBudgetRecord.from_json( + { + 'field': key, + 'total_monthly': monthly, + 'total_daily': (compute_daily + storage_daily) + if is_current_month + else None, + 'compute_monthly': compute_monthly, + 'compute_daily': compute_daily, + 'storage_monthly': storage_monthly, + 'storage_daily': storage_daily, + 'details': details, + 'budget_spent': 100 * monthly / budget_monthly + if budget_monthly + else None, + 'last_loaded_day': last_loaded_day, + } + ) + ) + + return results + + async def get_running_cost( + self, + field: BillingColumn, + invoice_month: str | None = None, + source: str | None = None, + ) -> list[BillingCostBudgetRecord]: + """ + Get currently running cost of selected field + """ + + # accept only Topic, Dataset or Project at this stage + if field not in ( + BillingColumn.TOPIC, + BillingColumn.PROJECT, + BillingColumn.DATASET, + ): + raise ValueError('Invalid field only topic, dataset or project allowed') + + ( + is_current_month, + last_loaded_day, + query_job_result, + ) = await self.execute_running_cost_query(field, invoice_month, source) + if not query_job_result: + # return empty list + return [] + + # prepare data + results: list[BillingCostBudgetRecord] = [] + + # reformat last_loaded_day if present + last_loaded_day = reformat_datetime( + last_loaded_day, '%Y-%m-%d %H:%M:%S+00:00', '%b %d' + ) + + total_monthly: dict[str, Counter[str]] = defaultdict(Counter) + total_daily: dict[str, Counter[str]] = defaultdict(Counter) + field_details: dict[str, list[Any]] = defaultdict(list) + total_monthly_category: Counter[str] = Counter() + total_daily_category: Counter[str] = Counter() + + for row in query_job_result: + if row.field not in field_details: + field_details[row.field] = [] + + cost_group = abbrev_cost_category(row.cost_category) + + field_details[row.field].append( + { + 'cost_group': cost_group, + 'cost_category': row.cost_category, + 'daily_cost': row.daily_cost if is_current_month else None, + 'monthly_cost': row.monthly_cost, + } + ) + + total_monthly_category[row.cost_category] += row.monthly_cost + if row.daily_cost: + total_daily_category[row.cost_category] += row.daily_cost + + # cost groups totals + total_monthly[cost_group]['ALL'] += row.monthly_cost + total_monthly[cost_group][row.field] += row.monthly_cost + if 
row.daily_cost and is_current_month: + total_daily[cost_group]['ALL'] += row.daily_cost + total_daily[cost_group][row.field] += row.daily_cost + + # add total row: compute + storage + results = await self.append_total_running_cost( + field, + is_current_month, + last_loaded_day, + total_monthly, + total_daily, + total_monthly_category, + total_daily_category, + results, + ) + + # add rest of the records: compute + storage + results = await self.append_running_cost_records( + field, + is_current_month, + last_loaded_day, + total_monthly, + total_daily, + field_details, + results, + ) + + return results diff --git a/models/models/__init__.py b/models/models/__init__.py index a9ded5690..aa7cf5324 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -67,4 +67,6 @@ BillingTotalCostRecord, BillingTotalCostQueryModel, BillingColumn, + BillingCostBudgetRecord, + BillingCostDetailsRecord, ) diff --git a/models/models/billing.py b/models/models/billing.py index 80209feb7..481ea77ce 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -1,5 +1,4 @@ import datetime -from decimal import Decimal from enum import Enum from db.python.tables.billing import BillingFilter @@ -124,12 +123,14 @@ class BillingColumn(str, Enum): # base view columns TOPIC = 'topic' + PROJECT = 'gcp_project' DAY = 'day' COST_CATEGORY = 'cost_category' SKU = 'sku' AR_GUID = 'ar_guid' CURRENCY = 'currency' COST = 'cost' + INVOICE_MONTH = 'invoice_month' # extended, filtered view columns DATASET = 'dataset' @@ -147,8 +148,17 @@ def extended_cols(cls) -> list[str]: 'sequencing_type', 'stage', 'sequencing_group', + 'ar_guid' ] + @staticmethod + def generate_all_title(record) -> str: + """Generate Column as All Title""" + if record == BillingColumn.PROJECT: + return 'All GCP Projects' + + return f'All {record.title()}s' + class BillingTotalCostQueryModel(SMBase): """ @@ -160,6 +170,8 @@ class BillingTotalCostQueryModel(SMBase): fields: list[BillingColumn] start_date: str end_date: str + # optional, can be aggregate or gcp_billing + source: str | None = None # optional filters: dict[BillingColumn, str] | None = None @@ -178,6 +190,7 @@ class BillingTotalCostRecord(SMBase): day: datetime.date | None topic: str | None + gcp_project: str | None cost_category: str | None sku: str | None ar_guid: str | None @@ -188,7 +201,7 @@ class BillingTotalCostRecord(SMBase): stage: str | None sequencing_group: str | None - cost: Decimal + cost: float currency: str | None @staticmethod @@ -197,6 +210,7 @@ def from_json(record): return BillingTotalCostRecord( day=record.get('day'), topic=record.get('topic'), + gcp_project=record.get('gcp_project'), cost_category=record.get('cost_category'), sku=record.get('sku'), ar_guid=record.get('ar_guid'), @@ -208,3 +222,57 @@ def from_json(record): cost=record.get('cost'), currency=record.get('currency'), ) + + +class BillingCostDetailsRecord(SMBase): + """_summary_""" + + cost_group: str + cost_category: str + daily_cost: float | None + monthly_cost: float | None + + @staticmethod + def from_json(record): + """Create BillingCostDetailsRecord from json""" + return BillingCostDetailsRecord( + cost_group=record.get('cost_group'), + cost_category=record.get('cost_category'), + daily_cost=record.get('daily_cost'), + monthly_cost=record.get('monthly_cost'), + ) + + +class BillingCostBudgetRecord(SMBase): + """Return class for the Billing Total Budget / Cost record""" + + field: str | None + total_monthly: float | None + total_daily: float | None + + compute_monthly: float | None + 
compute_daily: float | None + storage_monthly: float | None + storage_daily: float | None + details: list[BillingCostDetailsRecord] | None + budget_spent: float | None + + last_loaded_day: str | None + + @staticmethod + def from_json(record): + """Create BillingTopicCostCategoryRecord from json""" + return BillingCostBudgetRecord( + field=record.get('field'), + total_monthly=record.get('total_monthly'), + total_daily=record.get('total_daily'), + compute_monthly=record.get('compute_monthly'), + compute_daily=record.get('compute_daily'), + storage_monthly=record.get('storage_monthly'), + storage_daily=record.get('storage_daily'), + details=[ + BillingCostDetailsRecord.from_json(row) for row in record.get('details') + ], + budget_spent=record.get('budget_spent'), + last_loaded_day=record.get('last_loaded_day'), + ) diff --git a/web/package-lock.json b/web/package-lock.json index 5d81f4805..62926084c 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,14 +1,15 @@ { "name": "metamist", - "version": "6.3.0", + "version": "6.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "metamist", - "version": "6.3.0", + "version": "6.4.0", "dependencies": { "@apollo/client": "^3.7.3", + "@artsy/fresnel": "^6.2.1", "@emotion/react": "^11.10.4", "@emotion/styled": "^11.10.4", "@mui/icons-material": "^5.10.9", @@ -25,9 +26,11 @@ "d3": "^7.6.1", "graphql": "^16.6.0", "lodash": "^4.17.21", + "prop-types": "^15.8.1", "react": "^17.0.2", "react-dom": "^17.0.2", "react-grid-layout": "^1.3.4", + "react-intersection-observer": "^9.5.2", "react-markdown": "^7.1.0", "react-responsive": "^9.0.2", "react-router-dom": "^6.0.1", @@ -135,7 +138,7 @@ "@babel/generator": "^7.14.0", "@babel/parser": "^7.14.0", "@babel/runtime": "^7.0.0", - "@babel/traverse": "^7.14.0", + "@babel/traverse": "^7.23.2", "@babel/types": "^7.0.0", "babel-preset-fbjs": "^3.4.0", "chalk": "^4.0.0", @@ -236,16 +239,94 @@ "node": ">=14" } }, + "node_modules/@artsy/fresnel": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/@artsy/fresnel/-/fresnel-6.2.1.tgz", + "integrity": "sha512-UAyHZU64Vie3sLDdL3qD+7pODGzKNu9pSpxGKpDOCaiBvCDZnFXIfJEEfV9v9i+QiJJwzO+lqsFwvw6YiJeXFQ==", + "engines": { + "node": ">=12.20.2", + "yarn": "1.x.x" + }, + "peerDependencies": { + "react": ">=16.3.0" + } + }, "node_modules/@babel/code-frame": { - "version": "7.21.4", - "license": "MIT", + "version": "7.22.13", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.22.13.tgz", + "integrity": "sha512-XktuhWlJ5g+3TJXc5upd9Ks1HutSArik6jf2eAjYFyIOf4ej3RN+184cZbzDvbPnuTJIUhPKKJE3cIsYTiAT3w==", "dependencies": { - "@babel/highlight": "^7.18.6" + "@babel/highlight": "^7.22.13", + "chalk": "^2.4.2" }, "engines": { "node": ">=6.9.0" } }, + "node_modules/@babel/code-frame/node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/code-frame/node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + 
}, + "node_modules/@babel/code-frame/node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/@babel/code-frame/node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" + }, + "node_modules/@babel/code-frame/node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/@babel/code-frame/node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "engines": { + "node": ">=4" + } + }, + "node_modules/@babel/code-frame/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/@babel/compat-data": { "version": "7.21.4", "license": "MIT", @@ -282,10 +363,11 @@ } }, "node_modules/@babel/generator": { - "version": "7.21.4", - "license": "MIT", + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.0.tgz", + "integrity": "sha512-lN85QRR+5IbYrMWM6Y4pE/noaQtg4pNiqeNGX60eqOfo6gtEj6uw/JagelB8vVztSd7R6M5n1+PQkDbHbBRU4g==", "dependencies": { - "@babel/types": "^7.21.4", + "@babel/types": "^7.23.0", "@jridgewell/gen-mapping": "^0.3.2", "@jridgewell/trace-mapping": "^0.3.17", "jsesc": "^2.5.1" @@ -344,28 +426,31 @@ } }, "node_modules/@babel/helper-environment-visitor": { - "version": "7.18.9", - "license": "MIT", + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", + "integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-function-name": { - "version": "7.21.0", - "license": "MIT", + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", + "integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", "dependencies": { - "@babel/template": "^7.20.7", - "@babel/types": "^7.21.0" + "@babel/template": "^7.22.15", + "@babel/types": "^7.23.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-hoist-variables": { - "version": "7.18.6", - "license": "MIT", + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", + "integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", "dependencies": { - "@babel/types": "^7.18.6" + 
"@babel/types": "^7.22.5" }, "engines": { "node": ">=6.9.0" @@ -466,25 +551,28 @@ } }, "node_modules/@babel/helper-split-export-declaration": { - "version": "7.18.6", - "license": "MIT", + "version": "7.22.6", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", + "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", "dependencies": { - "@babel/types": "^7.18.6" + "@babel/types": "^7.22.5" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-string-parser": { - "version": "7.19.4", - "license": "MIT", + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.22.5.tgz", + "integrity": "sha512-mM4COjgZox8U+JcXQwPijIZLElkgEpO5rsERVDJTc2qfCDfERyob6k5WegS14SX18IIjv+XD+GrqNumY5JRCDw==", "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-validator-identifier": { - "version": "7.19.1", - "license": "MIT", + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", "engines": { "node": ">=6.9.0" } @@ -509,11 +597,12 @@ } }, "node_modules/@babel/highlight": { - "version": "7.18.6", - "license": "MIT", + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.22.20.tgz", + "integrity": "sha512-dkdMCN3py0+ksCgYmGG8jKeGA/8Tk+gJwSYYlFGxG5lmhfKNoAy004YpLxpS1W2J8m/EK2Ew+yOs9pVRwO89mg==", "dependencies": { - "@babel/helper-validator-identifier": "^7.18.6", - "chalk": "^2.0.0", + "@babel/helper-validator-identifier": "^7.22.20", + "chalk": "^2.4.2", "js-tokens": "^4.0.0" }, "engines": { @@ -578,8 +667,9 @@ } }, "node_modules/@babel/parser": { - "version": "7.21.4", - "license": "MIT", + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.0.tgz", + "integrity": "sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw==", "bin": { "parser": "bin/babel-parser.js" }, @@ -1034,29 +1124,31 @@ } }, "node_modules/@babel/template": { - "version": "7.20.7", - "license": "MIT", + "version": "7.22.15", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.22.15.tgz", + "integrity": "sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==", "dependencies": { - "@babel/code-frame": "^7.18.6", - "@babel/parser": "^7.20.7", - "@babel/types": "^7.20.7" + "@babel/code-frame": "^7.22.13", + "@babel/parser": "^7.22.15", + "@babel/types": "^7.22.15" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/traverse": { - "version": "7.21.4", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.21.4", - "@babel/generator": "^7.21.4", - "@babel/helper-environment-visitor": "^7.18.9", - "@babel/helper-function-name": "^7.21.0", - "@babel/helper-hoist-variables": "^7.18.6", - "@babel/helper-split-export-declaration": "^7.18.6", - "@babel/parser": "^7.21.4", - "@babel/types": "^7.21.4", + "version": "7.23.2", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.2.tgz", + "integrity": "sha512-azpe59SQ48qG6nu2CzcMLbxUudtN+dOM9kDbUqGq3HXUJRlo7i8fvPoxQUzYgLZ4cMVmuZgm8vvBpNeRhd6XSw==", + "dependencies": { + "@babel/code-frame": "^7.22.13", + "@babel/generator": "^7.23.0", + "@babel/helper-environment-visitor": 
"^7.22.20", + "@babel/helper-function-name": "^7.23.0", + "@babel/helper-hoist-variables": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/parser": "^7.23.0", + "@babel/types": "^7.23.0", "debug": "^4.1.0", "globals": "^11.1.0" }, @@ -1065,11 +1157,12 @@ } }, "node_modules/@babel/types": { - "version": "7.21.4", - "license": "MIT", + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.0.tgz", + "integrity": "sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==", "dependencies": { - "@babel/helper-string-parser": "^7.19.4", - "@babel/helper-validator-identifier": "^7.19.1", + "@babel/helper-string-parser": "^7.22.5", + "@babel/helper-validator-identifier": "^7.22.20", "to-fast-properties": "^2.0.0" }, "engines": { @@ -4281,7 +4374,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001481", + "version": "1.0.30001553", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001553.tgz", + "integrity": "sha512-N0ttd6TrFfuqKNi+pMgWJTb9qrdJu4JSpgPFLe/lrD19ugC6fZgF0pUewRowDwzdDnb9V41mFcdlYgl/PyKf4A==", "funding": [ { "type": "opencollective", @@ -4295,8 +4390,7 @@ "type": "github", "url": "https://github.com/sponsors/ai" } - ], - "license": "CC-BY-4.0" + ] }, "node_modules/capital-case": { "version": "1.0.4", @@ -9641,7 +9735,8 @@ }, "node_modules/prop-types": { "version": "15.8.1", - "license": "MIT", + "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", + "integrity": "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==", "dependencies": { "loose-envify": "^1.4.0", "object-assign": "^4.1.1", @@ -9874,6 +9969,14 @@ "react": "^16.8.4 || ^17.0.0 || ^18.0.0" } }, + "node_modules/react-intersection-observer": { + "version": "9.5.2", + "resolved": "https://registry.npmjs.org/react-intersection-observer/-/react-intersection-observer-9.5.2.tgz", + "integrity": "sha512-EmoV66/yvksJcGa1rdW0nDNc4I1RifDWkT50gXSFnPLYQ4xUptuDD4V7k+Rj1OgVAlww628KLGcxPXFlOkkU/Q==", + "peerDependencies": { + "react": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0" + } + }, "node_modules/react-is": { "version": "18.2.0", "license": "MIT" diff --git a/web/package.json b/web/package.json index 6dfaccc1d..a6c6a3abe 100644 --- a/web/package.json +++ b/web/package.json @@ -4,6 +4,7 @@ "private": true, "dependencies": { "@apollo/client": "^3.7.3", + "@artsy/fresnel": "^6.2.1", "@emotion/react": "^11.10.4", "@emotion/styled": "^11.10.4", "@mui/icons-material": "^5.10.9", @@ -20,9 +21,11 @@ "d3": "^7.6.1", "graphql": "^16.6.0", "lodash": "^4.17.21", + "prop-types": "^15.8.1", "react": "^17.0.2", "react-dom": "^17.0.2", "react-grid-layout": "^1.3.4", + "react-intersection-observer": "^9.5.2", "react-markdown": "^7.1.0", "react-responsive": "^9.0.2", "react-router-dom": "^6.0.1", diff --git a/web/src/Routes.tsx b/web/src/Routes.tsx index f7f6da271..5abaa6d8d 100644 --- a/web/src/Routes.tsx +++ b/web/src/Routes.tsx @@ -2,6 +2,12 @@ import * as React from 'react' import SwaggerUI from 'swagger-ui-react' import { Routes as Switch, Route } from 'react-router-dom' +import { + BillingHome, + BillingSeqrProp, + BillingCostByTime, + BillingInvoiceMonthCost, +} from './pages/billing' import DocumentationArticle from './pages/docs/Documentation' import SampleView from './pages/sample/SampleView' import FamilyView from './pages/family/FamilyView' @@ -9,56 +15,63 @@ import ProjectSummaryView from './pages/project/ProjectSummary' import ProjectsAdmin 
from './pages/admin/ProjectsAdmin' import ErrorBoundary from './shared/utilities/errorBoundary' import AnalysisRunnerSummary from './pages/project/AnalysisRunnerView/AnalysisRunnerSummary' -import BillingDashboard from './pages/billing/BillingDashboard' const Routes: React.FunctionComponent = () => ( - } /> - - } /> + } /> + } /> - + } /> - - + } /> - } /> - - } /> - - + } /> + } /> + } /> - + } /> + + + + } + /> + + } /> + + } /> @@ -67,10 +80,10 @@ const Routes: React.FunctionComponent = () => ( /> - + } /> diff --git a/web/src/index.css b/web/src/index.css index 7f0a5424f..65dd69ccd 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -46,6 +46,7 @@ html[data-theme='dark-mode'] { --color-check-green: #659251; --color-table-header: rgba(0, 0, 0, 0.15); } + /* poor man's dark mode: https://github.com/swagger-api/swagger-ui/issues/5327#issuecomment-742375520 */ html[data-theme='dark-mode'] .swagger-ui { filter: invert(88%) hue-rotate(180deg); @@ -122,6 +123,7 @@ blockquote { max-width: 768px; margin: calc(50px + 1vmin) auto 0 auto; */ padding: 80px; + padding-top: 40px; } @media screen and (min-width: 1200px) { @@ -154,6 +156,7 @@ html[data-theme='dark-mode'] .ui.styled.accordion .content { background-color: #353535; border-color: rgba(255, 255, 255, 0.87); } + html[data-theme='dark-mode'] .ui.styled.accordion .title { background-color: #242424; color: rgba(255, 255, 255, 0.87); diff --git a/web/src/pages/billing/Billing.css b/web/src/pages/billing/Billing.css new file mode 100644 index 000000000..68d62cfe0 --- /dev/null +++ b/web/src/pages/billing/Billing.css @@ -0,0 +1,15 @@ +#group-by-dropdown .menu { + background: #ffffff !important; +} + +.field-selector-label { + width: 200px !important; +} + +.field-selector-dropdown { + width: 80% !important; +} + +.donut-chart { + margin-top: 20px; +} diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx new file mode 100644 index 000000000..629b3ee08 --- /dev/null +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -0,0 +1,317 @@ +import * as React from 'react' +import { useLocation, useNavigate, useSearchParams } from 'react-router-dom' +import { Button, Card, Grid, Input, Message, Table as SUITable } from 'semantic-ui-react' +import CostByTimeChart from './components/CostByTimeChart' +import FieldSelector from './components/FieldSelector' +import { + BillingApi, + BillingColumn, + BillingTotalCostQueryModel, + BillingTotalCostRecord, +} from '../../sm-api' + +import { convertFieldName } from '../../shared/utilities/fieldName' +import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' +import BillingCostByTimeTable from './components/BillingCostByTimeTable' +import { BarChart, IData } from '../../shared/components/Graphs/BarChart' +import { DonutChart } from '../../shared/components/Graphs/DonutChart' + +const BillingCostByTime: React.FunctionComponent = () => { + const now = new Date() + + const [searchParams] = useSearchParams() + + const inputGroupBy: string | undefined = searchParams.get('groupBy') ?? undefined + const fixedGroupBy: BillingColumn = inputGroupBy + ? (inputGroupBy as BillingColumn) + : BillingColumn.GcpProject + const inputSelectedData: string | undefined = searchParams.get('selectedData') ?? undefined + + const [start, setStart] = React.useState( + searchParams.get('start') ?? `${now.getFullYear()}-${now.getMonth() + 1}-01` + ) + const [end, setEnd] = React.useState( + searchParams.get('end') ?? 
`${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + ) + const [groupBy, setGroupBy] = React.useState( + fixedGroupBy ?? BillingColumn.GcpProject + ) + const [selectedData, setSelectedData] = React.useState(inputSelectedData) + + // Max Aggregated Data Points, rest will be aggregated into "Rest" + const maxDataPoints = 7 + + // Data loading + const [isLoading, setIsLoading] = React.useState(true) + const [error, setError] = React.useState() + const [groups, setGroups] = React.useState([]) + const [data, setData] = React.useState([]) + const [aggregatedData, setAggregatedData] = React.useState([]) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = ( + grp: string | undefined, + data: string | undefined, + start: string, + end: string + ) => { + let url = `${location.pathname}` + if (grp || data) url += '?' + + let params: string[] = [] + if (grp) params.push(`groupBy=${grp}`) + if (data) params.push(`selectedData=${data}`) + if (start) params.push(`start=${start}`) + if (end) params.push(`end=${end}`) + + url += params.join('&') + navigate(url) + } + + const onGroupBySelect = (event: any, data: any) => { + setGroupBy(data.value) + setSelectedData(undefined) + updateNav(data.value, undefined, start, end) + } + + const onSelect = (event: any, data: any) => { + setSelectedData(data.value) + updateNav(groupBy, data.value, start, end) + } + + const changeDate = (name: string, value: string) => { + let start_update = start + let end_update = end + if (name === 'start') start_update = value + if (name === 'end') end_update = value + setStart(start_update) + setEnd(end_update) + updateNav(groupBy, selectedData, start_update, end_update) + } + + const getData = (query: BillingTotalCostQueryModel) => { + setIsLoading(true) + setError(undefined) + new BillingApi() + .getTotalCost(query) + .then((response) => { + setIsLoading(false) + + // calc totals per cost_category + const recTotals = response.data.reduce( + ( + acc: { [key: string]: { [key: string]: number } }, + item: BillingTotalCostRecord + ) => { + const { cost_category, cost } = item + if (!acc[cost_category]) { + acc[cost_category] = 0 + } + acc[cost_category] += cost + return acc + }, + {} + ) + const sortedRecTotals: { [key: string]: number } = Object.fromEntries( + Object.entries(recTotals).sort(([, a], [, b]) => b - a) + ) + const rec_grps = Object.keys(sortedRecTotals) + const records = response.data.reduce( + ( + acc: { [key: string]: { [key: string]: number } }, + item: BillingTotalCostRecord + ) => { + const { day, cost_category, cost } = item + if (day !== undefined) { + if (!acc[day]) { + // initialise day structure + acc[day] = {} + rec_grps.forEach((k) => { + acc[day][k] = 0 + }) + } + acc[day][cost_category] = cost + } + return acc + }, + {} + ) + const no_undefined: string[] = rec_grps.filter( + (item): item is string => item !== undefined + ) + setGroups(no_undefined) + setData( + Object.keys(records).map((key) => ({ + date: new Date(key), + values: records[key], + })) + ) + const aggData: IData[] = Object.entries(sortedRecTotals) + .map(([label, value]) => ({ label, value })) + .reduce((acc: IData[], curr: IData, index: number, arr: IData[]) => { + if (index < maxDataPoints) { + acc.push(curr) + } else { + const restValue = arr + .slice(index) + .reduce((sum, { value }) => sum + value, 0) + + if (acc.length == maxDataPoints) { + acc.push({ label: 'Rest*', value: restValue }) + } else { + acc[maxDataPoints].value += restValue + } + } + 
return acc + }, []) + + setAggregatedData(aggData) + }) + .catch((er) => setError(er.message)) + } + + React.useEffect(() => { + if (selectedData !== undefined && selectedData !== '' && selectedData !== null) { + let source = 'aggregate' + if (groupBy === BillingColumn.GcpProject) { + source = 'gcp_billing' + } + if (selectedData.startsWith('All ')) { + getData({ + fields: [BillingColumn.Day, BillingColumn.CostCategory], + start_date: start, + end_date: end, + order_by: { day: false }, + source: source, + }) + } else { + getData({ + fields: [BillingColumn.Day, BillingColumn.CostCategory], + start_date: start, + end_date: end, + filters: { [groupBy.replace('-', '_').toLowerCase()]: selectedData }, + order_by: { day: false }, + source: source, + }) + } + } + }, [start, end, groupBy, selectedData]) + + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + + return ( + <> + +

+ Billing Cost By Time +

+ + + + + + + + + + + + + + changeDate('start', e.target.value)} + value={start} + /> + + + + changeDate('end', e.target.value)} + value={end} + /> + + + + + + + + + + + + + + + + + + +
+ + + + + ) +} + +export default BillingCostByTime diff --git a/web/src/pages/billing/BillingDashboard.tsx b/web/src/pages/billing/BillingDashboard.tsx deleted file mode 100644 index 0eff89d3a..000000000 --- a/web/src/pages/billing/BillingDashboard.tsx +++ /dev/null @@ -1,42 +0,0 @@ -import * as React from 'react' -import { Card, Input } from 'semantic-ui-react' -import SeqrProportionalMapGraph from './SeqrProportionalMapGraph' - -const BillingDashboard: React.FunctionComponent = () => { - const now = new Date() - const [start, setStart] = React.useState(`${now.getFullYear()}-01-01`) - const [end, setEnd] = React.useState( - `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` - ) - - return ( - -
-
- Billing Dashboard -
-
-
-
- Start - setStart(e.target.value)} value={start} /> - Finish - setEnd(e.target.value)} value={end} /> -
- -
- ) -} - -export default BillingDashboard diff --git a/web/src/pages/billing/BillingHome.tsx b/web/src/pages/billing/BillingHome.tsx new file mode 100644 index 000000000..a39afbf9c --- /dev/null +++ b/web/src/pages/billing/BillingHome.tsx @@ -0,0 +1,19 @@ +import * as React from 'react' + +import { ThemeContext } from '../../shared/components/ThemeProvider' + +interface IBillingHomeProps {} + +const BillingHome: React.FunctionComponent = (props: IBillingHomeProps) => { + const theme = React.useContext(ThemeContext) + // const isDarkMode = theme.theme === 'dark-mode' + + return ( +
+

Billing Homepage

+

This will probably be where we have the main billing documentation

+
+ ) +} + +export default BillingHome diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx new file mode 100644 index 000000000..d7793d265 --- /dev/null +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -0,0 +1,377 @@ +import * as React from 'react' +import { Link, useSearchParams, useNavigate, useLocation } from 'react-router-dom' +import { Table as SUITable, Message, Button, Checkbox, Dropdown, Grid } from 'semantic-ui-react' +import _ from 'lodash' + +import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' +import Table from '../../shared/components/Table' +import { BillingApi, BillingColumn, BillingCostBudgetRecord } from '../../sm-api' + +import './Billing.css' +import FieldSelector from './components/FieldSelector' + +import { convertFieldName } from '../../shared/utilities/fieldName' + +const BillingCurrentCost = () => { + const [isLoading, setIsLoading] = React.useState(true) + const [openRows, setOpenRows] = React.useState([]) + + const [costRecords, setCosts] = React.useState([]) + const [error, setError] = React.useState() + const [sort, setSort] = React.useState<{ column: string | null; direction: string | null }>({ + column: 'undefined', + direction: 'undefined', + }) + + // Pull search params for use in the component + const [searchParams] = useSearchParams() + const inputGroupBy: string | null = searchParams.get('groupBy') + const fixedGroupBy: BillingColumn = inputGroupBy + ? (inputGroupBy as BillingColumn) + : BillingColumn.GcpProject + const inputInvoiceMonth = searchParams.get('invoiceMonth') + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = (grp: BillingColumn, invoiceMonth: string | undefined) => { + let url = `${location.pathname}?groupBy=${grp}` + if (invoiceMonth) { + url += `&invoiceMonth=${invoiceMonth}` + } + navigate(url) + } + + // toISOString() will give you YYYY-MM-DDTHH:mm:ss.sssZ + // toISOString().substring(0, 7) will give you YYYY-MM + // .replace('-', '') will give you YYYYMM + const thisMonth = new Date().toISOString().substring(0, 7).replace('-', '') + + const [groupBy, setGroupBy] = React.useState( + fixedGroupBy ?? BillingColumn.GcpProject + ) + const [invoiceMonth, setInvoiceMonth] = React.useState(inputInvoiceMonth ?? 
thisMonth) + + const [lastLoadedDay, setLastLoadedDay] = React.useState() + + const getCosts = (grp: BillingColumn, invoiceMth: string | undefined) => { + updateNav(groupBy, invoiceMth) + setIsLoading(true) + setError(undefined) + let source = 'aggregate' + if (grp === BillingColumn.GcpProject) { + source = 'gcp_billing' + } + new BillingApi() + .getRunningCost(grp, invoiceMth, source) + .then((response) => { + setIsLoading(false) + setCosts(response.data) + setLastLoadedDay(response.data[0].last_loaded_day) + }) + .catch((er) => setError(er.message)) + } + + const onGroupBySelect = (event: any, data: any) => { + setGroupBy(data.value) + getCosts(data.value, invoiceMonth) + } + + const onInvoiceMonthSelect = (event: any, data: any) => { + setInvoiceMonth(data.value) + getCosts(groupBy, data.value) + } + + React.useEffect(() => { + getCosts(groupBy, invoiceMonth) + }, []) + + const HEADER_FIELDS = [ + { category: 'field', title: groupBy.toUpperCase(), show_always: true }, + { category: 'compute_daily', title: 'C', show_always: false }, + { category: 'storage_daily', title: 'S', show_always: false }, + { category: 'total_daily', title: 'Total', show_always: false }, + { category: 'compute_monthly', title: 'C', show_always: true }, + { category: 'storage_monthly', title: 'S', show_always: true }, + { category: 'total_monthly', title: 'Total', show_always: true }, + ] + + const handleToggle = (field: string) => { + if (!openRows.includes(field)) { + setOpenRows([...openRows, field]) + } else { + setOpenRows(openRows.filter((i) => i !== field)) + } + } + + function currencyFormat(num: number): string { + if (num === undefined || num === null) { + return '' + } + + return `$${num.toFixed(2).replace(/(\d)(?=(\d{3})+(?!\d))/g, '$1,')}` + } + + function percFormat(num: number): string { + if (num === undefined || num === null) { + return '' + } + + return `${num.toFixed(0).toString()} % ` + } + + if (error) + return ( + + {error} +
+ +
+ ) + + if (isLoading) + return ( +
+ +

+ This query takes a while... +

+
+ ) + + const handleSort = (clickedColumn: string) => { + if (sort.column !== clickedColumn) { + setSort({ column: clickedColumn, direction: 'ascending' }) + return + } + if (sort.direction === 'ascending') { + setSort({ column: clickedColumn, direction: 'descending' }) + return + } + setSort({ column: null, direction: null }) + } + + const checkDirection = (category: string) => { + if (sort.column === category && sort.direction !== null) { + return sort.direction === 'ascending' ? 'ascending' : 'descending' + } + return undefined + } + + const linkTo = (data: string) => { + // convert invoice month to start and end dates + const year = invoiceMonth.substring(0, 4) + const month = invoiceMonth.substring(4, 6) + let nextYear = year + let nextMonth = (parseInt(month, 10) + 1).toString() + if (month === '12') { + nextYear = (parseInt(year, 10) + 1).toString() + nextMonth = '01' + } + const startDate = `${year}-${month}-01` + const nextMth = new Date(`${nextYear}-${nextMonth}-01`) + nextMth.setDate(-0.01) + const endDate = nextMth.toISOString().substring(0, 10) + return `/billing/costByTime?groupBy=${groupBy}&selectedData=${data}&start=${startDate}&end=${endDate}` + } + + return ( + <> +
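+                {/* Hedged note: linkTo above turns the selected YYYYMM invoice month
+                    into a costByTime date range — setDate(-0.01) truncates to
+                    setDate(0), rolling the first day of the following month back to
+                    the invoice month's last day before toISOString() serialises it. */}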

Billing By Invoice Month

+ + + + + + + + + + + + + + + + + + {invoiceMonth === thisMonth ? ( + + 24H (day UTC {lastLoadedDay}) + + ) : null} + + {groupBy === BillingColumn.GcpProject ? ( + + Invoice Month (Acc) + + ) : ( + + Invoice Month (Acc) + + )} + + + + + {HEADER_FIELDS.map((k) => { + switch (k.show_always || invoiceMonth === thisMonth) { + case true: + return ( + handleSort(k.category)} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + {convertFieldName(k.title)} + + ) + default: + return null + } + })} + + {groupBy === BillingColumn.GcpProject && invoiceMonth === thisMonth ? ( + handleSort('budget_spent')} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + Budget Spend % + + ) : null} + + + + {_.orderBy( + costRecords, + [sort.column], + sort.direction === 'ascending' ? ['asc'] : ['desc'] + ).map((p) => ( + + + + handleToggle(p.field)} + /> + + {HEADER_FIELDS.map((k) => { + switch (k.category) { + case 'field': + return ( + + + + {p[k.category]} + + + + ) + default: + switch (k.show_always || invoiceMonth === thisMonth) { + case true: + return ( + + {currencyFormat(p[k.category])} + + ) + default: + return null + } + } + })} + + {groupBy === BillingColumn.GcpProject && + invoiceMonth === thisMonth ? ( + {percFormat(p.budget_spent)} + ) : null} + + {typeof p === 'object' && + 'details' in p && + _.orderBy(p?.details, ['monthly_cost'], ['desc']).map((dk) => ( + + + {dk.cost_category} + + {dk.cost_group === 'C' ? ( + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + ) : ( + + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + )} + + {groupBy === BillingColumn.GcpProject ? ( + + ) : null} + + ))} + + ))} + +
+ + ) +} + +export default BillingCurrentCost diff --git a/web/src/pages/billing/BillingSeqrProp.tsx b/web/src/pages/billing/BillingSeqrProp.tsx new file mode 100644 index 000000000..cca54ed43 --- /dev/null +++ b/web/src/pages/billing/BillingSeqrProp.tsx @@ -0,0 +1,65 @@ +import * as React from 'react' +import { useLocation, useNavigate } from 'react-router-dom' +import { Grid, Card, Input } from 'semantic-ui-react' +import SeqrProportionalMapGraph from './components/SeqrProportionalMapGraph' + +const BillingSeqrProp: React.FunctionComponent = () => { + const now = new Date() + const [start, setStart] = React.useState(`${now.getFullYear()}-01-01`) + const [end, setEnd] = React.useState( + `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + ) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = (start: string, end: string) => { + let url = `${location.pathname}` + if (start || end) url += '?' + + let params: string[] = [] + if (start) params.push(`start=${start}`) + if (end) params.push(`end=${end}`) + + url += params.join('&') + navigate(url) + } + + React.useEffect(() => { + updateNav(start, end) + }, [start, end]) + + return ( + +
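+        /* Hedged note: the effect above mirrors start/end into ?start=&end= via
+           navigate() on every change, so the proportional-map view below stays
+           shareable as a plain URL. */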

+ Billing Seqr Proportional Map over Time +

+ + + setStart(e.target.value)} + value={start} + /> + + + setEnd(e.target.value)} + value={end} + /> + + + +
+ ) +} + +export default BillingSeqrProp diff --git a/web/src/pages/billing/components/BillingCostByTimeTable.tsx b/web/src/pages/billing/components/BillingCostByTimeTable.tsx new file mode 100644 index 000000000..992ccaeca --- /dev/null +++ b/web/src/pages/billing/components/BillingCostByTimeTable.tsx @@ -0,0 +1,248 @@ +import { Checkbox, Grid, Header, Table as SUITable } from 'semantic-ui-react' +import Table from '../../../shared/components/Table' +import React from 'react' +import { BillingColumn } from '../../../sm-api' +import { convertFieldName } from '../../../shared/utilities/fieldName' +import LoadingDucks from '../../../shared/components/LoadingDucks/LoadingDucks' +import { IStackedAreaByDateChartData } from '../../../shared/components/Graphs/StackedAreaByDateChart' +import orderBy from '../../../shared/utilities/orderBy' +import { ErrorBarDataPointFormatter } from 'recharts/types/cartesian/ErrorBar' + +interface IBillingCostByTimeTableProps { + heading: string + start: string + end: string + groups: string[] + isLoading: boolean + data: IStackedAreaByDateChartData[] +} + +const BillingCostByTimeTable: React.FC = ({ + heading, + start, + end, + groups, + isLoading, + data, +}) => { + const [internalData, setInternalData] = React.useState([]) + const [internalGroups, setInternalGroups] = React.useState([]) + + // Format data + React.useEffect(() => { + setInternalData( + data.map((p) => { + let newP = { ...p } + const total = Object.values(p.values).reduce((acc, cur) => acc + cur, 0) + newP.values['Daily Total'] = total + newP.values['Compute Cost'] = total - p.values['Cloud Storage'] + return newP + }) + ) + + setInternalGroups(groups.concat(['Daily Total', 'Compute Cost'])) + }, [data, groups]) + + // Properties + const [expandCompute, setExpandCompute] = React.useState(false) + const [sort, setSort] = React.useState<{ column: string | null; direction: string | null }>({ + column: null, + direction: null, + }) + + // Header sort + const priorityColumns = ['Daily Total', 'Cloud Storage', 'Compute Cost'] + const headerSort = (a: string, b: string) => { + if (priorityColumns.includes(a) && priorityColumns.includes(b)) { + return priorityColumns.indexOf(a) < priorityColumns.indexOf(b) ? -1 : 1 + } else if (priorityColumns.includes(a)) { + return -1 + } else if (priorityColumns.includes(b)) { + return 1 + } + return a < b ? -1 : 1 + } + + const headerFields = () => { + if (expandCompute) { + return internalGroups + .sort(headerSort) + .filter((group) => group != 'Compute Cost') + .map((group: string) => ({ + category: group, + title: group, + })) + } + return [ + { + category: 'Daily Total', + title: 'Daily Total', + }, + { + category: 'Cloud Storage', + title: 'Cloud Storage', + }, + { + category: 'Compute Cost', + title: 'Compute Cost', + }, + ] + } + + const handleSort = (clickedColumn: string) => { + if (sort.column !== clickedColumn) { + setSort({ column: clickedColumn, direction: 'ascending' }) + return + } + if (sort.direction === 'ascending') { + setSort({ column: clickedColumn, direction: 'descending' }) + return + } + setSort({ column: null, direction: null }) + } + + const checkDirection = (category: string) => { + if (sort.column === category && sort.direction !== null) { + return sort.direction === 'ascending' ? 
'ascending' : 'descending' + } + return undefined + } + + const currencyFormat = (num: number): string => { + if (num === undefined || num === null) { + return '' + } + + return `$${num.toFixed(2).replace(/(\d)(?=(\d{3})+(?!\d))/g, '$1,')}` + } + + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + const dataSort = ( + data: IStackedAreaByDateChartData[], + props: string[], + orders?: ('asc' | 'desc')[] + ) => + [...data].sort( + (a, b) => + props.reduce((acc, prop, i) => { + if (acc === 0) { + const [p1, p2] = + orders && orders[i] === 'desc' + ? [ + b.values[prop as keyof typeof b], + a.values[prop as keyof typeof a], + ] + : [ + a.values[prop as keyof typeof a], + b.values[prop as keyof typeof b], + ] + acc = p1 > p2 ? 1 : p1 < p2 ? -1 : 0 + } + return acc + }, 0) as number // explicitly cast the result to a number + ) + + const dataToBody = (data: IStackedAreaByDateChartData[]) => ( + <> + {dataSort( + data, + sort.column ? [sort.column] : [], + sort.direction === 'ascending' ? ['asc'] : ['desc'] + ).map((p) => ( + + + + {p.date.toLocaleDateString()} + + {headerFields().map((k) => ( + {currencyFormat(p.values[k.category])} + ))} + + + ))} + + ) + + return ( + <> +
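+            {/* Hedged note: internalData augments each day with two derived columns,
+                'Daily Total' (the sum of all groups) and 'Compute Cost' (total minus
+                'Cloud Storage'), so the collapsed view can contrast storage and
+                compute without refetching. */}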
+ {convertFieldName(heading)} costs from {start} to {end} +
+ + + + + setExpandCompute(!expandCompute)} + /> + + Storage Cost + + Compute Cost + + + + + Date + + {headerFields().map((k) => ( + handleSort(k.category)} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + {convertFieldName(k.title)} + + ))} + + + + {dataToBody(internalData)} + + + All Time Total + + {headerFields().map((k) => ( + + + {currencyFormat( + internalData.reduce( + (acc, cur) => acc + cur.values[k.category], + 0 + ) + )} + + + ))} + + +
+ + ) +} + +export default BillingCostByTimeTable diff --git a/web/src/pages/billing/components/CostByTimeChart.tsx b/web/src/pages/billing/components/CostByTimeChart.tsx new file mode 100644 index 000000000..4eeff8e90 --- /dev/null +++ b/web/src/pages/billing/components/CostByTimeChart.tsx @@ -0,0 +1,54 @@ +import * as React from 'react' +import _ from 'lodash' + +import LoadingDucks from '../../../shared/components/LoadingDucks/LoadingDucks' +import { + IStackedAreaByDateChartData, + StackedAreaByDateChart, +} from '../../../shared/components/Graphs/StackedAreaByDateChart' + +interface ICostByTimeChartProps { + start: string + end: string + groups: string[] + isLoading: boolean + data: IStackedAreaByDateChartData[] +} + +const CostByTimeChart: React.FunctionComponent = ({ + start, + end, + groups, + isLoading, + data, +}) => { + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + return ( + <> + + + ) +} + +export default CostByTimeChart diff --git a/web/src/pages/billing/components/FieldSelector.tsx b/web/src/pages/billing/components/FieldSelector.tsx new file mode 100644 index 000000000..e6e86450a --- /dev/null +++ b/web/src/pages/billing/components/FieldSelector.tsx @@ -0,0 +1,143 @@ +import * as React from 'react' + +import { Dropdown, Input, Message } from 'semantic-ui-react' + +import { BillingApi, BillingColumn } from '../../../sm-api' + +import '../Billing.css' + +import { convertFieldName } from '../../../shared/utilities/fieldName' + +interface FieldSelectorProps { + label: string + fieldName: string + selected?: string + includeAll?: boolean + onClickFunction: (_: any, { value }: any) => void +} + +const FieldSelector: React.FunctionComponent = ({ + label, + fieldName, + selected, + includeAll, + onClickFunction, +}) => { + const [loading, setLoading] = React.useState(true) + const [error, setError] = React.useState() + const [records, setRecords] = React.useState([]) + + const extendRecords = (records: string[]) => { + if (includeAll) { + if (fieldName === 'GCP-Project') { + return [`All ${convertFieldName(fieldName)}`, ...records] + } + return [`All ${convertFieldName(fieldName)}s`, ...records] + } + return records + } + + const getTopics = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getTopics() + .then((response) => { + setLoading(false) + setRecords(extendRecords(response.data)) + }) + .catch((er) => setError(er.message)) + } + + const getGcpProjects = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getGcpProjects() + .then((response) => { + setLoading(false) + setRecords(extendRecords(response.data)) + }) + .catch((er) => setError(er.message)) + } + + const getInvoiceMonths = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getInvoiceMonths() + .then((response) => { + setLoading(false) + setRecords(extendRecords(response.data)) + }) + .catch((er) => setError(er.message)) + } + + React.useEffect(() => { + if (fieldName === BillingColumn.Topic) getTopics() + else if (fieldName === BillingColumn.InvoiceMonth) getInvoiceMonths() + else if (fieldName === 'Group') { + setRecords([BillingColumn.GcpProject, BillingColumn.Topic]) + setLoading(false) + } else if (fieldName === BillingColumn.GcpProject) getGcpProjects() + else { + setError(`Could not load records for ${fieldName}`) + } + }, [label, fieldName]) + + const capitalize = (str: string): string => { + if (str === 'gcp_project') { + return 'GCP-Project' + } + return str.charAt(0).toUpperCase() + str.slice(1) + } + + const recordsMap = (records: any[]) => { + if (fieldName === 'Group') { + return records.map((p: BillingColumn) => ({ + key: p, + text: capitalize(p), + value: p, + })) + } + return records.map((p: string) => ({ + key: p, + text: p, + value: p, + })) + } + + if (error) { + return ( + +

An error occurred while getting records

+

{JSON.stringify(error)}

+
+ ) + } + + return ( + + } + /> + ) +} + +export default FieldSelector diff --git a/web/src/pages/billing/SeqrProportionalMapGraph.tsx b/web/src/pages/billing/components/SeqrProportionalMapGraph.tsx similarity index 76% rename from web/src/pages/billing/SeqrProportionalMapGraph.tsx rename to web/src/pages/billing/components/SeqrProportionalMapGraph.tsx index 14e3c14b0..4991e4748 100644 --- a/web/src/pages/billing/SeqrProportionalMapGraph.tsx +++ b/web/src/pages/billing/components/SeqrProportionalMapGraph.tsx @@ -1,13 +1,13 @@ import * as React from 'react' import _ from 'lodash' -import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' -import { AnalysisApi, Project, ProjectApi, ProportionalDateTemporalMethod } from '../../sm-api' -import { Message, Select } from 'semantic-ui-react' +import LoadingDucks from '../../../shared/components/LoadingDucks/LoadingDucks' +import { AnalysisApi, Project, ProjectApi, ProportionalDateTemporalMethod } from '../../../sm-api' +import { Grid, Message, Select } from 'semantic-ui-react' import { IStackedAreaByDateChartData, StackedAreaByDateChart, -} from '../../shared/components/Graphs/StackedAreaByDateChart' +} from '../../../shared/components/Graphs/StackedAreaByDateChart' interface IProportionalDateProjectModel { project: string @@ -25,11 +25,6 @@ interface ISeqrProportionalMapGraphProps { end: string } -const TEMPORAL_METHODS_TO_DISPLAY = [ - ProportionalDateTemporalMethod.SampleCreateDate, - ProportionalDateTemporalMethod.EsIndexDate, -] - const SeqrProportionalMapGraph: React.FunctionComponent = ({ start, end, @@ -38,7 +33,7 @@ const SeqrProportionalMapGraph: React.FunctionComponent() const [temporalMethod, setTemporalMethod] = React.useState( - TEMPORAL_METHODS_TO_DISPLAY[0] + ProportionalDateTemporalMethod.SampleCreateDate ) const [projectSelections, setProjectSelections] = React.useState< { [key: string]: boolean } | undefined @@ -69,7 +64,10 @@ const SeqrProportionalMapGraph: React.FunctionComponent projectSelections[project])) : [] + const chart = ( + <> + + + ({ - key: m, - text: m, - value: m, - }))} - value={temporalMethod} - onChange={(e, { value }) => { - setTemporalMethod(value as ProportionalDateTemporalMethod) - }} - /> - ) } diff --git a/web/src/pages/billing/index.ts b/web/src/pages/billing/index.ts new file mode 100644 index 000000000..cdb6832fb --- /dev/null +++ b/web/src/pages/billing/index.ts @@ -0,0 +1,4 @@ +export { default as BillingHome } from "./BillingHome"; +export { default as BillingSeqrProp } from "./BillingSeqrProp"; +export { default as BillingCostByTime } from "./BillingCostByTime"; +export { default as BillingInvoiceMonthCost } from "./BillingInvoiceMonthCost"; diff --git a/web/src/shared/components/Graphs/BarChart.tsx b/web/src/shared/components/Graphs/BarChart.tsx new file mode 100644 index 000000000..67d88fff7 --- /dev/null +++ b/web/src/shared/components/Graphs/BarChart.tsx @@ -0,0 +1,126 @@ +import React from 'react' +import { axisBottom, axisLeft, scaleBand, scaleLinear, select, interpolateRainbow } from 'd3' +import LoadingDucks from '../LoadingDucks/LoadingDucks' +import formatMoney from '../../utilities/formatMoney' + +export interface IData { + label: string + value: number +} + +interface BarChartProps { + data: IData[] + maxSlices: number + colors: (t: number) => string | undefined + isLoading: boolean +} + +export const BarChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + if (!data || data.length === 0) { + return
No data available
+ } + + const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow + const margin = { top: 50, right: 0, bottom: 150, left: 100 } + // const width = 1000 - margin.left - margin.right; + const height = 400 - margin.top - margin.bottom + + const containerDivRef = React.useRef() + const [width, setWidth] = React.useState(768) + + const scaleX = scaleBand() + .domain(data.map(({ label }) => label)) + .range([0, width - margin.left - margin.right]) + .padding(0.5) + const scaleY = scaleLinear() + .domain([0, Math.max(...data.map(({ value }) => value))]) + .range([height, 0]) + + React.useEffect(() => { + function updateWindowWidth() { + setWidth(containerDivRef.current?.clientWidth || 768) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + + return () => { + window.removeEventListener('resize', updateWindowWidth) + } + }, []) + + const contDiv = containerDivRef.current + if (contDiv) { + // reset svg + contDiv.innerHTML = '' + + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // construct svg + const svg = select(contDiv) + .append('svg') + .attr('width', `${width + margin.left + margin.right}`) + .attr('height', `${height + margin.top + margin.bottom}`) + .append('g') + .attr('transform', `translate(${margin.left}, ${margin.top})`) + + // Series + svg.selectAll('whatever') + .data(data) + .enter() + .append('rect') + .attr('key', (d) => `bar-${d.label}`) + .attr('x', (d) => scaleX(d.label)) + .attr('y', (d) => scaleY(d.value)) + .attr('id', (d, i) => `rect${i}`) + .attr('width', scaleX.bandwidth()) + .attr('height', (d) => height - scaleY(d.value)) + .attr('fill', (d, i) => colorFunc(i / maxSlices)) + .attr('stroke', '#fff') + + // Axis Left + svg.append('g') + .call(axisLeft(scaleY)) + .selectAll('text') + .style('text-anchor', 'end') + .style('font-size', '1.5em') + .attr('transform', 'translate(-10, 0)') + + // Axis Bottom + svg.append('g') + .attr('transform', `translate(0, ${height})`) + .call(axisBottom(scaleX)) + .selectAll('text') + .style('text-anchor', 'end') + .style('font-size', '1.5em') + .attr('id', (d, i) => `lgd${i}`) + .attr('transform', 'translate(-10, 0) rotate(-25)') + + // Labels + svg.append('g') + .attr('text-anchor', 'middle') + .style('font-size', '1.1em') + .selectAll('text') + .data(data) + .join('text') + .attr('transform', (d) => `translate(${scaleX(d.label)},${scaleY(d.value) - 5})`) + .attr('dx', '2em') + .attr('id', (d, i) => `lbl${i}`) + .selectAll('tspan') + .data((d) => `${formatMoney(d.value)}`.split(/\n/)) + .join('tspan') + .attr('font-weight', (_, i) => (i ? null : 'normal')) + .text((d) => d) + } + return
+} diff --git a/web/src/shared/components/Graphs/DonutChart.tsx b/web/src/shared/components/Graphs/DonutChart.tsx new file mode 100644 index 000000000..2a6fc4713 --- /dev/null +++ b/web/src/shared/components/Graphs/DonutChart.tsx @@ -0,0 +1,214 @@ +import React from 'react' +import { select, interpolateRainbow, pie, arc } from 'd3' +import LoadingDucks from '../LoadingDucks/LoadingDucks' +import formatMoney from '../../utilities/formatMoney' + +export interface IDonutChartData { + label: string + value: number +} + +export interface IDonutChartProps { + data?: IDonutChartData[] + maxSlices: number + colors: (t: number) => string | undefined + isLoading: boolean +} + +interface IDonutChartPreparadData { + index: number + startAngle: number + endAngle: number + data: IDonutChartData +} + +function calcTranslate(data: IDonutChartPreparadData, move = 4) { + const moveAngle = data.startAngle + (data.endAngle - data.startAngle) / 2 + return `translate(${-2 * move * Math.cos(moveAngle + Math.PI / 2)}, ${ + -2 * move * Math.sin(moveAngle + Math.PI / 2) + })` +} + +export const DonutChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + if (!data || data.length === 0) { + return
No data available
+ } + const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow + const duration = 250 + const containerDivRef = React.useRef() + const [graphWidth, setGraphWidth] = React.useState(768) + + const onHoverOver = (tg: HTMLElement, v: IDonutChartPreparadData) => { + select(`#lbl${v.index}`).select('tspan').attr('font-weight', 'bold') + select(`#legend${v.index}`).attr('font-weight', 'bold') + select(`#lgd${v.index}`).attr('font-weight', 'bold') + select(tg).transition().duration(duration).attr('transform', calcTranslate(v, 6)) + select(tg) + .select('path') + .transition() + .duration(duration) + .attr('stroke', 'rgba(100, 100, 100, 0.2)') + .attr('stroke-width', 4) + select(tg) + } + + const onHoverOut = (tg: HTMLElement, v: IDonutChartPreparadData) => { + select(`#lbl${v.index}`).select('tspan').attr('font-weight', 'normal') + select(`#legend${v.index}`).attr('font-weight', 'normal') + select(`#lgd${v.index}`).attr('font-weight', 'normal') + select(tg).transition().duration(duration).attr('transform', 'translate(0, 0)') + select(tg) + .select('path') + .transition() + .duration(duration) + .attr('stroke', 'white') + .attr('stroke-width', 1) + } + + const width = graphWidth + const height = width + const margin = 15 + const radius = Math.min(width, height) / 2 - margin + + // keep order of the slices + const pieFnc = pie() + .value((d) => d.value) + .sort((a) => { + if (typeof a === 'object' && a.type === 'inc') { + return 1 + } + return -1 + }) + const data_ready = pieFnc(data) + const innerRadius = radius / 1.75 // inner radius of pie, in pixels (non-zero for donut) + const outerRadius = radius // outer radius of pie, in pixels + const labelRadius = outerRadius * 0.8 // center radius of labels + const arcData = arc().innerRadius(innerRadius).outerRadius(outerRadius) + const arcLabel = arc().innerRadius(labelRadius).outerRadius(labelRadius) + + React.useEffect(() => { + function updateWindowWidth() { + setGraphWidth(containerDivRef.current?.clientWidth || 768) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + + return () => { + window.removeEventListener('resize', updateWindowWidth) + } + }, []) + + const contDiv = containerDivRef.current + if (contDiv) { + // reset svg + contDiv.innerHTML = '' + + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // construct svg + const svg = select(contDiv) + .append('svg') + .attr('width', '55%') + .attr('height', '100%') + .attr('viewBox', `0 0 ${width} ${width}`) + .append('g') + .attr( + 'transform', + `translate(${Math.min(width, height) / 2}, ${Math.min(width, height) / 2})` + ) + + // Donut partitions + svg.selectAll('whatever') + .data(data_ready) + .enter() + .append('path') + .attr('d', arcData) + .attr('fill', (d) => colorFunc(d.index / maxSlices)) + .attr('stroke', '#fff') + .style('stroke-width', '2') + .style('opacity', '0.8') + .style('cursor', 'pointer') + .attr('id', (d) => `path${d.index}`) + .on('mouseover', (event, v) => { + onHoverOver(event.currentTarget, v) + }) + .on('mouseout', (event, v) => { + onHoverOut(event.currentTarget, v) + }) + .append('title') + .text((d) => `${d.data.label} ${d.data.value}`) + .style('text-anchor', 'middle') + .style('font-size', 17) + + // labels + svg.append('g') + .attr('font-family', 'sans-serif') + .attr('font-size', '1.5em') + .attr('text-anchor', 'middle') + .selectAll('text') + .data(data_ready) + .join('text') + .attr('transform', (d) => `translate(${arcLabel.centroid(d)})`) + .attr('id', (d) => `lbl${d.index}`) + .selectAll('tspan') + .data((d) => { + const lines = `${formatMoney(d.data.value)}`.split(/\n/) + return d.endAngle - d.startAngle > 0.25 ? lines : lines.slice(0, 1) + }) + .join('tspan') + .attr('x', 0) + .attr('y', (_, i) => `${i * 2.1}em`) + .attr('font-weight', (_, i) => (i ? null : 'normal')) + .text((d) => d) + + // add legend + const svgLegend = select(contDiv) + .append('svg') + .attr('width', '45%') + .attr('viewBox', '0 0 200 200') + .attr('vertical-align', 'top') + + svgLegend + .selectAll('g.legend') + .data(data_ready) + .enter() + .append('g') + .attr('transform', (d) => `translate(${margin},${margin + d.index * 20})`) + .each(function (d, i) { + select(this) + .append('circle') + .attr('r', 8) + .attr('fill', (d) => colorFunc(d.index / maxSlices)) + select(this) + .append('text') + .attr('text-anchor', 'start') + .attr('x', 20) + .attr('y', 0) + .attr('dy', '0.35em') + .attr('id', (d) => `legend${d.index}`) + .text(d.data.label) + .attr('font-size', '0.9em') + select(this) + .on('mouseover', (event, v) => { + const element = select(`#path${d.index}`) + onHoverOver(element.node(), d) + }) + .on('mouseout', (event, v) => { + const element = select(`#path${d.index}`) + onHoverOut(element.node(), d) + }) + }) + } + return
+} diff --git a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx index f909ccd0c..08ebf3d34 100644 --- a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx +++ b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx @@ -12,6 +12,7 @@ import { interpolateRainbow, TimeInterval, utcHour, + stackOffsetNone, } from 'd3' import _ from 'lodash' import React from 'react' @@ -29,6 +30,12 @@ interface IStackedAreaByDateChartProps { data?: IStackedAreaByDateChartData[] keys: string[] isPercentage: boolean + xLabel: string + yLabel: string + seriesLabel: string + extended?: boolean + showDate?: boolean + colors?: (t: number) => string } function getDisplayValue(value: number, isPercentage: boolean) { @@ -42,10 +49,12 @@ function getTimeInterval(timeDiffMinutes: number) { if (timeDiffMinutes < 60 * 24) { // less than one day return utcHour.every(1) - } else if (timeDiffMinutes < 60 * 24 * 28) { + } + if (timeDiffMinutes < 60 * 24 * 28) { // less than one month return utcDay.every(1) - } else if (timeDiffMinutes < 60 * 24 * 365) { + } + if (timeDiffMinutes < 60 * 24 * 365) { // less than one year return utcMonth.every(1) } @@ -59,11 +68,19 @@ export const StackedAreaByDateChart: React.FC = ({ start, end, isPercentage, + xLabel, + yLabel, + seriesLabel, + extended, + showDate, + colors, }) => { if (!data || data.length === 0) { return } + const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow + const tooltipRef = React.useRef() const containerDivRef = React.useRef() const [hoveredIndex, setHoveredIndex] = React.useState(null) @@ -105,9 +122,10 @@ export const StackedAreaByDateChart: React.FC = ({ const id = '1' // d3 function that turns the data into stacked proportions - const stackedData = stack().offset(stackOffsetExpand).keys(keys)( - data.map((d) => ({ date: d.date, ...d.values })) - ) + const stackedData = stack() + .offset(extended ? stackOffsetExpand : stackOffsetNone) + .keys(keys)(data.map((d) => ({ date: d.date, ...d.values }))) + // function for generating the x Axis // domain refers to the min and max of the data (in this case earliest and latest dates) // range refers to the min and max pixel positions on the screen @@ -116,9 +134,16 @@ export const StackedAreaByDateChart: React.FC = ({ .domain(extent(data, (d) => d.date)) // date is a string, will this take a date object? Yes :) .range([0, width - margin.left - margin.right]) + // use last stackData value to calculate max Y axis point + const diffX = stackedData[stackedData.length - 1].flatMap((val) => val[1]) + // function for generating the y Axis // no domain needed as it defaults to [0, 1] which is appropriate for proportions - const yScale = scaleLinear().range([height - margin.top - margin.bottom, 0]) + const yScale = extended + ? scaleLinear().range([height - margin.top - margin.bottom, 0]) + : scaleLinear() + .domain([0, Math.max(...diffX.flatMap((val) => val))]) + .range([height - margin.top - margin.bottom, 0]) // function that assigns each category a colour // can fiddle with the schemeAccent parameter for different colour scales - see https://d3js.org/d3-scale-chromatic/categorical#schemeAccent @@ -215,10 +240,16 @@ then to draw in svg you just need to give coordinates. We've specified the width cursor="help" > {/* change this for different date formats */} - {`${tick.toLocaleString('en-us', { - month: 'short', - year: 'numeric', - })}`} + {showDate + ? 
`${tick.toLocaleString('en-us', { + day: 'numeric', + month: 'short', + year: 'numeric', + })}` + : `${tick.toLocaleString('en-us', { + month: 'short', + year: 'numeric', + })}`} {/* this is the tiny vertical tick line that getting drawn (6 pixels tall) */} @@ -265,7 +296,7 @@ then to draw in svg you just need to give coordinates. We've specified the width return } - const colour = interpolateRainbow(i / keys.length) + const colour = colorFunc(i / keys.length) // @ts-ignore const key = keys[i] const date = data[j]?.date @@ -351,7 +382,7 @@ then to draw in svg you just need to give coordinates. We've specified the width fontSize={20} textAnchor="middle" > - {'Date'} + {xLabel} @@ -361,12 +392,12 @@ then to draw in svg you just need to give coordinates. We've specified the width transform={`rotate(-90) translate(-${innerHeight / 2}, -60)`} > - {'Proportion'} + {yLabel} - Projects + {seriesLabel} {keys.map((project, i) => ( {project} diff --git a/web/src/shared/components/Header/DarkModeTriButton/DarkModeTriButton.css b/web/src/shared/components/Header/DarkModeTriButton/DarkModeTriButton.css index fde1fa15e..debecf4a4 100644 --- a/web/src/shared/components/Header/DarkModeTriButton/DarkModeTriButton.css +++ b/web/src/shared/components/Header/DarkModeTriButton/DarkModeTriButton.css @@ -1,19 +1,22 @@ .switch-toggle { float: left; background: #242729; - margin-right: 20px; + /* margin-right: 20px; */ border-radius: 25px; } + .switch-toggle input { position: absolute; opacity: 0; } + .switch-toggle input + label { padding: 7px; float: left; color: #fff; cursor: pointer; } + .switch-toggle input:checked + label { background: green; border-radius: 25px; diff --git a/web/src/shared/components/Header/NavBar.css b/web/src/shared/components/Header/NavBar.css index b674a87c4..b4e81f651 100644 --- a/web/src/shared/components/Header/NavBar.css +++ b/web/src/shared/components/Header/NavBar.css @@ -1,46 +1,90 @@ .App-header { background-color: #24344f; - position: fixed; + position: sticky; top: 0; width: 100%; z-index: 999; - padding-top: 10px; - padding-bottom: 10px; + padding: 0 8px; /* height: 50px; */ } -.metamist { - font-family: 'Raleway', sans-serif; +.App-header a { + color: white !important; + text-decoration: none; +} + +.App-header div { color: white; - font-size: 28px; - /* display: inline-block; */ + text-decoration: none; +} + +.navItem { + padding: 8px; } -.header { +.ui.header.menu { background-color: #24344f; - display: flex; - align-items: center; - padding: 0px 80px 0px 80px; + box-shadow: none; + border: none; } -.metamist-img { - vertical-align: bottom; +#navDrop { + background: #24344f !important; + top: 150% !important; } -.navbarLink { +.ui.menu .ui.dropdown .menu > #navItem { + color: #f2f2f2 !important; + padding: 16px !important; + background: #24344f !important; + + &:hover { + color: #f2f2f2 !important; + background: #0f1827 !important; + } +} + +.ui.menu .item { + padding: 8px !important; +} + +.navbarText { color: #f2f2f2; - padding: 0 16px; text-decoration: none; font-size: 17px; display: inline-block; } .navbarIcon { - padding: 0 8px; + padding: 8px; display: inline-block; + color: #f2f2f2; } -.App-header a { +.navPopup { + background-color: #24344f; + color: #f2f2f2; +} + +.navbarLink { + padding: 8px; +} + +#metamist { + font-family: 'Raleway', sans-serif; color: white; - text-decoration: none; + font-size: 28px; + /* padding: 0 8px; */ +} + +#metamist-img { + vertical-align: bottom; + font-size: 28px; + font-weight: bold; + padding: 16px; + + &:hover { + cursor: pointer; 
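+    /* Assumption: the `&:hover` block relies on native CSS nesting in a plain
+       .css file, which only recent browsers parse; without a build-time
+       preprocessor older engines may drop this rule. */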
+ background-color: #24344f; + } } diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 1b9cd09f0..420371ced 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -1,80 +1,174 @@ import * as React from 'react' import { Link } from 'react-router-dom' -import { Popup } from 'semantic-ui-react' +import { Menu, Dropdown, Popup } from 'semantic-ui-react' + +import { BillingApi } from '../../../sm-api' // this wasn't working, so added import to HTML // import 'bootstrap/dist/css/bootstrap.min.css' -import ExploreIcon from '@mui/icons-material/Explore' -import DescriptionIcon from '@mui/icons-material/Description' -import InsightsIcon from '@mui/icons-material/Insights' -import BuildIcon from '@mui/icons-material/Build' import Searchbar from './Search' import MuckTheDuck from '../MuckTheDuck' -import DarkModeTriButton from './DarkModeTriButton/DarkModeTriButton' import SwaggerIcon from '../SwaggerIcon' +import HomeIcon from '@mui/icons-material/Home' +import ExploreIcon from '@mui/icons-material/Explore' +import InsightsIcon from '@mui/icons-material/Insights' +import TableRowsIcon from '@mui/icons-material/TableRows' +import AttachMoneyIcon from '@mui/icons-material/AttachMoney' +import DescriptionIcon from '@mui/icons-material/Description' +import TroubleshootIcon from '@mui/icons-material/Troubleshoot' +import DarkModeTriButton from './DarkModeTriButton/DarkModeTriButton' + +import { ThemeContext } from '../ThemeProvider' + import './NavBar.css' -const NavBar: React.FunctionComponent = () => ( -
-
- - - - - - METAMIST - - - - Explore - - } hoverable position="bottom center"> -
Explore
-
-
- - - Analysis Runner - - } hoverable position="bottom center"> -
Analysis Runner
-
-
- - - Swagger - - } - hoverable - position="bottom center" - > -
Swagger
-
-
- - - Docs - - } hoverable position="bottom center"> -
Docs
-
-
- - - GraphQL - - } hoverable position="bottom center"> -
GraphQL
-
-
-
-
- -
- -
-
-) +const billingPages = { + title: 'Billing', + url: '/billing', + icon: , + submenu: [ + { + title: 'Home', + url: '/billing', + icon: , + }, + { + title: 'Invoice Month Cost', + url: '/billing/invoiceMonthCost', + icon: , + }, + { + title: 'Cost By Time', + url: '/billing/costByTime', + icon: , + }, + { + title: 'Seqr Prop Map', + url: '/billing/seqrPropMap', + icon: , + }, + ], +} + +interface MenuItem { + title: string + url: string + icon: JSX.Element + submenu?: MenuItem[] +} +interface MenuItemProps { + index: number + item: MenuItem +} + +const MenuItem: React.FC = ({ index, item }) => { + const theme = React.useContext(ThemeContext) + const isDarkMode = theme.theme === 'dark-mode' + + const dropdown = (item: MenuItem) => ( + + + {item.submenu && + item.submenu.map((subitem, subindex) => ( + + {subitem.title} + + ))} + + + ) + + const popup = (child: React.ReactNode, icon: JSX.Element) => ( + <> + {child} + + +
{child}
+
+
+ + ) + + return item.submenu ? ( + {popup(dropdown(item), item.icon)} + ) : ( + + {popup(item.title, item.icon)} + + ) +} + +interface NavBarProps { + fixed?: boolean +} + +const NavBar: React.FC = ({ fixed }) => { + const [menuItems, setMenuItems] = React.useState([ + { + title: 'Explore', + url: '/project', + icon: , + }, + { + title: 'Analysis Runner', + url: '/analysis-runner', + icon: , + }, + { + title: 'Swagger', + url: '/swagger', + icon: , + }, + { + title: 'Docs', + url: '/documentation', + icon: , + }, + { + title: 'GraphQL', + url: '/graphql', + icon: , + }, + ]) + + React.useEffect(() => { + new BillingApi().getTopics().then((response) => { + if (response.status === 200) { + setMenuItems([...menuItems.slice(0, 2), billingPages, ...menuItems.slice(2)]) + } + }) + }, []) + + return ( +
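+        /* Hedged note: billing entries are injected at runtime — the effect above
+           probes the billing API once and, only on a 200 response, splices
+           billingPages into the menu, so deployments without billing configured
+           never render the section. */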
+ + + + METAMIST + + + {menuItems.map((item, index) => ( + + ))} + + + + + + + + + + + +
+ ) +} export default NavBar diff --git a/web/src/shared/components/Header/Search.css b/web/src/shared/components/Header/Search.css index 9e899d185..8b4a0c9c6 100644 --- a/web/src/shared/components/Header/Search.css +++ b/web/src/shared/components/Header/Search.css @@ -2,6 +2,7 @@ display: flex; align-items: center; height: 38px; + min-width: 38px; } @media (max-width: 1350px) { @@ -45,7 +46,7 @@ } .ui.search.nav-searchbar.focus #navsearch { - width: 363px; + width: 363px !important; padding: 10px; margin-top: 8px; opacity: 1; diff --git a/web/src/shared/components/ResponsiveContainer.tsx b/web/src/shared/components/ResponsiveContainer.tsx new file mode 100644 index 000000000..4c9831ab0 --- /dev/null +++ b/web/src/shared/components/ResponsiveContainer.tsx @@ -0,0 +1,198 @@ +/* + Following Semantic UI's example for a responsive webpage + See: https://github.com/Semantic-Org/Semantic-UI-React/blob/master/docs/src/layouts/HomepageLayout.js +*/ + +import { createMedia } from '@artsy/fresnel' +import * as React from 'react' +import PropTypes from 'prop-types' +import { InView } from 'react-intersection-observer' +import { + Button, + Container, + Divider, + Grid, + Header, + Icon, + Image, + List, + Menu, + Segment, + Sidebar, +} from 'semantic-ui-react' +import NavBar from './Header/NavBar' + +const { MediaContextProvider, Media } = createMedia({ + breakpoints: { + mobile: 0, + tablet: 768, + computer: 1024, + }, +}) + +/* Heads up! + * HomepageHeading uses inline styling, however it's not the best practice. Use CSS or styled + * components for such things. + */ + +interface HomepageHeadingProps { + mobile?: boolean +} + +const HomepageHeading: React.FC = ({ mobile }) => ( + +
+
+ + +) + +interface DesktopContainerProps { + children: React.ReactNode +} + +const DesktopContainer: React.FC = ({ children }) => { + const [fixed, toggleFixedMenu] = React.useState(true) + + return ( + + + + + + + Home + + Work + Company + Careers + + + + + + + + + + + {children} + + ) +} + +interface MobileContainerProps { + children: React.ReactNode +} + +const MobileContainer: React.FC = ({ children }) => { + const [sidebarOpened, setSidebarOpened] = React.useState(false) + + const handleSidebarHide = () => setSidebarOpened(false) + const handleToggle = () => setSidebarOpened(true) + + return ( + + + + + Home + + Work + Company + Careers + Log in + Sign Up + + + + + + + + + + + + + + + + + + + {children} + + + + ) +} + +interface ResponsiveContainerProps { + children: React.ReactNode +} + +const ResponsiveContainer: React.FC = ({ children }) => ( + /* Heads up! + * For large applications it may not be best option to put all page into these containers at + * they will be rendered twice for SSR. + */ + + {children} + {children} + +) + +export default ResponsiveContainer diff --git a/web/src/shared/utilities/fieldName.ts b/web/src/shared/utilities/fieldName.ts new file mode 100644 index 000000000..b14ca38af --- /dev/null +++ b/web/src/shared/utilities/fieldName.ts @@ -0,0 +1,10 @@ +const convertFieldName = (fieldName: string | undefined) => { + if (!fieldName) return '' + + return fieldName.replaceAll('_', ' ').replace('-', ' ').split(' ').map((word) => { + if (word === 'gcp') return word.toUpperCase() + return word[0].toUpperCase() + word.slice(1) + }).join(' ') +} + +export { convertFieldName } diff --git a/web/src/shared/utilities/formatMoney.ts b/web/src/shared/utilities/formatMoney.ts new file mode 100644 index 000000000..3a270b7fc --- /dev/null +++ b/web/src/shared/utilities/formatMoney.ts @@ -0,0 +1,3 @@ +const formatMoney = (val: number): string => `$${val.toFixed(2).replace(/\d(?=(\d{3})+\.)/g, '$&,')}` + +export default formatMoney diff --git a/web/src/shared/utilities/orderBy.ts b/web/src/shared/utilities/orderBy.ts new file mode 100644 index 000000000..c9d0489ad --- /dev/null +++ b/web/src/shared/utilities/orderBy.ts @@ -0,0 +1,19 @@ +const orderBy = ( + arr: T[], + props: (keyof T)[], + orders?: ("asc" | "desc")[] +) => + [...arr].sort((a, b) => + props.reduce((acc, prop, i) => { + if (acc === 0) { + const [p1, p2] = + orders && orders[i] === "desc" + ? [b[prop], a[prop]] + : [a[prop], b[prop]]; + acc = p1 > p2 ? 1 : p1 < p2 ? 
-1 : 0; + } + return acc; + }, 0) + ); + +export default orderBy; From 6f619d0cd036cb58fc80709b0947f2bf7cb43988 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 15 Jan 2024 16:08:10 +1100 Subject: [PATCH 02/34] script to create samples into tob-wgs metamist --- scripts/add_tob_samples_to_metamist.py | 88 ++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 scripts/add_tob_samples_to_metamist.py diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py new file mode 100644 index 000000000..ce8c2b13d --- /dev/null +++ b/scripts/add_tob_samples_to_metamist.py @@ -0,0 +1,88 @@ +from metamist.apis import SampleApi +from metamist.models import SampleUpsert, SequencingGroupUpsert, AssayUpsert +import csv + +import click + +sapi = SampleApi() + + +def add_sample_to_metamist(extID: str, project: str): + """ + Adds samples to metamist + + Args: + sample_ids (list[str]): List of sample IDs to add to metamist + project (str): The name of the project to add samples to + """ + sample_upsert = SampleUpsert( + id=None, + external_id=extID, + meta={}, + project=project, + type='blood', + participant_id=None, + active=None, + sequencing_groups=[ + SequencingGroupUpsert( + id=None, + type='genome', + technology='short-read', + platform='illumina', + meta=None, + sample_id=None, + external_ids=None, + assays=None, + ), + ], + non_sequencing_assays=[ + AssayUpsert( + id=None, + type='sequencing', + external_ids=None, + sample_id=None, + meta=None, + ), + ], + ) + + return sample_upsert + + +@click.command() +@click.option( + '--project', + required=True, + help='The name of the project to add samples to.', +) +@click.option( + '--files_path', + required=True, + help='Path to the CSV file with sample data.', +) +def main(project: str, files_path: str): + """ + Adds samples to metamist + + Args: + project (str): The name of the project to add samples to + files_path (str): Path to the CSV file with sample data + """ + # Get list of sample IDs from bucket directory + upsert_list = [] + with open(files_path, 'r') as f: + reader = csv.reader(f) + next(reader) # skip the header + for row in reader: + sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] + upsert_list.append(add_sample_to_metamist(extID, project)) + + for upsert in upsert_list: + api_response = sapi.create_sample( + project, upsert + ) # returns the internal sample ID + print(api_response) + + +if __name__ == '__main__': + main() From 26459353b82df53d61cb6febfb6370eb4bd57699 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 15 Jan 2024 16:36:08 +1100 Subject: [PATCH 03/34] changed how to read path --- scripts/add_tob_samples_to_metamist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py index ce8c2b13d..a844b0617 100644 --- a/scripts/add_tob_samples_to_metamist.py +++ b/scripts/add_tob_samples_to_metamist.py @@ -1,5 +1,6 @@ from metamist.apis import SampleApi from metamist.models import SampleUpsert, SequencingGroupUpsert, AssayUpsert +from cpg_utils import to_path import csv import click @@ -70,7 +71,7 @@ def main(project: str, files_path: str): """ # Get list of sample IDs from bucket directory upsert_list = [] - with open(files_path, 'r') as f: + with to_path(files_path).open() as f: reader = csv.reader(f) next(reader) # skip the header for row in reader: From b97ecd2bc0c8dbc608a93d6a3d0fd5128886ab2f Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 15 Jan 2024 
16:39:21 +1100 Subject: [PATCH 04/34] changed project id to be an integer --- scripts/add_tob_samples_to_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py index a844b0617..9e281ff70 100644 --- a/scripts/add_tob_samples_to_metamist.py +++ b/scripts/add_tob_samples_to_metamist.py @@ -20,7 +20,7 @@ def add_sample_to_metamist(extID: str, project: str): id=None, external_id=extID, meta={}, - project=project, + project=8, type='blood', participant_id=None, active=None, From 7f963b7b06792724b935a513ae6ced2b82fa1913 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 07:27:33 +1100 Subject: [PATCH 05/34] making assays not None --- scripts/add_tob_samples_to_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py index 9e281ff70..ecbdd7389 100644 --- a/scripts/add_tob_samples_to_metamist.py +++ b/scripts/add_tob_samples_to_metamist.py @@ -33,7 +33,7 @@ def add_sample_to_metamist(extID: str, project: str): meta=None, sample_id=None, external_ids=None, - assays=None, + assays={}, ), ], non_sequencing_assays=[ From db607f245cd960e8e337474d0b272af45c39c787 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 07:32:32 +1100 Subject: [PATCH 06/34] assays has to be a list --- scripts/add_tob_samples_to_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py index ecbdd7389..4e75c441a 100644 --- a/scripts/add_tob_samples_to_metamist.py +++ b/scripts/add_tob_samples_to_metamist.py @@ -33,7 +33,7 @@ def add_sample_to_metamist(extID: str, project: str): meta=None, sample_id=None, external_ids=None, - assays={}, + assays=[], ), ], non_sequencing_assays=[ From c896d307dc58762c7b0d9eeb78f31708355a9f13 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 07:37:55 +1100 Subject: [PATCH 07/34] adding assay meta fields --- scripts/add_tob_samples_to_metamist.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py index 4e75c441a..78467e17f 100644 --- a/scripts/add_tob_samples_to_metamist.py +++ b/scripts/add_tob_samples_to_metamist.py @@ -42,7 +42,11 @@ def add_sample_to_metamist(extID: str, project: str): type='sequencing', external_ids=None, sample_id=None, - meta=None, + meta={ + 'sequencing_type': 'genome', + 'sequencing_platform': 'illumina', + 'sequencing_technology': 'short-read', + }, ), ], ) From beb62f1eedc78f81e1712a11ed3bde8b555fc92b Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 09:24:01 +1100 Subject: [PATCH 08/34] added script to add gvcf analysis entries metamist --- scripts/add_test_analyses_to_tob_wgs_test.py | 107 +++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 scripts/add_test_analyses_to_tob_wgs_test.py diff --git a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py new file mode 100644 index 000000000..1a752cd2a --- /dev/null +++ b/scripts/add_test_analyses_to_tob_wgs_test.py @@ -0,0 +1,107 @@ +from metamist.apis import AnalysisApi +from metamist.models import AnalysisStatus +from metamist.models import Analysis +import csv +from datetime import datetime +import click + +from metamist.graphql import gql, query +from cpg_utils import to_path + +aapi = 
AnalysisApi() + +SAMPLE_QUERY = gql( + """ + query ($project: String!, $sample_ids: [String!]) { + sample(project: {eq: $project}, id: {in_: $sample_ids}) { + externalId + sequencingGroups { + id + assays { + id + meta + } + } + } + } + """ +) + + +def add_analysis_to_metamist(gvcf_link: str, sgid: str, project_id: int): + analysis_upsert = Analysis( + type='gvcf', + status=AnalysisStatus("COMPLETED"), + id=None, + output=gvcf_link, + sequencing_group_ids=[sgid], + author=None, + timestamp_completed=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"), + project=project_id, + active=True, + meta={}, + ) + + return analysis_upsert + + +@click.command() +@click.option( + '--project', + required=True, + help='The name of the project to add samples to.', +) +@click.option( + '--project_id', + required=True, + type=int, + help='The ID of the project to add samples to.', +) +@click.option( + '--new_samples_path', + required=True, + help='Path to the CSV file with sample data.', +) +@click.option( + '--sgid_gvcf_mapping_file', + required=True, + help='Path to the CSV file with sample-gvcf data.', +) +def main( + project: str, + project_id: int, + newly_added_samples_path: str, + sgid_gvcf_mapping_file: str, +): + # Read in the newly created samples + with to_path(newly_added_samples_path).open() as f: + samples = f.read().splitlines() + + # Read the CSV file into a dictionary + extID_to_row = {} + with to_path(sgid_gvcf_mapping_file).open() as f: + reader = csv.reader(f) + next(reader) # skip the header + for row in reader: + sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] + extID_to_row[extID] = (sgid, gvcf, gvcf_idx) + + # Get the newly created samples + query_response = query(SAMPLE_QUERY, {"project": project, "sample_ids": samples}) + + # Look up each sample in the dictionary + analysis_upserts = [] + for sample in query_response['sample']: + ext_id = sample['externalId'] + if ext_id in extID_to_row: + sgid, gvcf, gvcf_idx = extID_to_row[ext_id] + analysis_upserts.append(add_analysis_to_metamist(gvcf, sgid, project_id)) + + # Add the analyses to metamist + for upsert in analysis_upserts: + api_response = aapi.create_analysis(project, upsert) + print(api_response) + + +if __name__ == '__main__': + main() From 57e8e836c928c6d0e1b8a81561571510c071a09e Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 09:28:26 +1100 Subject: [PATCH 09/34] fixed variable naming --- scripts/add_test_analyses_to_tob_wgs_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py index 1a752cd2a..b3ab4baf8 100644 --- a/scripts/add_test_analyses_to_tob_wgs_test.py +++ b/scripts/add_test_analyses_to_tob_wgs_test.py @@ -70,11 +70,11 @@ def add_analysis_to_metamist(gvcf_link: str, sgid: str, project_id: int): def main( project: str, project_id: int, - newly_added_samples_path: str, + new_samples_path: str, sgid_gvcf_mapping_file: str, ): # Read in the newly created samples - with to_path(newly_added_samples_path).open() as f: + with to_path(new_samples_path).open() as f: samples = f.read().splitlines() # Read the CSV file into a dictionary From ce4f7b9627fa55f3c9f2c7d68918eec028022949 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 09:29:46 +1100 Subject: [PATCH 10/34] changed analysis status from COMPLETED to completed --- scripts/add_test_analyses_to_tob_wgs_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py index b3ab4baf8..1b46c4094 100644 --- a/scripts/add_test_analyses_to_tob_wgs_test.py +++ b/scripts/add_test_analyses_to_tob_wgs_test.py @@ -31,7 +31,7 @@ def add_analysis_to_metamist(gvcf_link: str, sgid: str, project_id: int): analysis_upsert = Analysis( type='gvcf', - status=AnalysisStatus("COMPLETED"), + status=AnalysisStatus('completed'), id=None, output=gvcf_link, sequencing_group_ids=[sgid], From fef13dc24645b545c9e45e97e1766629d535165b Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 09:49:54 +1100 Subject: [PATCH 11/34] removed unnecessary fields from analysis upsert --- scripts/add_test_analyses_to_tob_wgs_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py index 1b46c4094..16d75a55d 100644 --- a/scripts/add_test_analyses_to_tob_wgs_test.py +++ b/scripts/add_test_analyses_to_tob_wgs_test.py @@ -32,10 +32,8 @@ def add_analysis_to_metamist(gvcf_link: str, sgid: str, project_id: int): analysis_upsert = Analysis( type='gvcf', status=AnalysisStatus('completed'), - id=None, output=gvcf_link, sequencing_group_ids=[sgid], - author=None, timestamp_completed=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"), project=project_id, active=True, From c1e1b7e16f43c428c85c6a67ff95674991fa1773 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 10:36:53 +1100 Subject: [PATCH 12/34] now adding analyses to correct sequencingGroups --- scripts/add_test_analyses_to_tob_wgs_test.py | 105 ++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py index 16d75a55d..c78584054 100644 --- a/scripts/add_test_analyses_to_tob_wgs_test.py +++ b/scripts/add_test_analyses_to_tob_wgs_test.py @@ -91,9 +91,12 @@ def main( analysis_upserts = [] for sample in query_response['sample']: ext_id = sample['externalId'] + new_sgid = sample['sequencingGroups'][0]['id'] if ext_id in extID_to_row: - sgid, gvcf, gvcf_idx = extID_to_row[ext_id] - analysis_upserts.append(add_analysis_to_metamist(gvcf, sgid, project_id)) + _, gvcf, gvcf_idx = extID_to_row[ext_id] + analysis_upserts.append( + add_analysis_to_metamist(gvcf, new_sgid, project_id) + ) # Add the analyses to metamist for upsert in analysis_upserts: @@ -103,3 +106,101 @@ def main( if __name__ == '__main__': main() + + +# Analyses added +# 193733 +# 193734 +# 193735 +# 193736 +# 193737 +# 193738 +# 193739 +# 193740 +# 193741 +# 193742 +# 193743 +# 193744 +# 193745 +# 193746 +# 193747 +# 193748 +# 193749 +# 193750 +# 193751 +# 193752 +# 193753 +# 193754 +# 193755 +# 193756 +# 193757 +# 193758 +# 193759 +# 193760 +# 193761 +# 193762 +# 193763 +# 193764 +# 193765 +# 193766 +# 193767 +# 193768 +# 193769 +# 193770 +# 193771 +# 193772 +# 193773 +# 193774 +# 193775 +# 193776 +# 193777 +# 193778 +# 193779 +# 193780 +# 193781 +# 193782 +# 193783 +# 193784 +# 193785 +# 193786 +# 193787 +# 193788 +# 193789 +# 193790 +# 193791 +# 193792 +# 193793 +# 193794 +# 193795 +# 193796 +# 193797 +# 193798 +# 193799 +# 193800 +# 193801 +# 193802 +# 193803 +# 193804 +# 193805 +# 193806 +# 193807 +# 193808 +# 193809 +# 193810 +# 193811 +# 193812 +# 193813 +# 193814 +# 193815 +# 193816 +# 193817 +# 193818 +# 193819 +# 193820 +# 193821 +# 193822 +# 193823 +# 193824 +# 193825 +# 193826 +# 193827 From b7ba499b9f435470ff5dd542678ddda3874255e9 Mon Sep 17 00:00:00 2001 
From: Michael Harper Date: Tue, 16 Jan 2024 14:29:08 +1100 Subject: [PATCH 13/34] generating participants --- scripts/make_tobwgs_test_participants.py | 122 +++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 scripts/make_tobwgs_test_participants.py diff --git a/scripts/make_tobwgs_test_participants.py b/scripts/make_tobwgs_test_participants.py new file mode 100644 index 000000000..5755448f6 --- /dev/null +++ b/scripts/make_tobwgs_test_participants.py @@ -0,0 +1,122 @@ +from metamist.apis import ParticipantApi +from metamist.models import ( + ParticipantUpsert, + SampleUpsert, + SequencingGroupUpsert, + AssayUpsert, +) +from metamist.models import Analysis +import csv +from datetime import datetime +import click + +from metamist.graphql import gql, query +from cpg_utils import to_path + + +new_samples_path = 'gs://cpg-tob-wgs-test/harper-hope/newly_created_samples.txt' + +SAMPLE_QUERY = gql( + """ + query ($project: String!, $sample_ids: [String!]) { + sample(project: {eq: $project}, id: {in_: $sample_ids}) { + id + externalId + sequencingGroups { + id + analyses { + id + meta + output + status + timestampCompleted + type + } + } + } + } + """ +) + +papi_instance = ParticipantApi() + + +def create_participant(project_id: int, extID: str, sampleID: str, sg_id: str): + participant_upsert = [ + ParticipantUpsert( + id=None, + external_id=extID, + samples=[ + SampleUpsert( + id=sampleID, + external_id=extID, + project=project_id, + sequencing_groups=[ + SequencingGroupUpsert( + id=sg_id, + sample_id=sampleID, + ), + ], + non_sequencing_assays=[ + AssayUpsert( + id=None, + type=None, + external_ids=None, + sample_id=None, + meta=None, + ), + ], + ), + ], + ), + ] + + return participant_upsert + + +@click.command() +@click.option( + '--project', + required=True, + help='The name of the project to add samples to.', +) +@click.option( + '--project_id', + required=True, + type=int, + help='The ID of the project to add samples to.', +) +@click.option( + '--new_samples_path', + required=True, + help='Path to the CSV file with sample data.', +) +def main(project: id, project_id: int, new_samples_path: str): + # Read in newly created samples + with to_path(new_samples_path).open() as f: + new_samples = f.read().splitlines() + + # Query for samples + query_response = query( + SAMPLE_QUERY, {"project": project, "sample_ids": new_samples} + ) + + # Create participant upserts + participant_upserts = [] + for sample in query_response['sample']: + extID = sample['externalId'] + sampleID = sample['id'] + for sg in sample['sequencingGroups']: + sg_id = sg['id'] + participant_upserts.append( + create_participant(project_id, extID, sampleID, sg_id) + ) + + # Upsert Participants + for upsert in participant_upserts: + api_response = papi_instance.upsert_participants(project, upsert) + print(api_response) + + +if __name__ == '__main__': + main() From 3c131d8a9b2d8cb7f44e6d28563887676a7d94bc Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 14:31:45 +1100 Subject: [PATCH 14/34] importing and global variable changes --- scripts/make_tobwgs_test_participants.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/make_tobwgs_test_participants.py b/scripts/make_tobwgs_test_participants.py index 5755448f6..8df7b7004 100644 --- a/scripts/make_tobwgs_test_participants.py +++ b/scripts/make_tobwgs_test_participants.py @@ -5,17 +5,12 @@ SequencingGroupUpsert, AssayUpsert, ) -from metamist.models import Analysis -import csv -from datetime import datetime import click from 
metamist.graphql import gql, query from cpg_utils import to_path -new_samples_path = 'gs://cpg-tob-wgs-test/harper-hope/newly_created_samples.txt' - SAMPLE_QUERY = gql( """ query ($project: String!, $sample_ids: [String!]) { From 7ee6d51212e5f4ee95f8d990ab2e1d5b7a9a64b8 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 14:39:49 +1100 Subject: [PATCH 15/34] adding necessary assay upsert fields --- scripts/make_tobwgs_test_participants.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/make_tobwgs_test_participants.py b/scripts/make_tobwgs_test_participants.py index 8df7b7004..2dd22a21d 100644 --- a/scripts/make_tobwgs_test_participants.py +++ b/scripts/make_tobwgs_test_participants.py @@ -55,10 +55,8 @@ def create_participant(project_id: int, extID: str, sampleID: str, sg_id: str): non_sequencing_assays=[ AssayUpsert( id=None, - type=None, - external_ids=None, - sample_id=None, - meta=None, + type='sequencing', + sample_id=sampleID, ), ], ), From fb2c6902a86799bf124e163fabec3d0fc3a26fe6 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 14:51:35 +1100 Subject: [PATCH 16/34] adding more required fields to assay upsert --- scripts/make_tobwgs_test_participants.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/scripts/make_tobwgs_test_participants.py b/scripts/make_tobwgs_test_participants.py index 2dd22a21d..ec549595e 100644 --- a/scripts/make_tobwgs_test_participants.py +++ b/scripts/make_tobwgs_test_participants.py @@ -19,13 +19,10 @@ externalId sequencingGroups { id - analyses { + assays { id + externalIds meta - output - status - timestampCompleted - type } } } @@ -57,6 +54,11 @@ def create_participant(project_id: int, extID: str, sampleID: str, sg_id: str): id=None, type='sequencing', sample_id=sampleID, + meta={ + 'sequencing_type': 'genome', + 'sequencing_platform': 'illumina', + 'sequencing_technology': 'short-read', + }, ), ], ), From 225f6fd66a1bfad2826a6102f39595dc2fdbf91d Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Tue, 16 Jan 2024 18:12:09 +1100 Subject: [PATCH 17/34] editing assays so that their "type" is unique --- scripts/edit_tobwgs_test_assays.py | 83 ++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 scripts/edit_tobwgs_test_assays.py diff --git a/scripts/edit_tobwgs_test_assays.py b/scripts/edit_tobwgs_test_assays.py new file mode 100644 index 000000000..5f1830407 --- /dev/null +++ b/scripts/edit_tobwgs_test_assays.py @@ -0,0 +1,83 @@ +from metamist.apis import AssayApi +from metamist.models import AssayUpsert +import click + +from metamist.graphql import gql, query +from cpg_utils import to_path + + +SAMPLE_QUERY = gql( + """ + query ($project: String!, $sample_ids: [String!]) { + sample(project: {eq: $project}, id: {in_: $sample_ids}) { + id + externalId + assays { + id + externalIds + meta + type + } + } + } + """ +) + + +def update_assays(assayID: str, sampleID: str, number: int): + assay_upsert = AssayUpsert( + id=assayID, + type=f'non_sequencing{number}', + sample_id=sampleID, + ) + # assay_upsert = AssayUpsert( + # id=None, + # type='sequencing', + # sample_id=sampleID, + # meta={ + # 'sequencing_type': 'genome', + # 'sequencing_technology': 'short-read', + # 'sequencing_platform': 'illumina', + # }, + # ) + + return assay_upsert + + +aapi = AssayApi() + + +@click.command() +@click.option( + '--project', + required=True, + help='The name of the project to add samples to.', +) +@click.option( + '--new_samples_path', + 
required=True, + help='The path to the file containing the newly created samples.', +) +def main(project: str, new_samples_path: str): + with to_path(new_samples_path).open() as f: + new_samples = f.read().splitlines() + + query_response = query( + SAMPLE_QUERY, {"project": project, "sample_ids": new_samples} + ) + + assay_upserts = [] + for sample in query_response['sample']: + sampleID = sample['id'] + extID = sample['externalId'] + for i, assay in enumerate(sample['assays']): + assayID = assay['id'] + assay_upserts.append(update_assays(assayID, sampleID, i)) + + for upsert in assay_upserts: + api_response = aapi.update_assay(upsert) + print(api_response) + + +if __name__ == '__main__': + main() From 8196100416576fcf498484a8bc2d948233532ca8 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 12:00:17 +1100 Subject: [PATCH 18/34] adding new participants to tob-wgs-test metamist --- ...dd_nagim_gvcfs_to_tob_wgs_test_metamist.py | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py new file mode 100644 index 000000000..cf0588c9d --- /dev/null +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -0,0 +1,147 @@ +from metamist.graphql import gql, query +from metamist.apis import ParticipantApi, AnalysisApi +from metamist.models import ( + ParticipantUpsert, + SampleUpsert, + SequencingGroupUpsert, + AssayUpsert, + AnalysisStatus, + Analysis, +) +from cpg_utils import to_path +import csv + +import click + +PARTICIPANT_QUERY = gql( + """ + query ($project: String!) { + project(name: $project) { + id + participants { + externalId + id + samples { + id + externalId + sequencingGroups { + externalIds + id + meta + platform + technology + type + assays { + meta + id + } + } + } + } + } +} +""" +) + +new_samples_path = ( + 'gs://cpg-tob-wgs-test/harper-hope/sample_nagim_test_bucket_mapping.csv' +) + + +@click.command() +@click.option( + '--project', + required=True, + help='The name of the project to add samples to.', +) +@click.option( + '--project_id', + required=True, + type=int, + help='The ID of the project to add samples to.', +) +@click.option( + '--new_samples_path', + required=True, + help='The path to the file containing the newly created samples.', +) +def main(project: str, projectID: int, new_samples_path: str): + # Read the CSV file into a dictionary + extID_to_row = {} + with to_path(new_samples_path).open() as f: + reader = csv.reader(f) + next(reader) # skip the header + for row in reader: + sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] + extID_to_row[extID] = (sgid, gvcf, gvcf_idx) + + query_response = query(PARTICIPANT_QUERY, {"project": project}) + p_upserts = [] + for participant in query_response['project']['participants']: + if participant['externalId'] not in extID_to_row: + continue + extID = f"{participant['externalId']}-test" + p = ParticipantUpsert( + external_id=extID, + active=None, + samples=[], + ) + for sample in participant['samples']: + s = SampleUpsert( + external_id=extID, + project=projectID, + sequencing_groups=[], + ) + for sg in sample['sequencingGroups']: + s.sequencing_groups.append( + sg=SequencingGroupUpsert( + type=sg['type'], + technology=sg['technology'], + platform=sg['platform'], + meta=None, + sample_id=None, + external_ids=None, + assays=[ + AssayUpsert( + type=sg['assays'][0]['type'], + meta={ + 'sequencing_type': 
sg['assays'][0]['meta'][ + 'sequencing_type' + ], + 'sequencing_platform': sg['assays'][0]['meta'][ + 'sequencing_platform' + ], + 'sequencing_technology': sg['assays'][0]['meta'][ + 'sequencing_technology' + ], + }, + ), + ], + ) + ) + p.samples.append(s) + p_upserts.append(p) + + upserted_participants = ParticipantApi().upsert_participants(project, p_upserts) + + for participant in upserted_participants: + for sample in participant['samples']: + old_extID = sample['externalId'][:-5] # remove '-test' from the end + gvcf_path = extID_to_row[old_extID][1] # get gvcf path from dictionary + AnalysisApi().create_analysis( + project, + Analysis( + type='gvcf', + status=AnalysisStatus('completed'), + output=gvcf_path, + sequencing_group_id=sample['sequencingGroups '][0]['id'], + project=projectID, + active=True, + external_id=sample['externalId'], + sample=sample['id'], + ), + ) + + +if __name__ == '__main__': + main() From 2bf2f654c860eb01f2f9e1d778c16cc7b85103f6 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 12:01:37 +1100 Subject: [PATCH 19/34] removed hard coding of mapping file --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index cf0588c9d..3d8484851 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -43,10 +43,6 @@ """ ) -new_samples_path = ( - 'gs://cpg-tob-wgs-test/harper-hope/sample_nagim_test_bucket_mapping.csv' -) - @click.command() @click.option( From b46e8f85eaa0a2cdc3439cb728b1e8968e2c882c Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 13:04:13 +1100 Subject: [PATCH 20/34] remove unwanted files --- scripts/add_test_analyses_to_tob_wgs_test.py | 206 ------------------- scripts/add_tob_samples_to_metamist.py | 93 --------- scripts/edit_tobwgs_test_assays.py | 83 -------- scripts/make_tobwgs_test_participants.py | 117 ----------- 4 files changed, 499 deletions(-) delete mode 100644 scripts/add_test_analyses_to_tob_wgs_test.py delete mode 100644 scripts/add_tob_samples_to_metamist.py delete mode 100644 scripts/edit_tobwgs_test_assays.py delete mode 100644 scripts/make_tobwgs_test_participants.py diff --git a/scripts/add_test_analyses_to_tob_wgs_test.py b/scripts/add_test_analyses_to_tob_wgs_test.py deleted file mode 100644 index c78584054..000000000 --- a/scripts/add_test_analyses_to_tob_wgs_test.py +++ /dev/null @@ -1,206 +0,0 @@ -from metamist.apis import AnalysisApi -from metamist.models import AnalysisStatus -from metamist.models import Analysis -import csv -from datetime import datetime -import click - -from metamist.graphql import gql, query -from cpg_utils import to_path - -aapi = AnalysisApi() - -SAMPLE_QUERY = gql( - """ - query ($project: String!, $sample_ids: [String!]) { - sample(project: {eq: $project}, id: {in_: $sample_ids}) { - externalId - sequencingGroups { - id - assays { - id - meta - } - } - } - } - """ -) - - -def add_analysis_to_metamist(gvcf_link: str, sgid: str, project_id: int): - analysis_upsert = Analysis( - type='gvcf', - status=AnalysisStatus('completed'), - output=gvcf_link, - sequencing_group_ids=[sgid], - timestamp_completed=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"), - project=project_id, - active=True, - meta={}, - ) - - return analysis_upsert - - -@click.command() -@click.option( - '--project', - required=True, - help='The name of the project to add samples to.', -) 
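# A minimal, self-contained sketch of the analysis-registration pattern the
# script being removed here relied on, distilled from the patches above. It
# assumes the metamist client calls shown in this series (AnalysisApi,
# Analysis, AnalysisStatus); the project ID, sequencing-group ID and gvcf
# path below are hypothetical placeholders, not values from the repository.
from datetime import datetime

from metamist.apis import AnalysisApi
from metamist.models import Analysis, AnalysisStatus

analysis = Analysis(
    type='gvcf',
    status=AnalysisStatus('completed'),  # lower-case, per PATCH 10/34
    output='gs://example-bucket/sample0.g.vcf.gz',  # placeholder gvcf path
    sequencing_group_ids=['CPG000000'],  # placeholder sequencing-group ID
    timestamp_completed=datetime.now().strftime('%Y-%m-%dT%H:%M:%S'),
    project=8,  # placeholder project ID
    active=True,
    meta={},
)
# create_analysis posts one record per call, exactly as the loop above did.
print(AnalysisApi().create_analysis('example-project', analysis))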
-@click.option( - '--project_id', - required=True, - type=int, - help='The ID of the project to add samples to.', -) -@click.option( - '--new_samples_path', - required=True, - help='Path to the CSV file with sample data.', -) -@click.option( - '--sgid_gvcf_mapping_file', - required=True, - help='Path to the CSV file with sample-gvcf data.', -) -def main( - project: str, - project_id: int, - new_samples_path: str, - sgid_gvcf_mapping_file: str, -): - # Read in the newly created samples - with to_path(new_samples_path).open() as f: - samples = f.read().splitlines() - - # Read the CSV file into a dictionary - extID_to_row = {} - with to_path(sgid_gvcf_mapping_file).open() as f: - reader = csv.reader(f) - next(reader) # skip the header - for row in reader: - sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] - extID_to_row[extID] = (sgid, gvcf, gvcf_idx) - - # Get the newly created samples - query_response = query(SAMPLE_QUERY, {"project": project, "sample_ids": samples}) - - # Look up each sample in the dictionary - analysis_upserts = [] - for sample in query_response['sample']: - ext_id = sample['externalId'] - new_sgid = sample['sequencingGroups'][0]['id'] - if ext_id in extID_to_row: - _, gvcf, gvcf_idx = extID_to_row[ext_id] - analysis_upserts.append( - add_analysis_to_metamist(gvcf, new_sgid, project_id) - ) - - # Add the analyses to metamist - for upsert in analysis_upserts: - api_response = aapi.create_analysis(project, upsert) - print(api_response) - - -if __name__ == '__main__': - main() - - -# Analyses added -# 193733 -# 193734 -# 193735 -# 193736 -# 193737 -# 193738 -# 193739 -# 193740 -# 193741 -# 193742 -# 193743 -# 193744 -# 193745 -# 193746 -# 193747 -# 193748 -# 193749 -# 193750 -# 193751 -# 193752 -# 193753 -# 193754 -# 193755 -# 193756 -# 193757 -# 193758 -# 193759 -# 193760 -# 193761 -# 193762 -# 193763 -# 193764 -# 193765 -# 193766 -# 193767 -# 193768 -# 193769 -# 193770 -# 193771 -# 193772 -# 193773 -# 193774 -# 193775 -# 193776 -# 193777 -# 193778 -# 193779 -# 193780 -# 193781 -# 193782 -# 193783 -# 193784 -# 193785 -# 193786 -# 193787 -# 193788 -# 193789 -# 193790 -# 193791 -# 193792 -# 193793 -# 193794 -# 193795 -# 193796 -# 193797 -# 193798 -# 193799 -# 193800 -# 193801 -# 193802 -# 193803 -# 193804 -# 193805 -# 193806 -# 193807 -# 193808 -# 193809 -# 193810 -# 193811 -# 193812 -# 193813 -# 193814 -# 193815 -# 193816 -# 193817 -# 193818 -# 193819 -# 193820 -# 193821 -# 193822 -# 193823 -# 193824 -# 193825 -# 193826 -# 193827 diff --git a/scripts/add_tob_samples_to_metamist.py b/scripts/add_tob_samples_to_metamist.py deleted file mode 100644 index 78467e17f..000000000 --- a/scripts/add_tob_samples_to_metamist.py +++ /dev/null @@ -1,93 +0,0 @@ -from metamist.apis import SampleApi -from metamist.models import SampleUpsert, SequencingGroupUpsert, AssayUpsert -from cpg_utils import to_path -import csv - -import click - -sapi = SampleApi() - - -def add_sample_to_metamist(extID: str, project: str): - """ - Adds samples to metamist - - Args: - sample_ids (list[str]): List of sample IDs to add to metamist - project (str): The name of the project to add samples to - """ - sample_upsert = SampleUpsert( - id=None, - external_id=extID, - meta={}, - project=8, - type='blood', - participant_id=None, - active=None, - sequencing_groups=[ - SequencingGroupUpsert( - id=None, - type='genome', - technology='short-read', - platform='illumina', - meta=None, - sample_id=None, - external_ids=None, - assays=[], - ), - ], - non_sequencing_assays=[ - AssayUpsert( - id=None, - 
type='sequencing', - external_ids=None, - sample_id=None, - meta={ - 'sequencing_type': 'genome', - 'sequencing_platform': 'illumina', - 'sequencing_technology': 'short-read', - }, - ), - ], - ) - - return sample_upsert - - -@click.command() -@click.option( - '--project', - required=True, - help='The name of the project to add samples to.', -) -@click.option( - '--files_path', - required=True, - help='Path to the CSV file with sample data.', -) -def main(project: str, files_path: str): - """ - Adds samples to metamist - - Args: - project (str): The name of the project to add samples to - files_path (str): Path to the CSV file with sample data - """ - # Get list of sample IDs from bucket directory - upsert_list = [] - with to_path(files_path).open() as f: - reader = csv.reader(f) - next(reader) # skip the header - for row in reader: - sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] - upsert_list.append(add_sample_to_metamist(extID, project)) - - for upsert in upsert_list: - api_response = sapi.create_sample( - project, upsert - ) # returns the internal sample ID - print(api_response) - - -if __name__ == '__main__': - main() diff --git a/scripts/edit_tobwgs_test_assays.py b/scripts/edit_tobwgs_test_assays.py deleted file mode 100644 index 5f1830407..000000000 --- a/scripts/edit_tobwgs_test_assays.py +++ /dev/null @@ -1,83 +0,0 @@ -from metamist.apis import AssayApi -from metamist.models import AssayUpsert -import click - -from metamist.graphql import gql, query -from cpg_utils import to_path - - -SAMPLE_QUERY = gql( - """ - query ($project: String!, $sample_ids: [String!]) { - sample(project: {eq: $project}, id: {in_: $sample_ids}) { - id - externalId - assays { - id - externalIds - meta - type - } - } - } - """ -) - - -def update_assays(assayID: str, sampleID: str, number: int): - assay_upsert = AssayUpsert( - id=assayID, - type=f'non_sequencing{number}', - sample_id=sampleID, - ) - # assay_upsert = AssayUpsert( - # id=None, - # type='sequencing', - # sample_id=sampleID, - # meta={ - # 'sequencing_type': 'genome', - # 'sequencing_technology': 'short-read', - # 'sequencing_platform': 'illumina', - # }, - # ) - - return assay_upsert - - -aapi = AssayApi() - - -@click.command() -@click.option( - '--project', - required=True, - help='The name of the project to add samples to.', -) -@click.option( - '--new_samples_path', - required=True, - help='The path to the file containing the newly created samples.', -) -def main(project: str, new_samples_path: str): - with to_path(new_samples_path).open() as f: - new_samples = f.read().splitlines() - - query_response = query( - SAMPLE_QUERY, {"project": project, "sample_ids": new_samples} - ) - - assay_upserts = [] - for sample in query_response['sample']: - sampleID = sample['id'] - extID = sample['externalId'] - for i, assay in enumerate(sample['assays']): - assayID = assay['id'] - assay_upserts.append(update_assays(assayID, sampleID, i)) - - for upsert in assay_upserts: - api_response = aapi.update_assay(upsert) - print(api_response) - - -if __name__ == '__main__': - main() diff --git a/scripts/make_tobwgs_test_participants.py b/scripts/make_tobwgs_test_participants.py deleted file mode 100644 index ec549595e..000000000 --- a/scripts/make_tobwgs_test_participants.py +++ /dev/null @@ -1,117 +0,0 @@ -from metamist.apis import ParticipantApi -from metamist.models import ( - ParticipantUpsert, - SampleUpsert, - SequencingGroupUpsert, - AssayUpsert, -) -import click - -from metamist.graphql import gql, query -from cpg_utils import to_path - - 
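# For reference, the read side of every script in this series follows one
# two-step pattern: compile the document with gql(), execute it with query(),
# then iterate the top-level 'sample' list. A minimal sketch under those
# assumptions -- the project name and sample IDs are hypothetical placeholders.
from metamist.graphql import gql, query

DEMO_QUERY = gql(
    """
    query ($project: String!, $sample_ids: [String!]) {
        sample(project: {eq: $project}, id: {in_: $sample_ids}) {
            id
            externalId
        }
    }
    """
)

response = query(DEMO_QUERY, {'project': 'example-project', 'sample_ids': ['CPG1', 'CPG2']})
for sample in response['sample']:
    print(sample['id'], sample['externalId'])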
-SAMPLE_QUERY = gql( - """ - query ($project: String!, $sample_ids: [String!]) { - sample(project: {eq: $project}, id: {in_: $sample_ids}) { - id - externalId - sequencingGroups { - id - assays { - id - externalIds - meta - } - } - } - } - """ -) - -papi_instance = ParticipantApi() - - -def create_participant(project_id: int, extID: str, sampleID: str, sg_id: str): - participant_upsert = [ - ParticipantUpsert( - id=None, - external_id=extID, - samples=[ - SampleUpsert( - id=sampleID, - external_id=extID, - project=project_id, - sequencing_groups=[ - SequencingGroupUpsert( - id=sg_id, - sample_id=sampleID, - ), - ], - non_sequencing_assays=[ - AssayUpsert( - id=None, - type='sequencing', - sample_id=sampleID, - meta={ - 'sequencing_type': 'genome', - 'sequencing_platform': 'illumina', - 'sequencing_technology': 'short-read', - }, - ), - ], - ), - ], - ), - ] - - return participant_upsert - - -@click.command() -@click.option( - '--project', - required=True, - help='The name of the project to add samples to.', -) -@click.option( - '--project_id', - required=True, - type=int, - help='The ID of the project to add samples to.', -) -@click.option( - '--new_samples_path', - required=True, - help='Path to the CSV file with sample data.', -) -def main(project: id, project_id: int, new_samples_path: str): - # Read in newly created samples - with to_path(new_samples_path).open() as f: - new_samples = f.read().splitlines() - - # Query for samples - query_response = query( - SAMPLE_QUERY, {"project": project, "sample_ids": new_samples} - ) - - # Create participant upserts - participant_upserts = [] - for sample in query_response['sample']: - extID = sample['externalId'] - sampleID = sample['id'] - for sg in sample['sequencingGroups']: - sg_id = sg['id'] - participant_upserts.append( - create_participant(project_id, extID, sampleID, sg_id) - ) - - # Upsert Participants - for upsert in participant_upserts: - api_response = papi_instance.upsert_participants(project, upsert) - print(api_response) - - -if __name__ == '__main__': - main() From 76eb16f0e84500bbe82d44142d7c28ed090f6411 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 14:33:13 +1100 Subject: [PATCH 21/34] making suggested changes --- ...dd_nagim_gvcfs_to_tob_wgs_test_metamist.py | 78 +++++++++++-------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 3d8484851..e4ecee762 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -10,8 +10,18 @@ ) from cpg_utils import to_path import csv - +from collections import namedtuple import click +from dataclasses import dataclass + + +@dataclass +class RowData: + sgid: str + ext_id: str + gvcf: str + gvcf_idx: str + PARTICIPANT_QUERY = gql( """ @@ -25,22 +35,22 @@ id externalId sequencingGroups { - externalIds - id - meta - platform - technology - type - assays { - meta + externalIds id + meta + platform + technology + type + assays { + meta + id + } } } } } } -} -""" + """ ) @@ -51,41 +61,44 @@ help='The name of the project to add samples to.', ) @click.option( - '--project_id', - required=True, - type=int, - help='The ID of the project to add samples to.', -) -@click.option( - '--new_samples_path', + '--sample-path-mappings', required=True, - help='The path to the file containing the newly created samples.', + help='''The path to a CSV file containing mappings of `main` CPG ID's, + the 
`external_id` and `gvcf` paths. + The file should have at least four columns: sgid, ext_id, gvcf, and gvcf_idx. + Here's an example of what the first couple of lines might look like: + + sgid,ext_id,gvcf,gvcf_idx + sg1,ext1,gvcf1,gvcf_idx1 + sg2,ext2,gvcf2,gvcf_idx2 + ''', ) -def main(project: str, projectID: int, new_samples_path: str): +def main(project: str, project_id: int, sample_path_mappings: str): # Read the CSV file into a dictionary - extID_to_row = {} - with to_path(new_samples_path).open() as f: + ext_id_to_row = {} + with to_path(sample_path_mappings).open() as f: reader = csv.reader(f) next(reader) # skip the header for row in reader: - sgid, extID, gvcf, gvcf_idx = row[0], row[1], row[2], row[3] - extID_to_row[extID] = (sgid, gvcf, gvcf_idx) + data = RowData(*row[:4]) + ext_id_to_row[data.ext_id] = data query_response = query(PARTICIPANT_QUERY, {"project": project}) + project_id = query_response['project']['id'] p_upserts = [] for participant in query_response['project']['participants']: - if participant['externalId'] not in extID_to_row: + if participant['externalId'] not in ext_id_to_row: continue - extID = f"{participant['externalId']}-test" + ext_id = f"{participant['externalId']}-test" p = ParticipantUpsert( - external_id=extID, + external_id=ext_id, active=None, samples=[], ) for sample in participant['samples']: s = SampleUpsert( - external_id=extID, - project=projectID, + external_id=ext_id, + project=project_id, sequencing_groups=[], ) for sg in sample['sequencingGroups']: @@ -122,8 +135,8 @@ def main(project: str, projectID: int, new_samples_path: str): for participant in upserted_participants: for sample in participant['samples']: - old_extID = sample['externalId'][:-5] # remove '-test' from the end - gvcf_path = extID_to_row[old_extID][1] # get gvcf path from dictionary + old_ext_id = sample['externalId'].removesuffix('-test') + gvcf_path = ext_id_to_row[old_ext_id][1] # get gvcf path from dictionary AnalysisApi().create_analysis( project, Analysis( @@ -131,10 +144,7 @@ def main(project: str, projectID: int, new_samples_path: str): status=AnalysisStatus('completed'), output=gvcf_path, sequencing_group_id=sample['sequencingGroups '][0]['id'], - project=projectID, active=True, - external_id=sample['externalId'], - sample=sample['id'], ), ) From cf8b60472a4caa1ee22faf530c34c1dd66c08204 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 14:37:52 +1100 Subject: [PATCH 22/34] adding suffix to append to external ids as a parameter --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index e4ecee762..bf42e79db 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -73,7 +73,15 @@ class RowData: sg2,ext2,gvcf2,gvcf_idx2 ''', ) -def main(project: str, project_id: int, sample_path_mappings: str): +@click.option( + '--suffix', + required=True, + help='''The suffix to add to the external ID's of the participants. + For example, if the suffix is `test`, then the external ID's of the participants + will be `ext_id1-test`, `ext_id2-test`, etc. 
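As a concrete illustration of the loading step this mapping feeds: the reader skips the header, unpacks the first four columns into RowData, and keys the rows by external ID. A self-contained sketch, where the inline CSV text is a made-up stand-in for to_path(sample_path_mappings).open():

import csv
import io
from dataclasses import dataclass

@dataclass
class RowData:
    sgid: str
    ext_id: str
    gvcf: str
    gvcf_idx: str

# Fabricated example contents for illustration only.
csv_text = """sgid,ext_id,gvcf,gvcf_idx
sg1,ext1,gs://bucket/ext1.g.vcf.gz,gs://bucket/ext1.g.vcf.gz.tbi
"""

ext_id_to_row = {}
reader = csv.reader(io.StringIO(csv_text))
next(reader)  # skip the header
for row in reader:
    data = RowData(*row[:4])  # extra columns, if any, are ignored
    ext_id_to_row[data.ext_id] = data

assert ext_id_to_row['ext1'].gvcf == 'gs://bucket/ext1.g.vcf.gz'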
+ ''', +) +def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): # Read the CSV file into a dictionary ext_id_to_row = {} with to_path(sample_path_mappings).open() as f: @@ -89,7 +97,7 @@ def main(project: str, project_id: int, sample_path_mappings: str): for participant in query_response['project']['participants']: if participant['externalId'] not in ext_id_to_row: continue - ext_id = f"{participant['externalId']}-test" + ext_id = f"{participant['externalId']}-{suffix}" p = ParticipantUpsert( external_id=ext_id, active=None, From 52d7c3c6c81c9016a2e14db2fcb7509dd7c3b61a Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 14:43:12 +1100 Subject: [PATCH 23/34] removing external id suffix properly --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index bf42e79db..1330a94d8 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -143,7 +143,7 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): for participant in upserted_participants: for sample in participant['samples']: - old_ext_id = sample['externalId'].removesuffix('-test') + old_ext_id = sample['externalId'].removesuffix(f'-{suffix}') gvcf_path = ext_id_to_row[old_ext_id][1] # get gvcf path from dictionary AnalysisApi().create_analysis( project, From 103793143d001d92360ed113b7e4d0403e72b522 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 14:47:01 +1100 Subject: [PATCH 24/34] using dataclass to retriev gvcf path from dictionary --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 1330a94d8..e97a88ba0 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -144,7 +144,8 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): for participant in upserted_participants: for sample in participant['samples']: old_ext_id = sample['externalId'].removesuffix(f'-{suffix}') - gvcf_path = ext_id_to_row[old_ext_id][1] # get gvcf path from dictionary + row_data = ext_id_to_row[old_ext_id] + gvcf_path = row_data.gvcf # get gvcf path from dictionary AnalysisApi().create_analysis( project, Analysis( From 3295307a13e288cb117353dc57088628dd8a38e6 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 15:16:29 +1100 Subject: [PATCH 25/34] not using sg= inside a list as this is not allowed --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index e97a88ba0..babf925ce 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -111,7 +111,7 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): ) for sg in sample['sequencingGroups']: s.sequencing_groups.append( - sg=SequencingGroupUpsert( + SequencingGroupUpsert( type=sg['type'], technology=sg['technology'], platform=sg['platform'], From ebe2689bb95bea2555b09ea1f7044dc51b151da7 Mon Sep 17 00:00:00 2001 From: Michael 
Harper Date: Wed, 17 Jan 2024 15:17:09 +1100 Subject: [PATCH 26/34] removing project_id as parameter input --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index babf925ce..761c502df 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -81,7 +81,7 @@ class RowData: will be `ext_id1-test`, `ext_id2-test`, etc. ''', ) -def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): +def main(project: str, sample_path_mappings: str, suffix: str): # Read the CSV file into a dictionary ext_id_to_row = {} with to_path(sample_path_mappings).open() as f: From 5239ebd0a2ba8f9164db5890588918bd1f0aae92 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 15:24:00 +1100 Subject: [PATCH 27/34] adding print to sg for debugging --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 761c502df..0e167697c 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -10,9 +10,9 @@ ) from cpg_utils import to_path import csv -from collections import namedtuple import click from dataclasses import dataclass +from pprint import pprint @dataclass @@ -110,6 +110,7 @@ def main(project: str, sample_path_mappings: str, suffix: str): sequencing_groups=[], ) for sg in sample['sequencingGroups']: + pprint(sg) s.sequencing_groups.append( SequencingGroupUpsert( type=sg['type'], From 6c07839e3a99b788f3af7cad3b6582d9246b7d33 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 15:33:42 +1100 Subject: [PATCH 28/34] was iterating over "test" project to fetch data instead of "main" project --- ...add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 0e167697c..a7ce5ef60 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -73,6 +73,16 @@ class RowData: sg2,ext2,gvcf2,gvcf_idx2 ''', ) +@click.option( + '--project-id-to-add-to', + required=True, + type=int, + help='''The ID of the project to add samples to. + For example: iterate over `main` project (identified by the --project flag) to get data for each sample, then create a new participant + with the same data, but with a new external ID that has the suffix (--suffix) specified by the user. + Then upsert these into the `test` project. + ''', +) @click.option( '--suffix', required=True, @@ -81,7 +91,12 @@ class RowData: will be `ext_id1-test`, `ext_id2-test`, etc. ''', ) -def main(project: str, sample_path_mappings: str, suffix: str): +def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): + ''' + Iterate over `main` project to get data for each sample, then create a new participant + with the same data, but with a new external ID that has the suffix specified by the user. + Then upsert these into the `test` project. 
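To make the suffix round-trip concrete: the test external ID is built with an f-string and later undone with str.removesuffix (Python 3.9+), which strips only an exact trailing match. A quick sketch with made-up values:

suffix = 'test'
ext_id = 'TOB0001'  # hypothetical external ID from the mapping file

test_ext_id = f'{ext_id}-{suffix}'  # 'TOB0001-test'
assert test_ext_id.removesuffix(f'-{suffix}') == ext_id

# removesuffix is a no-op when the suffix is absent, so IDs that never had
# the suffix appended pass through unchanged rather than being mangled.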
+ ''' # Read the CSV file into a dictionary ext_id_to_row = {} with to_path(sample_path_mappings).open() as f: @@ -92,7 +107,6 @@ def main(project: str, sample_path_mappings: str, suffix: str): ext_id_to_row[data.ext_id] = data query_response = query(PARTICIPANT_QUERY, {"project": project}) - project_id = query_response['project']['id'] p_upserts = [] for participant in query_response['project']['participants']: if participant['externalId'] not in ext_id_to_row: From 315bc0ab86737c5bc9173bbaabd5cb4415bed038 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Wed, 17 Jan 2024 15:39:04 +1100 Subject: [PATCH 29/34] correct assay["type"] assignment --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index a7ce5ef60..d1124a184 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -135,7 +135,7 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): external_ids=None, assays=[ AssayUpsert( - type=sg['assays'][0]['type'], + type=sg['type'], meta={ 'sequencing_type': sg['assays'][0]['meta'][ 'sequencing_type' From 0299f7ecf8cde034cb614ef835b0322517412495 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Thu, 18 Jan 2024 07:34:01 +1100 Subject: [PATCH 30/34] changed naming of project-id-to-add-to parameter. Shortened it to project-id --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index d1124a184..fbe493144 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -74,7 +74,7 @@ class RowData: ''', ) @click.option( - '--project-id-to-add-to', + '--project-id', required=True, type=int, help='''The ID of the project to add samples to. From 94c7cc51d094a400ab9d93660fe62db00e5dc990 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Thu, 18 Jan 2024 08:28:55 +1100 Subject: [PATCH 31/34] linting issues! --- ...dd_nagim_gvcfs_to_tob_wgs_test_metamist.py | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index fbe493144..9cc8cc77b 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -1,5 +1,12 @@ -from metamist.graphql import gql, query +import csv +from dataclasses import dataclass +from pprint import pprint + +import click +from cpg_utils import to_path + from metamist.apis import ParticipantApi, AnalysisApi +from metamist.graphql import gql, query from metamist.models import ( ParticipantUpsert, SampleUpsert, @@ -8,15 +15,25 @@ AnalysisStatus, Analysis, ) -from cpg_utils import to_path -import csv -import click -from dataclasses import dataclass -from pprint import pprint @dataclass class RowData: + """ + A class used to represent a row of data. 
+ + Attributes + ---------- + sgid : str + an identifier for the sequencing group + ext_id : str + an external identifier for the row + gvcf : str + the path to the gvcf file + gvcf_idx : str + the path to the gvcf index file + """ + sgid: str ext_id: str gvcf: str @@ -63,40 +80,40 @@ class RowData: @click.option( '--sample-path-mappings', required=True, - help='''The path to a CSV file containing mappings of `main` CPG ID's, - the `external_id` and `gvcf` paths. - The file should have at least four columns: sgid, ext_id, gvcf, and gvcf_idx. + help="""The path to a CSV file containing mappings of `main` CPG ID's, + the `external_id` and `gvcf` paths. + The file should have at least four columns: sgid, ext_id, gvcf, and gvcf_idx. Here's an example of what the first couple of lines might look like: sgid,ext_id,gvcf,gvcf_idx sg1,ext1,gvcf1,gvcf_idx1 sg2,ext2,gvcf2,gvcf_idx2 - ''', + """, ) @click.option( '--project-id', required=True, type=int, - help='''The ID of the project to add samples to. + help="""The ID of the project to add samples to. For example: iterate over `main` project (identified by the --project flag) to get data for each sample, then create a new participant with the same data, but with a new external ID that has the suffix (--suffix) specified by the user. Then upsert these into the `test` project. - ''', + """, ) @click.option( '--suffix', required=True, - help='''The suffix to add to the external ID's of the participants. + help="""The suffix to add to the external ID's of the participants. For example, if the suffix is `test`, then the external ID's of the participants will be `ext_id1-test`, `ext_id2-test`, etc. - ''', + """, ) def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): - ''' + """ Iterate over `main` project to get data for each sample, then create a new participant with the same data, but with a new external ID that has the suffix specified by the user. Then upsert these into the `test` project. 
- ''' + """ # Read the CSV file into a dictionary ext_id_to_row = {} with to_path(sample_path_mappings).open() as f: @@ -106,8 +123,9 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): data = RowData(*row[:4]) ext_id_to_row[data.ext_id] = data - query_response = query(PARTICIPANT_QUERY, {"project": project}) + query_response = query(PARTICIPANT_QUERY, {'project': project}) p_upserts = [] + # pylint: disable=unsubscriptable-object for participant in query_response['project']['participants']: if participant['externalId'] not in ext_id_to_row: continue @@ -153,6 +171,7 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): ) p.samples.append(s) p_upserts.append(p) + # pylint: enable=unsubscriptable-object upserted_participants = ParticipantApi().upsert_participants(project, p_upserts) @@ -174,4 +193,6 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): if __name__ == '__main__': + # pylint: disable=no-value-for-parameter main() + # pylint: enable=no-value-for-parameter From 64310cfa2c1f5f9e90189be505e2ef45054d54b0 Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:50:27 +1100 Subject: [PATCH 32/34] Update scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py Co-authored-by: Michael Franklin <22381693+illusional@users.noreply.github.com> --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 9cc8cc77b..4c3ec8bde 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -193,6 +193,4 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): if __name__ == '__main__': - # pylint: disable=no-value-for-parameter - main() - # pylint: enable=no-value-for-parameter + main() # pylint: disable=no-value-for-parameter From 4df08e1c1b38b803df72fb498f39272ad1bafd3a Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:51:33 +1100 Subject: [PATCH 33/34] Update scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py Co-authored-by: Michael Franklin <22381693+illusional@users.noreply.github.com> --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 4c3ec8bde..1dfc64479 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -186,7 +186,7 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): type='gvcf', status=AnalysisStatus('completed'), output=gvcf_path, - sequencing_group_id=sample['sequencingGroups '][0]['id'], + sequencing_group_id=sample['sequencingGroups'][0]['id'], active=True, ), ) From 2f66f18767a3b3daaca10051c6999a4b587f7e2e Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Thu, 18 Jan 2024 16:03:09 +1100 Subject: [PATCH 34/34] fixing referencing of sequencing group attributes when upserting assay --- scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py 
b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py index 1dfc64479..9e789f8d8 100644 --- a/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py +++ b/scripts/add_nagim_gvcfs_to_tob_wgs_test_metamist.py @@ -155,13 +155,9 @@ def main(project: str, project_id: int, sample_path_mappings: str, suffix: str): AssayUpsert( type=sg['type'], meta={ - 'sequencing_type': sg['assays'][0]['meta'][ - 'sequencing_type' - ], - 'sequencing_platform': sg['assays'][0]['meta'][ - 'sequencing_platform' - ], - 'sequencing_technology': sg['assays'][0]['meta'][ + 'sequencing_type': sg['sequencing_type'], + 'sequencing_platform': sg['sequencing_platform'], + 'sequencing_technology': sg[ 'sequencing_technology' ], },
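The series is cut off mid-hunk here, but the direction of the final patch is clear: derive the assay metadata from fields the sequencing group itself carries, rather than from its first assay's meta. A hedged sketch of that construction follows. Note that PARTICIPANT_QUERY selects the sequencing-group fields as `type`, `platform` and `technology`, so a working version would presumably read those keys rather than `sequencing_type` and friends; the record below is fabricated for illustration.

from metamist.models import AssayUpsert, SequencingGroupUpsert

# Fabricated sequencing-group record with the fields PARTICIPANT_QUERY selects.
sg = {
    'id': 'CPG000000',
    'type': 'genome',
    'platform': 'illumina',
    'technology': 'short-read',
}

sg_upsert = SequencingGroupUpsert(
    type=sg['type'],
    technology=sg['technology'],
    platform=sg['platform'],
    assays=[
        AssayUpsert(
            type='sequencing',  # the assay type used elsewhere in this series
            meta={
                'sequencing_type': sg['type'],
                'sequencing_platform': sg['platform'],
                'sequencing_technology': sg['technology'],
            },
        ),
    ],
)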