From fcf07f21ad30bf6b9c97f4888ea5671f6ae10b94 Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Wed, 17 Jan 2024 10:52:29 +1100 Subject: [PATCH 1/7] Update md5 creating script for requester pays buckets (#651) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update create_md5s.py script to use billing project in gsutil commands * Bump version: 6.6.2 → 6.6.3 * Fix missing line * Use GCP billing project not Hail billing project * Linting * Revert bumpversion, ignore mypy errors in api/server.py --- api/server.py | 4 ++-- scripts/create_md5s.py | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/api/server.py b/api/server.py index 574b341f5..652aa4322 100644 --- a/api/server.py +++ b/api/server.py @@ -140,11 +140,11 @@ async def exception_handler(request: Request, e: Exception): cors_middleware = middlewares[0] request_origin = request.headers.get('origin', '') - if cors_middleware and '*' in cors_middleware.options['allow_origins']: + if cors_middleware and '*' in cors_middleware.options['allow_origins']: # type: ignore response.headers['Access-Control-Allow-Origin'] = '*' elif ( cors_middleware - and request_origin in cors_middleware.options['allow_origins'] + and request_origin in cors_middleware.options['allow_origins'] # type: ignore ): response.headers['Access-Control-Allow-Origin'] = request_origin diff --git a/scripts/create_md5s.py b/scripts/create_md5s.py index 488ec8297..f99f64906 100644 --- a/scripts/create_md5s.py +++ b/scripts/create_md5s.py @@ -12,24 +12,25 @@ def create_md5s_for_files_in_directory(skip_filetypes: tuple[str, str], force_re if not gs_dir.startswith('gs://'): raise ValueError(f'Expected GS directory, got: {gs_dir}') - billing_project = get_config()['hail']['billing_project'] + billing_project = get_config()['workflow']['gcp_billing_project'] driver_image = get_config()['workflow']['driver_image'] bucket_name, 
*components = gs_dir[5:].split('/') client = storage.Client() - blobs = client.list_blobs(bucket_name, prefix='/'.join(components)) + bucket = client.bucket(bucket_name, user_project=billing_project) + blobs = bucket.list_blobs(prefix='/'.join(components)) files: set[str] = {f'gs://{bucket_name}/{blob.name}' for blob in blobs} - for obj in files: - if obj.endswith('.md5') or obj.endswith(skip_filetypes): + for filepath in files: + if filepath.endswith('.md5') or filepath.endswith(skip_filetypes): continue - if f'{obj}.md5' in files and not force_recreate: - print(f'{obj}.md5 already exists, skipping') + if f'{filepath}.md5' in files and not force_recreate: + print(f'{filepath}.md5 already exists, skipping') continue - print('Creating md5 for', obj) - job = b.new_job(f'Create {os.path.basename(obj)}.md5') - create_md5(job, obj, billing_project, driver_image) + print('Creating md5 for', filepath) + job = b.new_job(f'Create {os.path.basename(filepath)}.md5') + create_md5(job, filepath, billing_project, driver_image) b.run(wait=False) @@ -46,7 +47,7 @@ def create_md5(job, file, billing_project, driver_image): f"""\ set -euxo pipefail gcloud -q auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS - gsutil cat {file} | md5sum | cut -d " " -f1 > /tmp/uploaded.md5 + gsutil -u {billing_project} cat {file} | md5sum | cut -d " " -f1 > /tmp/uploaded.md5 gsutil -u {billing_project} cp /tmp/uploaded.md5 {md5} """ ) From 11002afb878d6dd07611205c03fddb9eff768e6a Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 25 Jan 2024 12:04:28 +1100 Subject: [PATCH 2/7] Billing release 002 (#637) * Billing api extra labels (#619) * Added compute_category, cromwell_sub_workflow_name, cromwell_workflow_id, goog_pipelines_worker and wdl_task_name to extended view and created relevant filters and API points. * Added labels to all BQ queries, refactoring billing layer. * Added examples to billing-total-cost API regarding the new filters. 
* Billing - fixing styling issues after the first Billing release (#624) * Temporarily disable seqr and hail from /topics API. * Autoselect 1st topic / 1st project value from the DDL. * Merging Billing.css into index.css * Small fix - reusing extRecords in FieldSelector component. * Refactoring duplicated code in FieldSelector. * Added Stages to the Group by DDL. * Billing API IsBillingEnabled (#626) * Added API point to check if billing is enabled. * Added simple Total Cost By Batch Page. (#627) * Added simple Total Cost By Batch Page. * Billing cost by category (#629) * Added simple Total Cost By Batch Page. * Fixed autoselect day format. * Fixing day format for autoselect (missing leading 0) * Added first draft of billing page to show detail SKU per selected cost category over selected time periods (day, week, month or invoice month) * Small fix for BillingCostByBatch page, disable search if searchBy is empty or < 6 chars. * New: Billing API GET namespaces, added namespace to allowed fields for total cost. * Implemented HorizontalStackedBarChart, updated Billing By Invoice Month page to enable toggle between chart and table view. * Stacked Bars Chart with option to accumulate data. (#634) * Implemented Stacked bars with option to accumulate data. * Added budget bar to billing horizontal bar chart, added background color for the billing table to reflect the chart colours. * Added simple prediction of billing stacked bar chart. * Billing hail batch layout (#633) * Added simple Total Cost By Batch Page. * Removing debug prints. * Fixed autoselect day format. * Fixing day format for autoselect (missing leading 0) * Added first draft of billing page to show detail SKU per selected cost category over selected time periods (day, week, month or invoice month) * Small fix for BillingCostByBatch page, disable search if searchBy is empty or < 6 chars. * New: Billing API GET namespaces, added namespace to allowed fields for total cost. 
* Implemented HorizontalStackedBarChart, updated Billing By Invoice Month page to enable toggle between chart and table view. * ADD: Cost by Analysis page * ADD: add start of Analysis grid * ADD: add start of Analysis grid * FIX: table fixes for the HailBatchGrid * API: api changes to enable query of the raw table * API: fixed and working with updated get_total_cost endpoint * API: fix typing of get_total_cost (default return is now a list[dict] and can be converted in the layer/route to a specific output type) * API: add endpoint to get costs by batch_id * API: done * IN PROGRESS: modifying Cost By Analysis to use new endpoints * IN PROGRESS: changes to Cost By Analysis, linking with backend API. * IN PROGRESS: changes to Cost By Analysis, grid grouping by ar/batch/job. * NEW: finalising Cost By Analysis page * ADD: durations to Cost By Analysis page --------- Co-authored-by: Milo Hyben * FIX: Billing - fixing time_column condition. * Removing draft billing page. * Remove unused API point & cleanup, changes as per code review. * Small Frontend refactoring, reflecting PR review. * Updating billing style for dark mode. * Optimised Frontend, replacing reduce with forEach where possible. * Refactoring Billing DB structures. * Cleaning up unused dependencies. * FIX: replaced button 'color=red' with 'negative' property. * FIX: replace HEX color for pattern with CSS var. * FIX: replace async call with sync for a simple function. * FIX: dark mode for Horizontal Stacked Bar. * FIX: billing cost by analysis page, esp. search control resizing and functionality. * FIX: duplicated keys in the grid on Billing Cost By Analysis page. * FIX: refactoring BQ tables, small fixes for billing pages. * FIX: BillingCostPageAnalysis, keeping the old record until loading of data finishes. * FIX: Billing StackedChart various issues. * FIX: missing filters checks, updating charts when loading. * FIX: silence linting no attribute msg for Middleware.
* Refactoring filters, implemented first Billing GraphQL integration. * Removing temporary Billing GraphQL, will be properly implemented in the next PRs. * Applied changes as suggested by review. * Update db/python/tables/bq/generic_bq_filter_model.py Co-authored-by: Michael Franklin <22381693+illusional@users.noreply.github.com> --------- Co-authored-by: Sabrina Yan <9669990+violetbrina@users.noreply.github.com> Co-authored-by: Michael Franklin <22381693+illusional@users.noreply.github.com> --- api/graphql/schema.py | 1 - api/routes/billing.py | 273 ++++- api/settings.py | 5 +- db/python/layers/__init__.py | 1 + db/python/layers/billing.py | 1039 +++-------------- db/python/tables/billing.py | 18 - db/python/tables/bq/billing_ar_batch.py | 69 ++ db/python/tables/bq/billing_base.py | 695 +++++++++++ db/python/tables/bq/billing_daily.py | 131 +++ db/python/tables/bq/billing_daily_extended.py | 51 + db/python/tables/bq/billing_filter.py | 48 + db/python/tables/bq/billing_gcp_daily.py | 135 +++ db/python/tables/bq/billing_raw.py | 36 + db/python/tables/bq/function_bq_filter.py | 109 ++ db/python/tables/bq/generic_bq_filter.py | 101 ++ .../tables/bq/generic_bq_filter_model.py | 111 ++ models/enums/billing.py | 31 + models/models/__init__.py | 6 +- models/models/billing.py | 332 ++++-- web/src/Routes.tsx | 19 +- web/src/index.css | 92 ++ web/src/pages/admin/ProjectsAdmin.tsx | 2 +- web/src/pages/billing/Billing.css | 15 - .../pages/billing/BillingCostByAnalysis.tsx | 253 ++++ .../pages/billing/BillingCostByCategory.tsx | 293 +++++ web/src/pages/billing/BillingCostByTime.tsx | 298 +++-- .../pages/billing/BillingInvoiceMonthCost.tsx | 447 ++++--- web/src/pages/billing/BillingSeqrProp.tsx | 20 +- .../components/BillingCostByTimeTable.tsx | 12 +- .../billing/components/CostByTimeBarChart.tsx | 37 + .../billing/components/CostByTimeChart.tsx | 3 - .../billing/components/FieldSelector.tsx | 79 +- .../billing/components/HailBatchGrid.tsx | 499 ++++++++ 
web/src/pages/billing/index.ts | 2 + web/src/shared/components/Graphs/BarChart.tsx | 23 +- .../shared/components/Graphs/DonutChart.tsx | 24 +- .../Graphs/HorizontalStackedBarChart.tsx | 350 ++++++ .../Graphs/StackedAreaByDateChart.tsx | 6 +- .../components/Graphs/StackedBarChart.tsx | 392 +++++++ web/src/shared/components/Header/NavBar.tsx | 14 +- web/src/shared/utilities/generateUrl.ts | 15 + web/src/shared/utilities/monthStartEndDate.ts | 15 + 42 files changed, 4661 insertions(+), 1441 deletions(-) delete mode 100644 db/python/tables/billing.py create mode 100644 db/python/tables/bq/billing_ar_batch.py create mode 100644 db/python/tables/bq/billing_base.py create mode 100644 db/python/tables/bq/billing_daily.py create mode 100644 db/python/tables/bq/billing_daily_extended.py create mode 100644 db/python/tables/bq/billing_filter.py create mode 100644 db/python/tables/bq/billing_gcp_daily.py create mode 100644 db/python/tables/bq/billing_raw.py create mode 100644 db/python/tables/bq/function_bq_filter.py create mode 100644 db/python/tables/bq/generic_bq_filter.py create mode 100644 db/python/tables/bq/generic_bq_filter_model.py create mode 100644 models/enums/billing.py delete mode 100644 web/src/pages/billing/Billing.css create mode 100644 web/src/pages/billing/BillingCostByAnalysis.tsx create mode 100644 web/src/pages/billing/BillingCostByCategory.tsx create mode 100644 web/src/pages/billing/components/CostByTimeBarChart.tsx create mode 100644 web/src/pages/billing/components/HailBatchGrid.tsx create mode 100644 web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx create mode 100644 web/src/shared/components/Graphs/StackedBarChart.tsx create mode 100644 web/src/shared/utilities/generateUrl.ts create mode 100644 web/src/shared/utilities/monthStartEndDate.ts diff --git a/api/graphql/schema.py b/api/graphql/schema.py index d0ac1619f..436a1c334 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -497,7 +497,6 @@ class GraphQLSequencingGroup: 
@staticmethod def from_internal(internal: SequencingGroupInternal) -> 'GraphQLSequencingGroup': - # print(internal) return GraphQLSequencingGroup( id=sequencing_group_id_format(internal.id), type=internal.type, diff --git a/api/routes/billing.py b/api/routes/billing.py index bdc0d8b52..be1fb46ff 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -1,28 +1,48 @@ """ Billing routes """ -from fastapi import APIRouter from async_lru import alru_cache +from fastapi import APIRouter -from api.settings import BILLING_CACHE_RESPONSE_TTL -from api.utils.db import ( - BqConnection, - get_author, -) +from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW +from api.utils.db import BqConnection, get_author from db.python.layers.billing import BillingLayer -from models.models.billing import ( +from models.models import ( BillingColumn, BillingCostBudgetRecord, - BillingQueryModel, - BillingRowRecord, - BillingTotalCostRecord, + BillingHailBatchCostRecord, + BillingSource, BillingTotalCostQueryModel, + BillingTotalCostRecord, ) - router = APIRouter(prefix='/billing', tags=['billing']) +def _get_billing_layer_from(author: str) -> BillingLayer: + """ + Initialise billing + """ + if not is_billing_enabled(): + raise ValueError('Billing is not enabled') + + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + return billing_layer + + +@router.get( + '/is-billing-enabled', + response_model=bool, + operation_id='isBillingEnabled', +) +def is_billing_enabled() -> bool: + """ + Return true if billing ie enabled, false otherwise + """ + return BQ_AGGREG_VIEW is not None + + @router.get( '/gcp-projects', response_model=list[str], @@ -33,8 +53,7 @@ async def get_gcp_projects( author: str = get_author, ) -> list[str]: """Get list of all GCP projects in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_gcp_projects() 
return records @@ -49,8 +68,7 @@ async def get_topics( author: str = get_author, ) -> list[str]: """Get list of all topics in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_topics() return records @@ -65,8 +83,7 @@ async def get_cost_categories( author: str = get_author, ) -> list[str]: """Get list of all service description / cost categories in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_cost_categories() return records @@ -87,8 +104,7 @@ async def get_skus( There is over 400 Skus so limit is required Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_skus(limit, offset) return records @@ -106,8 +122,7 @@ async def get_datasets( Get list of all datasets in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_datasets() return records @@ -125,8 +140,7 @@ async def get_sequencing_types( Get list of all sequencing_types in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_sequencing_types() return records @@ -144,8 +158,7 @@ async def get_stages( Get list of all stages in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_stages() return records @@ -163,12 +176,65 @@ async def get_sequencing_groups( Get list of all sequencing_groups in 
database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_sequencing_groups() return records +@router.get( + '/compute-categories', + response_model=list[str], + operation_id='getComputeCategories', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_compute_categories( + author: str = get_author, +) -> list[str]: + """ + Get list of all compute categories in database + Results are sorted ASC + """ + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_compute_categories() + return records + + +@router.get( + '/cromwell-sub-workflow-names', + response_model=list[str], + operation_id='getCromwellSubWorkflowNames', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cromwell_sub_workflow_names( + author: str = get_author, +) -> list[str]: + """ + Get list of all cromwell_sub_workflow_names in database + Results are sorted ASC + """ + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_cromwell_sub_workflow_names() + return records + + +@router.get( + '/wdl-task-names', + response_model=list[str], + operation_id='getWdlTaskNames', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_wdl_task_names( + author: str = get_author, +) -> list[str]: + """ + Get list of all wdl_task_names in database + Results are sorted ASC + """ + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_wdl_task_names() + return records + + @router.get( '/invoice-months', response_model=list[str], @@ -182,36 +248,58 @@ async def get_invoice_months( Get list of all invoice months in database Results are sorted DESC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_invoice_months() return records -@router.post( - 
'/query', response_model=list[BillingRowRecord], operation_id='queryBilling' +@router.get( + '/namespaces', + response_model=list[str], + operation_id='getNamespaces', ) -@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) -async def query_billing( - query: BillingQueryModel, - limit: int = 10, +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_namespaces( author: str = get_author, -) -> list[BillingRowRecord]: +) -> list[str]: """ - Get Billing records by some criteria, date is required to minimize BQ cost + Get list of all namespaces in database + Results are sorted DESC + """ + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_namespaces() + return records - E.g. - { - "topic": ["hail"], - "date": "2023-03-02", - "cost_category": ["Hail compute Credit"] - } +@router.get( + '/cost-by-ar-guid/{ar_guid}', + response_model=BillingHailBatchCostRecord, + operation_id='costByArGuid', +) +@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cost_by_ar_guid( + ar_guid: str, + author: str = get_author, +) -> BillingHailBatchCostRecord: + """Get Hail Batch costs by AR GUID""" + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_cost_by_ar_guid(ar_guid) + return records - """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) - records = await billing_layer.query(query.to_filter(), limit) + +@router.get( + '/cost-by-batch-id/{batch_id}', + response_model=BillingHailBatchCostRecord, + operation_id='costByBatchId', +) +@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cost_by_batch_id( + batch_id: str, + author: str = get_author, +) -> BillingHailBatchCostRecord: + """Get Hail Batch costs by Batch ID""" + billing_layer = _get_billing_layer_from(author) + records = await billing_layer.get_cost_by_batch_id(batch_id) return records @@ -341,12 +429,87 @@ async def get_total_cost( "order_by": {"cost": true} } - """ + 12. 
Get total cost by compute_category order by cost DESC: - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + { + "fields": ["compute_category"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 13. Get total cost by cromwell_sub_workflow_name, order by cost DESC: + + { + "fields": ["cromwell_sub_workflow_name"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 14. Get total cost by sku for given cromwell_workflow_id, order by cost DESC: + + { + "fields": ["sku"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "filters": {"cromwell_workflow_id": "cromwell-00448f7b-8ef3-4d22-80ab-e302acdb2d28"}, + "order_by": {"cost": true} + } + + 15. Get total cost by sku for given goog_pipelines_worker, order by cost DESC: + + { + "fields": ["goog_pipelines_worker"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 16. Get total cost by sku for given wdl_task_name, order by cost DESC: + + { + "fields": ["wdl_task_name"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 17. Get total cost by sku for provided ID, which can be any of + [ar_guid, batch_id, sequencing_group or cromwell_workflow_id], + order by cost DESC: + + { + "fields": ["sku", "ar_guid", "batch_id", "sequencing_group", "cromwell_workflow_id"], + "start_date": "2023-11-01", + "end_date": "2023-11-30", + "filters": { + "ar_guid": "855a6153-033c-4398-8000-46ed74c02fe8", + "batch_id": "429518", + "sequencing_group": "cpg246751", + "cromwell_workflow_id": "cromwell-e252f430-4143-47ec-a9c0-5f7face1b296" + }, + "filters_op": "OR", + "order_by": {"cost": true} + } + + 18. 
Get weekly total cost by sku for selected cost_category, order by day ASC: + + { + "fields": ["sku"], + "start_date": "2022-11-01", + "end_date": "2023-12-07", + "filters": { + "cost_category": "Cloud Storage" + }, + "order_by": {"day": false}, + "time_periods": "week" + } + + """ + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_total_cost(query) - return records + return [BillingTotalCostRecord.from_json(record) for record in records] @router.get( @@ -358,20 +521,18 @@ async def get_total_cost( async def get_running_costs( field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, + source: BillingSource | None = None, author: str = get_author, ) -> list[BillingCostBudgetRecord]: """ Get running cost for specified fields in database - e.g. fields = ['gcp_project', 'topic'] + e.g. fields = ['gcp_project', 'topic', 'wdl_task_names', 'cromwell_sub_workflow_name', 'compute_category'] """ - # TODO replace alru_cache with async-cache? # so we can skip author for caching? 
# pip install async-cache # @AsyncTTL(time_to_live=BILLING_CACHE_RESPONSE_TTL, maxsize=1024, skip_args=2) - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_running_cost(field, invoice_month, source) return records diff --git a/api/settings.py b/api/settings.py index ab92c717c..98d11062f 100644 --- a/api/settings.py +++ b/api/settings.py @@ -42,10 +42,11 @@ BQ_AGGREG_EXT_VIEW = os.getenv('SM_GCP_BQ_AGGREG_EXT_VIEW') BQ_BUDGET_VIEW = os.getenv('SM_GCP_BQ_BUDGET_VIEW') BQ_GCP_BILLING_VIEW = os.getenv('SM_GCP_BQ_BILLING_VIEW') +BQ_BATCHES_VIEW = os.getenv('SM_GCP_BQ_BATCHES_VIEW') # This is to optimise BQ queries, DEV table has data only for Mar 2023 -BQ_DAYS_BACK_OPTIMAL = 30 # Look back 30 days for optimal query -BILLING_CACHE_RESPONSE_TTL = 3600 # 1 Hour +BQ_DAYS_BACK_OPTIMAL = 30 # Look back 30 days for optimal query +BILLING_CACHE_RESPONSE_TTL = 3600 # 1 Hour def get_default_user() -> str | None: diff --git a/db/python/layers/__init__.py b/db/python/layers/__init__.py index dafa085df..fafacc017 100644 --- a/db/python/layers/__init__.py +++ b/db/python/layers/__init__.py @@ -2,6 +2,7 @@ from db.python.layers.assay import AssayLayer from db.python.layers.audit_log import AuditLogLayer from db.python.layers.base import BaseLayer +from db.python.layers.billing import BillingLayer from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index 93ce3cfc7..8f991c728 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -1,49 +1,73 @@ -import re - -from typing import Any -from datetime import datetime -from collections import Counter, defaultdict -from google.cloud import bigquery - +from db.python.layers.bq_base import BqBaseLayer +from db.python.tables.bq.billing_ar_batch 
import BillingArBatchTable +from db.python.tables.bq.billing_daily import BillingDailyTable +from db.python.tables.bq.billing_daily_extended import BillingDailyExtendedTable +from db.python.tables.bq.billing_gcp_daily import BillingGcpDailyTable +from db.python.tables.bq.billing_raw import BillingRawTable from models.models import ( - BillingRowRecord, - BillingTotalCostRecord, - BillingTotalCostQueryModel, BillingColumn, BillingCostBudgetRecord, + BillingHailBatchCostRecord, + BillingSource, + BillingTimeColumn, + BillingTimePeriods, + BillingTotalCostQueryModel, ) -from db.python.gcp_connect import BqDbBase -from db.python.layers.bq_base import BqBaseLayer -from db.python.tables.billing import BillingFilter - -from api.settings import ( - BQ_DAYS_BACK_OPTIMAL, - BQ_AGGREG_VIEW, - BQ_AGGREG_RAW, - BQ_AGGREG_EXT_VIEW, - BQ_BUDGET_VIEW, - BQ_GCP_BILLING_VIEW, -) -from api.utils.dates import get_invoice_month_range, reformat_datetime - - -def abbrev_cost_category(cost_category: str) -> str: - """abbreviate cost category""" - return 'S' if cost_category == 'Cloud Storage' else 'C' - class BillingLayer(BqBaseLayer): """Billing layer""" + def table_factory( + self, + source: BillingSource, + fields: list[BillingColumn] | None = None, + filters: dict[BillingColumn, str | list | dict] | None = None, + ) -> ( + BillingDailyTable + | BillingDailyExtendedTable + | BillingGcpDailyTable + | BillingRawTable + ): + """Get billing table object based on source and fields""" + if source == BillingSource.GCP_BILLING: + return BillingGcpDailyTable(self.connection) + if source == BillingSource.RAW: + return BillingRawTable(self.connection) + + # check if any of the fields is in the extended columns + if fields: + used_extended_cols = [ + f + for f in fields + if f in BillingColumn.extended_cols() and BillingColumn.can_group_by(f) + ] + if used_extended_cols: + # there is a field from extended daily table + return BillingDailyExtendedTable(self.connection) + + # check if any of the 
filters is in the extended columns + if filters: + used_extended_cols = [ + f + for f in filters + if f in BillingColumn.extended_cols() and BillingColumn.can_group_by(f) + ] + if used_extended_cols: + # there is a field from extended daily table + return BillingDailyExtendedTable(self.connection) + + # by default look at the daily table + return BillingDailyTable(self.connection) + async def get_gcp_projects( self, ) -> list[str] | None: """ Get All GCP projects in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_gcp_projects() + billing_table = BillingGcpDailyTable(self.connection) + return await billing_table.get_gcp_projects() async def get_topics( self, @@ -51,8 +75,8 @@ async def get_topics( """ Get All topics in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_topics() + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_topics() async def get_cost_categories( self, @@ -60,8 +84,8 @@ async def get_cost_categories( """ Get All service description / cost categories in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_cost_categories() + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_cost_categories() async def get_skus( self, @@ -71,8 +95,8 @@ async def get_skus( """ Get All SKUs in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_skus(limit, offset) + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_skus(limit, offset) async def get_datasets( self, @@ -80,8 +104,8 @@ async def get_datasets( """ Get All datasets in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('dataset') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('dataset') async def get_stages( self, @@ -89,8 +113,8 @@ async def get_stages( """ 
Get All stages in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('stage') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('stage') async def get_sequencing_types( self, @@ -98,8 +122,8 @@ async def get_sequencing_types( """ Get All sequencing_types in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_type') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('sequencing_type') async def get_sequencing_groups( self, @@ -107,865 +131,172 @@ async def get_sequencing_groups( """ Get All sequencing_groups in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_group') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('sequencing_group') - async def get_invoice_months( + async def get_compute_categories( self, ) -> list[str] | None: """ - Get All invoice months in database + Get All compute_category values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_invoice_months() + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('compute_category') - async def query( + async def get_cromwell_sub_workflow_names( self, - _filter: BillingFilter, - limit: int = 10, - ) -> list[BillingRowRecord] | None: + ) -> list[str] | None: """ - Get Billing record for the given gilter + Get All cromwell_sub_workflow_name values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.query(_filter, limit) + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('cromwell_sub_workflow_name') - async def get_total_cost( + async def get_wdl_task_names( self, - query: 
BillingTotalCostQueryModel, - ) -> list[BillingTotalCostRecord] | None: + ) -> list[str] | None: """ - Get Total cost of selected fields for requested time interval + Get All wdl_task_name values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_total_cost(query) + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('wdl_task_name') - async def get_running_cost( + async def get_invoice_months( self, - field: BillingColumn, - invoice_month: str | None = None, - source: str | None = None, - ) -> list[BillingCostBudgetRecord]: - """ - Get Running costs including monthly budget - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_running_cost(field, invoice_month, source) - - -class BillingDb(BqDbBase): - """Db layer for billing related routes""" - - async def get_gcp_projects(self): - """Get all GCP projects in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this part_time > filter is to limit the amount of data scanned, - # saving cost for running BQ - _query = f""" - SELECT DISTINCT gcp_project - FROM `{BQ_GCP_BILLING_VIEW}` - WHERE part_time > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - AND gcp_project IS NOT NULL - ORDER BY gcp_project ASC; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ] - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['gcp_project']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_topics(self): - """Get all topics in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable 
BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT topic - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY topic ASC; + ) -> list[str] | None: """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ] - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['topic']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_invoice_months(self): - """Get all invoice months in database""" - - _query = f""" - SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month - FROM `{BQ_AGGREG_VIEW}` - WHERE EXTRACT(day from day) = 1 - ORDER BY invoice_month DESC; - """ - - query_job_result = list(self._connection.connection.query(_query).result()) - if query_job_result: - return [str(dict(row)['invoice_month']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_cost_categories(self): - """Get all service description in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT cost_category - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY cost_category ASC; + Get All invoice months in database """ + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_invoice_months() - job_config = bigquery.QueryJobConfig( - query_parameters=[ - 
bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ] - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['cost_category']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_skus( + async def get_namespaces( self, - limit: int | None = None, - offset: int | None = None, - ): - """Get all SKUs in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT sku - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY sku ASC - """ - - # append LIMIT and OFFSET if present - if limit: - _query += ' LIMIT @limit_val' - if offset: - _query += ' OFFSET @offset_val' - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - bigquery.ScalarQueryParameter('limit_val', 'INT64', limit), - bigquery.ScalarQueryParameter('offset_val', 'INT64', offset), - ] - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['sku']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_extended_values(self, field: str): + ) -> list[str] | None: """ - Get all extended values in database, - e.g. 
dataset, stage, sequencing_type or sequencing_group + Get All namespaces values in database """ - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT {field} - FROM `{BQ_AGGREG_EXT_VIEW}` - WHERE {field} IS NOT NULL - AND day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY 1 ASC; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ] - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)[field]) for row in query_job_result] - - # return empty list if no record found - return [] - - async def query( - self, - filter_: BillingFilter, - limit: int = 10, - ) -> list[BillingRowRecord] | None: - """Get Billing record from BQ""" - - # TODO: THis function is not going to be used most likely - # get_total_cost will replace it - - # cost of this BQ is 30MB on DEV, - # DEV is partition by day and date is required filter params, - # cost is aprox per query: AU$ 0.000023 per query - - required_fields = [ - filter_.date, - ] - - if not any(required_fields): - raise ValueError('Must provide date to filter on') - - # construct filters - filters = [] - query_parameters = [] - - if filter_.topic: - filters.append('topic IN UNNEST(@topic)') - query_parameters.append( - bigquery.ArrayQueryParameter('topic', 'STRING', filter_.topic.in_), - ) - - if filter_.date: - filters.append('DATE_TRUNC(usage_end_time, DAY) = TIMESTAMP(@date)') - query_parameters.append( - bigquery.ScalarQueryParameter('date', 'STRING', filter_.date.eq), - ) - - if filter_.cost_category: - filters.append('service.description IN 
UNNEST(@cost_category)') - query_parameters.append( - bigquery.ArrayQueryParameter( - 'cost_category', 'STRING', filter_.cost_category.in_ - ), - ) - - filter_str = 'WHERE ' + ' AND '.join(filters) if filters else '' - - _query = f""" - SELECT id, topic, service, sku, usage_start_time, usage_end_time, project, - labels, export_time, cost, currency, currency_conversion_rate, invoice, cost_type - FROM `{BQ_AGGREG_RAW}` - {filter_str} - """ - if limit: - _query += ' LIMIT @limit_val' - query_parameters.append( - bigquery.ScalarQueryParameter('limit_val', 'INT64', limit) - ) - - job_config = bigquery.QueryJobConfig(query_parameters=query_parameters) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - - if query_job_result: - return [BillingRowRecord.from_json(dict(row)) for row in query_job_result] - - raise ValueError('No record found') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('namespace') async def get_total_cost( self, query: BillingTotalCostQueryModel, - ) -> list[BillingTotalCostRecord] | None: - """ - Get Total cost of selected fields for requested time interval from BQ view + ) -> list[dict] | None: """ - if not query.start_date or not query.end_date or not query.fields: - raise ValueError('Date and Fields are required') - - extended_cols = BillingColumn.extended_cols() - - # by default look at the normal view - if query.source == 'gcp_billing': - view_to_use = BQ_GCP_BILLING_VIEW - else: - view_to_use = BQ_AGGREG_VIEW - - columns = [] - for field in query.fields: - col_name = str(field.value) - if col_name == 'cost': - # skip the cost field as it will be always present - continue - - if col_name in extended_cols: - # if one of the extended columns is needed, the view has to be extended - view_to_use = BQ_AGGREG_EXT_VIEW - - columns.append(col_name) - - fields_selected = ','.join(columns) - - # construct filters - filters = [] - 
query_parameters = [] - - filters.append('day >= TIMESTAMP(@start_date)') - query_parameters.append( - bigquery.ScalarQueryParameter('start_date', 'STRING', query.start_date) - ) - - filters.append('day <= TIMESTAMP(@end_date)') - query_parameters.append( - bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date) - ) - - if query.source == 'gcp_billing': - # BQ_GCP_BILLING_VIEW view is partitioned by different field - # BQ has limitation, materialized view can only by partition by base table - # partition or its subset, in our case _PARTITIONTIME - # (part_time field in the view) - # We are querying by day, - # which can be up to a week behind regarding _PARTITIONTIME - filters.append('part_time >= TIMESTAMP(@start_date)') - filters.append( - 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' - ) - - if query.filters: - for filter_key, filter_value in query.filters.items(): - col_name = str(filter_key.value) - filters.append(f'{col_name} = @{col_name}') - query_parameters.append( - bigquery.ScalarQueryParameter(col_name, 'STRING', filter_value) - ) - if col_name in extended_cols: - # if one of the extended columns is needed, - # the view has to be extended - view_to_use = BQ_AGGREG_EXT_VIEW - - filter_str = 'WHERE ' + ' AND '.join(filters) if filters else '' - - # construct order by - order_by_cols = [] - if query.order_by: - for order_field, reverse in query.order_by.items(): - col_name = str(order_field.value) - col_order = 'DESC' if reverse else 'ASC' - order_by_cols.append(f'{col_name} {col_order}') - - order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' - - _query = f""" - SELECT {fields_selected}, SUM(cost) as cost - FROM `{view_to_use}` - {filter_str} - GROUP BY {fields_selected} - {order_by_str} - """ - - # append LIMIT and OFFSET if present - if query.limit: - _query += ' LIMIT @limit_val' - query_parameters.append( - bigquery.ScalarQueryParameter('limit_val', 'INT64', query.limit) - ) - if query.offset: 
- _query += ' OFFSET @offset_val' - query_parameters.append( - bigquery.ScalarQueryParameter('offset_val', 'INT64', query.offset) - ) - - job_config = bigquery.QueryJobConfig(query_parameters=query_parameters) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - - if query_job_result: - return [ - BillingTotalCostRecord.from_json(dict(row)) for row in query_job_result - ] - - # return empty list if no record found - return [] - - async def get_budgets_by_gcp_project( - self, field: BillingColumn, is_current_month: bool - ) -> dict[str, float]: - """ - Get budget for gcp-projects - """ - if field != BillingColumn.PROJECT or not is_current_month: - # only projects have budget and only for current month - return {} - - _query = f""" - WITH t AS ( - SELECT gcp_project, MAX(created_at) as last_created_at - FROM `{BQ_BUDGET_VIEW}` - GROUP BY 1 - ) - SELECT t.gcp_project, d.budget - FROM t inner join `{BQ_BUDGET_VIEW}` d - ON d.gcp_project = t.gcp_project AND d.created_at = t.last_created_at - """ - - query_job_result = list(self._connection.connection.query(_query).result()) - - if query_job_result: - return {row.gcp_project: row.budget for row in query_job_result} - - return {} - - async def get_last_loaded_day(self): - """Get the most recent fully loaded day in db - Go 2 days back as the data is not always available for the current day - 1 day back is not enough - """ - - _query = f""" - SELECT TIMESTAMP_ADD(MAX(day), INTERVAL -2 DAY) as last_loaded_day - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) + Get Total cost of selected fields for requested time interval """ + billing_table = self.table_factory(query.source, query.fields, query.filters) + return await billing_table.get_total_cost(query) - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ] - ) - - 
query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return str(query_job_result[0].last_loaded_day) - - return None - - async def prepare_daily_cost_subquery( - self, field, view_to_use, source, query_params - ): - """prepare daily cost subquery""" - - if source == 'gcp_billing': - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - gcp_billing_optimise_filter = """ - AND part_time >= TIMESTAMP(@last_loaded_day) - AND part_time <= TIMESTAMP_ADD( - TIMESTAMP(@last_loaded_day), INTERVAL 7 DAY - ) - """ - else: - gcp_billing_optimise_filter = '' - - # Find the last fully loaded day in the view - last_loaded_day = await self.get_last_loaded_day() - - daily_cost_field = ', day.cost as daily_cost' - daily_cost_join = f"""LEFT JOIN ( - SELECT - {field.value} as field, - cost_category, - SUM(cost) as cost - FROM - `{view_to_use}` - WHERE day = TIMESTAMP(@last_loaded_day) - {gcp_billing_optimise_filter} - GROUP BY - field, - cost_category - ) day - ON month.field = day.field - AND month.cost_category = day.cost_category - """ - - query_params.append( - bigquery.ScalarQueryParameter('last_loaded_day', 'STRING', last_loaded_day), - ) - return (last_loaded_day, query_params, daily_cost_field, daily_cost_join) - - async def execute_running_cost_query( + async def get_running_cost( self, field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, - ): + source: BillingSource | None = None, + ) -> list[BillingCostBudgetRecord]: """ - Run query to get running cost of selected field - """ - # check if invoice month is valid first - if not invoice_month or not re.match(r'^\d{6}$', invoice_month): - raise ValueError('Invalid invoice month') - - 
invoice_month_date = datetime.strptime(invoice_month, '%Y%m') - if invoice_month != invoice_month_date.strftime('%Y%m'): - raise ValueError('Invalid invoice month') - - # get start day and current day for given invoice month - # This is to optimise the query, BQ view is partitioned by day - # and not by invoice month - start_day_date, last_day_date = get_invoice_month_range(invoice_month_date) - start_day = start_day_date.strftime('%Y-%m-%d') - last_day = last_day_date.strftime('%Y-%m-%d') - - # by default look at the normal view - if field in BillingColumn.extended_cols(): - # if any of the extendeid fields are needed use the extended view - view_to_use = BQ_AGGREG_EXT_VIEW - elif source == 'gcp_billing': - # if source is gcp_billing, - # use the view on top of the raw billing table - view_to_use = BQ_GCP_BILLING_VIEW - else: - # otherwise use the normal view - view_to_use = BQ_AGGREG_VIEW - - if source == 'gcp_billing': - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - filter_to_optimise_query = """ - part_time >= TIMESTAMP(@start_day) - AND part_time <= TIMESTAMP_ADD( - TIMESTAMP(@last_day), INTERVAL 7 DAY - ) - """ - else: - # add extra filter to limit materialized view partition - filter_to_optimise_query = """ - day >= TIMESTAMP(@start_day) - AND day <= TIMESTAMP(@last_day) - """ - - # start_day and last_day are in to optimise the query - query_params = [ - bigquery.ScalarQueryParameter('start_day', 'STRING', start_day), - bigquery.ScalarQueryParameter('last_day', 'STRING', last_day), - ] - - current_day = datetime.now().strftime('%Y-%m-%d') - is_current_month = last_day >= current_day - last_loaded_day = None - - if is_current_month: - # Only current month can have last 24 hours cost - # Last 24H in UTC time - ( - 
last_loaded_day, - query_params, - daily_cost_field, - daily_cost_join, - ) = await self.prepare_daily_cost_subquery( - field, view_to_use, source, query_params - ) - else: - # Do not calculate last 24H cost - daily_cost_field = ', NULL as daily_cost' - daily_cost_join = '' - - _query = f""" - SELECT - CASE WHEN month.field IS NULL THEN 'N/A' ELSE month.field END as field, - month.cost_category, - month.cost as monthly_cost - {daily_cost_field} - FROM - ( - SELECT - {field.value} as field, - cost_category, - SUM(cost) as cost - FROM - `{view_to_use}` - WHERE {filter_to_optimise_query} - AND invoice_month = @invoice_month - GROUP BY - field, - cost_category - HAVING cost > 0.1 - ) month - {daily_cost_join} - ORDER BY field ASC, daily_cost DESC, monthly_cost DESC; - """ - - query_params.append( - bigquery.ScalarQueryParameter('invoice_month', 'STRING', invoice_month) - ) - - return ( - is_current_month, - last_loaded_day, - list( - self._connection.connection.query( - _query, - job_config=bigquery.QueryJobConfig(query_parameters=query_params), - ).result() - ), - ) + Get Running costs including monthly budget + """ + billing_table = self.table_factory(source, [field]) + return await billing_table.get_running_cost(field, invoice_month) - async def append_total_running_cost( + async def get_cost_by_ar_guid( self, - field: BillingColumn, - is_current_month: bool, - last_loaded_day: str | None, - total_monthly: dict, - total_daily: dict, - total_monthly_category: dict, - total_daily_category: dict, - results: list[BillingCostBudgetRecord], - ) -> list[BillingCostBudgetRecord]: + ar_guid: str | None = None, + ) -> BillingHailBatchCostRecord: """ - Add total row: compute + storage to the results - """ - # construct ALL fields details - all_details = [] - for cat, mth_cost in total_monthly_category.items(): - all_details.append( - { - 'cost_group': abbrev_cost_category(cat), - 'cost_category': cat, - 'daily_cost': total_daily_category[cat] - if is_current_month - else None, 
- 'monthly_cost': mth_cost, - } + Get Costs by AR GUID + """ + ar_batch_lookup_table = BillingArBatchTable(self.connection) + + # First get all batches and the min/max day to use for the query + ( + start_day, + end_day, + batches, + ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid) + + if not batches: + return BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=[], + costs=[], ) - # add total row: compute + storage - results.append( - BillingCostBudgetRecord.from_json( - { - 'field': f'{BillingColumn.generate_all_title(field)}', - 'total_monthly': ( - total_monthly['C']['ALL'] + total_monthly['S']['ALL'] - ), - 'total_daily': (total_daily['C']['ALL'] + total_daily['S']['ALL']) - if is_current_month - else None, - 'compute_monthly': total_monthly['C']['ALL'], - 'compute_daily': (total_daily['C']['ALL']) - if is_current_month - else None, - 'storage_monthly': total_monthly['S']['ALL'], - 'storage_daily': (total_daily['S']['ALL']) - if is_current_month - else None, - 'details': all_details, - 'last_loaded_day': last_loaded_day, + # Then get the costs for the given AR GUID/batches from the main table + all_cols = [BillingColumn.str_to_enum(v) for v in BillingColumn.raw_cols()] + + query = BillingTotalCostQueryModel( + fields=all_cols, + source=BillingSource.RAW, + start_date=start_day.strftime('%Y-%m-%d'), + end_date=end_day.strftime('%Y-%m-%d'), + filters={ + BillingColumn.LABELS: { + 'batch_id': batches, + 'ar-guid': ar_guid, } - ) + }, + filters_op='OR', + group_by=False, + time_column=BillingTimeColumn.USAGE_END_TIME, + time_periods=BillingTimePeriods.DAY, ) - return results - - async def append_running_cost_records( - self, - field: BillingColumn, - is_current_month: bool, - last_loaded_day: str | None, - total_monthly: dict, - total_daily: dict, - field_details: dict, - results: list[BillingCostBudgetRecord], - ) -> list[BillingCostBudgetRecord]: - """ - Add all the selected field rows: compute + storage to the results - """ - # get budget 
map per gcp project - budgets_per_gcp_project = await self.get_budgets_by_gcp_project( - field, is_current_month + billing_table = self.table_factory(query.source, query.fields) + records = await billing_table.get_total_cost(query) + return BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=batches, + costs=records, ) - # add rows by field - for key, details in field_details.items(): - compute_daily = total_daily['C'][key] if key in total_daily['C'] else 0 - storage_daily = total_daily['S'][key] if key in total_daily['S'] else 0 - compute_monthly = ( - total_monthly['C'][key] if key in total_monthly['C'] else 0 - ) - storage_monthly = ( - total_monthly['S'][key] if key in total_monthly['S'] else 0 - ) - monthly = compute_monthly + storage_monthly - budget_monthly = budgets_per_gcp_project.get(key) - - results.append( - BillingCostBudgetRecord.from_json( - { - 'field': key, - 'total_monthly': monthly, - 'total_daily': (compute_daily + storage_daily) - if is_current_month - else None, - 'compute_monthly': compute_monthly, - 'compute_daily': compute_daily, - 'storage_monthly': storage_monthly, - 'storage_daily': storage_daily, - 'details': details, - 'budget_spent': 100 * monthly / budget_monthly - if budget_monthly - else None, - 'last_loaded_day': last_loaded_day, - } - ) - ) - - return results - - async def get_running_cost( + async def get_cost_by_batch_id( self, - field: BillingColumn, - invoice_month: str | None = None, - source: str | None = None, - ) -> list[BillingCostBudgetRecord]: + batch_id: str | None = None, + ) -> BillingHailBatchCostRecord: """ - Get currently running cost of selected field + Get Costs by Batch ID """ + ar_batch_lookup_table = BillingArBatchTable(self.connection) - # accept only Topic, Dataset or Project at this stage - if field not in ( - BillingColumn.TOPIC, - BillingColumn.PROJECT, - BillingColumn.DATASET, - ): - raise ValueError('Invalid field only topic, dataset or project allowed') + # First get all batches and the 
min/max day to use for the query + ar_guid = await ar_batch_lookup_table.get_ar_guid_by_batch_id(batch_id) + # The get all batches for the ar_guid ( - is_current_month, - last_loaded_day, - query_job_result, - ) = await self.execute_running_cost_query(field, invoice_month, source) - if not query_job_result: - # return empty list - return [] - - # prepare data - results: list[BillingCostBudgetRecord] = [] - - # reformat last_loaded_day if present - last_loaded_day = reformat_datetime( - last_loaded_day, '%Y-%m-%d %H:%M:%S+00:00', '%b %d' - ) - - total_monthly: dict[str, Counter[str]] = defaultdict(Counter) - total_daily: dict[str, Counter[str]] = defaultdict(Counter) - field_details: dict[str, list[Any]] = defaultdict(list) - total_monthly_category: Counter[str] = Counter() - total_daily_category: Counter[str] = Counter() - - for row in query_job_result: - if row.field not in field_details: - field_details[row.field] = [] - - cost_group = abbrev_cost_category(row.cost_category) - - field_details[row.field].append( - { - 'cost_group': cost_group, - 'cost_category': row.cost_category, - 'daily_cost': row.daily_cost if is_current_month else None, - 'monthly_cost': row.monthly_cost, + start_day, + end_day, + batches, + ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid) + + if not batches: + return BillingHailBatchCostRecord(ar_guid=ar_guid, batch_ids=[], costs=[]) + + # Then get the costs for the given AR GUID/batches from the main table + all_cols = [BillingColumn.str_to_enum(v) for v in BillingColumn.raw_cols()] + + query = BillingTotalCostQueryModel( + fields=all_cols, + source=BillingSource.RAW, + start_date=start_day.strftime('%Y-%m-%d'), + end_date=end_day.strftime('%Y-%m-%d'), + filters={ + BillingColumn.LABELS: { + 'batch_id': batches, + 'ar-guid': ar_guid, } - ) - - total_monthly_category[row.cost_category] += row.monthly_cost - if row.daily_cost: - total_daily_category[row.cost_category] += row.daily_cost - - # cost groups totals - 
total_monthly[cost_group]['ALL'] += row.monthly_cost - total_monthly[cost_group][row.field] += row.monthly_cost - if row.daily_cost and is_current_month: - total_daily[cost_group]['ALL'] += row.daily_cost - total_daily[cost_group][row.field] += row.daily_cost - - # add total row: compute + storage - results = await self.append_total_running_cost( - field, - is_current_month, - last_loaded_day, - total_monthly, - total_daily, - total_monthly_category, - total_daily_category, - results, + }, + filters_op='OR', + group_by=False, + time_column=BillingTimeColumn.USAGE_END_TIME, + time_periods=BillingTimePeriods.DAY, ) - - # add rest of the records: compute + storage - results = await self.append_running_cost_records( - field, - is_current_month, - last_loaded_day, - total_monthly, - total_daily, - field_details, - results, + billing_table = self.table_factory(query.source, query.fields) + records = await billing_table.get_total_cost(query) + return BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=batches, + costs=records, ) - - return results diff --git a/db/python/tables/billing.py b/db/python/tables/billing.py deleted file mode 100644 index 54402a85c..000000000 --- a/db/python/tables/billing.py +++ /dev/null @@ -1,18 +0,0 @@ -import dataclasses - -from db.python.utils import ( - GenericFilter, - GenericFilterModel, -) - - -@dataclasses.dataclass -class BillingFilter(GenericFilterModel): - """Filter for billing""" - - topic: GenericFilter[str] = None - date: GenericFilter[str] = None - cost_category: GenericFilter[str] = None - - def __hash__(self): # pylint: disable=useless-parent-delegation - return super().__hash__() diff --git a/db/python/tables/bq/billing_ar_batch.py b/db/python/tables/bq/billing_ar_batch.py new file mode 100644 index 000000000..d9326b6b3 --- /dev/null +++ b/db/python/tables/bq/billing_ar_batch.py @@ -0,0 +1,69 @@ +from datetime import datetime, timedelta + +from google.cloud import bigquery + +from api.settings import BQ_BATCHES_VIEW 
class BillingArBatchTable(BillingBaseTable):
    """Billing AR - BatchID lookup Big Query table.

    Queries the view named by ``BQ_BATCHES_VIEW`` to translate between an
    AR GUID and the batch ids stored against it.
    """

    table_name = BQ_BATCHES_VIEW

    def get_table_name(self):
        """Get table name"""
        return self.table_name

    async def get_batches_by_ar_guid(
        self, ar_guid: str
    ) -> tuple[datetime | None, datetime | None, list[str]]:
        """
        Get batches for given ar_guid.

        Returns a ``(start_day, end_day, batch_ids)`` tuple:

        * ``start_day`` - the smallest ``MIN(min_day)`` over all batches,
        * ``end_day`` - the largest ``MAX(max_day)`` over all batches plus
          one day,
        * ``batch_ids`` - the batch ids, ordered by the query.

        When the lookup yields no rows, ``(None, None, [])`` is returned, so
        callers must check the batch list before using the dates.
        """
        _query = f"""
        SELECT
            batch_id,
            MIN(min_day) as start_day,
            MAX(max_day) as end_day
        FROM `{self.table_name}`
        WHERE ar_guid = @ar_guid
        AND batch_id IS NOT NULL
        GROUP BY batch_id
        ORDER BY batch_id;
        """

        query_parameters = [
            bigquery.ScalarQueryParameter('ar_guid', 'STRING', ar_guid),
        ]
        rows = self._execute_query(_query, query_parameters)

        if not rows:
            # no record found: no dates, empty batch list
            return None, None, []

        start_day = min(row.start_day for row in rows)
        # end_day is pushed one day past the last recorded day
        end_day = max(row.end_day for row in rows) + timedelta(days=1)
        return start_day, end_day, [row.batch_id for row in rows]

    async def get_ar_guid_by_batch_id(self, batch_id: str) -> str | None:
        """
        Get ar_guid for given batch_id.

        Returns the ``ar_guid`` of the first matching row (the query is
        limited to one row), or ``None`` when no row has a non-null
        ``ar_guid`` for this batch.
        """
        _query = f"""
        SELECT ar_guid
        FROM `{self.table_name}`
        WHERE batch_id = @batch_id
        AND ar_guid IS NOT NULL
        LIMIT 1;
        """

        query_parameters = [
            bigquery.ScalarQueryParameter('batch_id', 'STRING', batch_id),
        ]
        rows = self._execute_query(_query, query_parameters)
        if rows:
            return rows[0]['ar_guid']

        # return None if no ar_guid found
        return None
def prepare_time_periods(
    query: BillingTotalCostQueryModel,
) -> TimeGroupingDetails:
    """Prepare Time periods grouping and parsing formulas.

    Returns a ``TimeGroupingDetails`` whose ``field`` is the SELECT
    expression that formats the time column into a ``day`` string, whose
    ``formula`` parses that string back into a date, and whose ``separator``
    is the comma needed before the next selected column. For a time period
    that is not handled below, all three parts are empty strings.
    """
    # NOTE(review): the fallback is a member of BillingTimePeriods rather
    # than a time-column enum - confirm that its value is the intended
    # column name.
    time_column = query.time_column or BillingTimePeriods.DAY

    # Interpolate the raw column name, never the enum member itself:
    # formatting an Enum inside an f-string can render 'ClassName.MEMBER'
    # (depends on the enum base class and the Python version), which is
    # not valid SQL.
    column_name = getattr(time_column, 'value', time_column)

    # Based on specified time period, add the corresponding column
    if query.time_periods == BillingTimePeriods.DAY:
        return TimeGroupingDetails(
            field=f'FORMAT_DATE("%Y-%m-%d", {column_name}) as day',
            formula='PARSE_DATE("%Y-%m-%d", day) as day',
            separator=',',
        )

    if query.time_periods == BillingTimePeriods.WEEK:
        return TimeGroupingDetails(
            field=f'FORMAT_DATE("%Y%W", {column_name}) as day',
            formula='PARSE_DATE("%Y%W", day) as day',
            separator=',',
        )

    if query.time_periods == BillingTimePeriods.MONTH:
        return TimeGroupingDetails(
            field=f'FORMAT_DATE("%Y%m", {column_name}) as day',
            formula='PARSE_DATE("%Y%m", day) as day',
            separator=',',
        )

    if query.time_periods == BillingTimePeriods.INVOICE_MONTH:
        # invoice_month is already a column, no formatting of time_column
        return TimeGroupingDetails(
            field='invoice_month as day',
            formula='PARSE_DATE("%Y%m", day) as day',
            separator=',',
        )

    return TimeGroupingDetails('', '', '')
def _convert_output(self, query_job_result):
    """Convert query result to a list of plain dicts.

    ``query_job_result`` is expected to expose ``result()``, returning an
    iterable of rows with a ``total_rows`` attribute. Each row is turned
    into a dict; when a row has a non-empty ``labels`` entry (a sequence of
    ``{'key': ..., 'value': ...}`` items), every label is additionally
    flattened into the record as ``record[key] = value``.

    Returns an empty list when there is no job or the job has no rows.
    """
    if not query_job_result:
        # return empty list if no record found
        return []

    # Ask the job for its rows only once and reuse them for both the
    # emptiness check and the iteration.
    records = query_job_result.result()
    if records.total_rows == 0:
        return []

    results = []
    for record in records:
        drec = dict(record)
        # a missing, None or empty labels entry leaves the record untouched
        labels = drec.get('labels')
        if labels:
            drec.update({label['key']: label['value'] for label in labels})

        results.append(drec)

    return results
str(query_job_result[0].last_loaded_day) + + return None + + def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): + """prepare daily cost subquery""" + + daily_cost_field = ', day.cost as daily_cost' + daily_cost_join = f"""LEFT JOIN ( + SELECT + {field.value} as field, + cost_category, + SUM(cost) as cost + FROM + `{self.get_table_name()}` + WHERE {self._last_loaded_day_filter()} + GROUP BY + field, + cost_category + ) day + ON month.field = day.field + AND month.cost_category = day.cost_category + """ + + query_params.append( + bigquery.ScalarQueryParameter('last_loaded_day', 'STRING', last_loaded_day), + ) + return (query_params, daily_cost_field, daily_cost_join) + + async def _execute_running_cost_query( + self, + field: BillingColumn, + invoice_month: str | None = None, + ): + """ + Run query to get running cost of selected field + """ + # check if invoice month is valid first + if not invoice_month or not re.match(r'^\d{6}$', invoice_month): + raise ValueError('Invalid invoice month') + + invoice_month_date = datetime.strptime(invoice_month, '%Y%m') + if invoice_month != invoice_month_date.strftime('%Y%m'): + raise ValueError('Invalid invoice month') + + # get start day and current day for given invoice month + # This is to optimise the query, BQ view is partitioned by day + # and not by invoice month + start_day_date, last_day_date = get_invoice_month_range(invoice_month_date) + start_day = start_day_date.strftime('%Y-%m-%d') + last_day = last_day_date.strftime('%Y-%m-%d') + + # start_day and last_day are in to optimise the query + query_params = [ + bigquery.ScalarQueryParameter('start_day', 'STRING', start_day), + bigquery.ScalarQueryParameter('last_day', 'STRING', last_day), + ] + + current_day = datetime.now().strftime('%Y-%m-%d') + is_current_month = last_day >= current_day + last_loaded_day = None + + if is_current_month: + # Only current month can have last 24 hours cost + # Last 24H in UTC time + # Find the last fully loaded 
async def _append_total_running_cost(
    self,
    field: BillingColumn,
    is_current_month: bool,
    last_loaded_day: str | None,
    total_monthly: dict,
    total_daily: dict,
    total_monthly_category: dict,
    total_daily_category: dict,
    results: list[BillingCostBudgetRecord],
) -> list[BillingCostBudgetRecord]:
    """
    Append the grand-total row (compute + storage) to ``results``.

    The total row carries one detail record per cost category found in
    ``total_monthly_category``. Daily figures are only filled in for the
    current month, otherwise they are None. Budget values are always None
    on this row. ``results`` is modified in place and also returned.
    """
    # per-category breakdown shown under the ALL row
    category_details = [
        BillingCostDetailsRecord(
            cost_group=abbrev_cost_category(category),
            cost_category=category,
            daily_cost=total_daily_category[category] if is_current_month else None,
            monthly_cost=monthly_cost,
        )
        for category, monthly_cost in total_monthly_category.items()
    ]

    compute_monthly = total_monthly[COMPUTE]['ALL']
    storage_monthly = total_monthly[STORAGE]['ALL']

    # daily totals are looked up only when they are going to be reported
    if is_current_month:
        compute_daily = total_daily[COMPUTE]['ALL']
        storage_daily = total_daily[STORAGE]['ALL']
        overall_daily = compute_daily + storage_daily
    else:
        compute_daily = None
        storage_daily = None
        overall_daily = None

    total_row = BillingCostBudgetRecord(
        field=f'{BillingColumn.generate_all_title(field)}',
        total_monthly=compute_monthly + storage_monthly,
        total_daily=overall_daily,
        compute_monthly=compute_monthly,
        compute_daily=compute_daily,
        storage_monthly=storage_monthly,
        storage_daily=storage_daily,
        details=category_details,
        budget_spent=None,
        budget=None,
        last_loaded_day=last_loaded_day,
    )
    results.append(total_row)

    return results
async def get_total_cost(
    self,
    query: BillingTotalCostQueryModel,
) -> list[dict] | None:
    """
    Get Total cost of selected fields for requested time interval from BQ views.

    Builds a single parameterised BigQuery statement from ``query``:
    selected fields and optional grouping, optional time-period bucketing,
    the partition/date filters, an optional label filter implemented as a
    temporary SQL function, and optional min-cost / LIMIT / OFFSET clauses.
    The rows are returned through ``_convert_output``.

    Raises:
        ValueError: when start_date, end_date or fields are not provided.
    """
    if not query.start_date or not query.end_date or not query.fields:
        raise ValueError('Date and Fields are required')

    # Get columns to select and to group by
    fields_selected, group_by = self._prepare_aggregation(query)

    # construct order by
    # NOTE(review): the ORDER BY ends up inside the CTE below, while LIMIT /
    # OFFSET are applied to the outer SELECT - confirm the outer result
    # order is what callers expect.
    order_by_str = self._prepare_order_by_string(query.order_by)

    # prepare grouping by time periods
    time_group = TimeGroupingDetails('', '', '')
    if query.time_periods or query.time_column:
        time_group = prepare_time_periods(query)

    # overrides time specific fields with relevant time column name
    query_filter = self._query_to_partitioned_filter(query)

    # prepare where string and SQL parameters
    where_str, sql_parameters = query_filter.to_sql()

    # extract only BQ Query parameter, keys are not used in BQ SQL
    # have to declare empty list first as linting is not happy
    query_parameters: list[
        bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter
    ] = []
    query_parameters.extend(sql_parameters.values())

    # prepare labels as function filters if present
    func_filter = self._prepare_labels_function(query)
    if func_filter:
        # extend where_str and query_parameters
        query_parameters.extend(func_filter.func_sql_parameters)

        # Join the prepared WHERE with the labels-function WHERE, skipping
        # whichever side is empty so no dangling ' AND ' is produced.
        where_str = ' AND '.join(
            part for part in (where_str, func_filter.func_where) if part
        )

    # if group by is populated, then we need SUM the cost, otherwise raw cost
    cost_column = 'SUM(cost) as cost' if query.group_by else 'cost'

    if where_str:
        # Where is not empty, prepend with WHERE
        where_str = f'WHERE {where_str}'

    _query = f"""
    {func_filter.fun_implementation if func_filter else ''}

    WITH t AS (
        SELECT {time_group.field}{time_group.separator} {fields_selected},
            {cost_column}
        FROM `{self.get_table_name()}`
        {where_str}
        {group_by}
        {order_by_str}
    )
    SELECT {time_group.formula}{time_group.separator} {fields_selected}, cost FROM t
    """

    # append min cost condition
    if query.min_cost:
        _query += ' WHERE cost > @min_cost'
        query_parameters.append(
            bigquery.ScalarQueryParameter('min_cost', 'FLOAT64', query.min_cost)
        )

    # append LIMIT and OFFSET if present
    if query.limit:
        _query += ' LIMIT @limit_val'
        query_parameters.append(
            bigquery.ScalarQueryParameter('limit_val', 'INT64', query.limit)
        )
        # In BigQuery OFFSET is only valid as part of a LIMIT clause, so it
        # is emitted only when a limit was given; an offset without a limit
        # is ignored instead of producing invalid SQL.
        if query.offset:
            _query += ' OFFSET @offset_val'
            query_parameters.append(
                bigquery.ScalarQueryParameter('offset_val', 'INT64', query.offset)
            )

    query_job_result = self._execute_query(
        _query, query_parameters, results_as_list=False
    )
    return self._convert_output(query_job_result)
class BillingDailyTable(BillingBaseTable):
    """Billing Aggregated Daily Big Query table.

    Lookup queries (topics, invoice months, cost categories, SKUs) against
    the view named by ``BQ_AGGREG_VIEW``. Every method reads the view name
    from ``self.table_name``.
    """

    table_name = BQ_AGGREG_VIEW

    def get_table_name(self):
        """Get table name"""
        return self.table_name

    async def get_topics(self):
        """Get all topics in database, as a sorted list of strings"""

        # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query
        # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL
        # this day > filter is to limit the amount of data scanned,
        # saving cost for running BQ
        # aggregated views are partitioned by day
        _query = f"""
        SELECT DISTINCT topic
        FROM `{self.table_name}`
        WHERE day > TIMESTAMP_ADD(
            CURRENT_TIMESTAMP(), INTERVAL @days DAY
        )
        ORDER BY topic ASC;
        """

        query_parameters = [
            time_optimisation_parameter(),
        ]
        query_job_result = self._execute_query(_query, query_parameters)

        if query_job_result:
            return [str(dict(row)['topic']) for row in query_job_result]

        # return empty list if no record found
        return []

    async def get_invoice_months(self):
        """Get all invoice months in database, newest first
        Aggregated views contain invoice_month field
        """

        _query = f"""
        SELECT DISTINCT invoice_month
        FROM `{self.table_name}`
        ORDER BY invoice_month DESC;
        """

        query_job_result = self._execute_query(_query)
        if query_job_result:
            return [str(dict(row)['invoice_month']) for row in query_job_result]

        # return empty list if no record found
        return []

    async def get_cost_categories(self):
        """Get all service description in database, as a sorted list"""

        # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query
        # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL
        # this day > filter is to limit the amount of data scanned,
        # saving cost for running BQ
        # aggregated views are partitioned by day
        # self.table_name is used (not the module constant) so this method
        # stays in step with the other lookups of this class.
        _query = f"""
        SELECT DISTINCT cost_category
        FROM `{self.table_name}`
        WHERE day > TIMESTAMP_ADD(
            CURRENT_TIMESTAMP(), INTERVAL @days DAY
        )
        ORDER BY cost_category ASC;
        """

        query_parameters = [
            time_optimisation_parameter(),
        ]
        query_job_result = self._execute_query(_query, query_parameters)

        if query_job_result:
            return [str(dict(row)['cost_category']) for row in query_job_result]

        # return empty list if no record found
        return []

    async def get_skus(
        self,
        limit: int | None = None,
        offset: int | None = None,
    ):
        """Get all SKUs in database, as a sorted list of strings.

        ``limit`` caps the number of returned SKUs; ``offset`` skips that
        many SKUs first and is only applied together with ``limit``.
        """

        # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query
        # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL
        # this day > filter is to limit the amount of data scanned,
        # saving cost for running BQ
        # aggregated views are partitioned by day
        _query = f"""
        SELECT DISTINCT sku
        FROM `{self.table_name}`
        WHERE day > TIMESTAMP_ADD(
            CURRENT_TIMESTAMP(), INTERVAL @days DAY
        )
        ORDER BY sku ASC
        """

        query_parameters = [
            time_optimisation_parameter(),
        ]

        # append LIMIT and OFFSET if present; parameters are bound only
        # when the matching clause is part of the statement
        if limit:
            _query += ' LIMIT @limit_val'
            query_parameters.append(
                bigquery.ScalarQueryParameter('limit_val', 'INT64', limit)
            )
            # OFFSET is only valid as part of a LIMIT clause in BigQuery
            if offset:
                _query += ' OFFSET @offset_val'
                query_parameters.append(
                    bigquery.ScalarQueryParameter('offset_val', 'INT64', offset)
                )

        query_job_result = self._execute_query(_query, query_parameters)

        if query_job_result:
            return [str(dict(row)['sku']) for row in query_job_result]

        # return empty list if no record found
        return []
@dataclasses.dataclass
class BillingFilter(GenericBQFilterModel):
    """
    Filter for billing, contains all possible attributes to filter on.

    Every attribute is a GenericBQFilter and defaults to None.
    """

    # partition specific filters:

    # most billing views are partitioned by day
    day: GenericBQFilter[datetime.datetime] = None

    # gcp table has different partition field: part_time
    part_time: GenericBQFilter[datetime.datetime] = None

    # aggregate has different partition field: usage_end_time
    usage_end_time: GenericBQFilter[datetime.datetime] = None

    # common filters:
    invoice_month: GenericBQFilter[str] = None

    # min cost e.g. 0.01, if not set, will show all
    cost: GenericBQFilter[float] = None

    # one optional filter per billing column
    ar_guid: GenericBQFilter[str] = None
    gcp_project: GenericBQFilter[str] = None
    topic: GenericBQFilter[str] = None
    batch_id: GenericBQFilter[str] = None
    cost_category: GenericBQFilter[str] = None
    sku: GenericBQFilter[str] = None
    dataset: GenericBQFilter[str] = None
    sequencing_type: GenericBQFilter[str] = None
    stage: GenericBQFilter[str] = None
    sequencing_group: GenericBQFilter[str] = None
    compute_category: GenericBQFilter[str] = None
    cromwell_sub_workflow_name: GenericBQFilter[str] = None
    cromwell_workflow_id: GenericBQFilter[str] = None
    goog_pipelines_worker: GenericBQFilter[str] = None
    wdl_task_name: GenericBQFilter[str] = None
    namespace: GenericBQFilter[str] = None
data + """ + billing_filter = query.to_filter() + + # initial partition filter + billing_filter.part_time = GenericBQFilter[datetime]( + gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=(datetime.strptime(query.end_date, '%Y-%m-%d') + timedelta(days=7)) + if query.end_date + else None, + ) + return billing_filter + + async def _last_loaded_day(self): + """Get the most recent fully loaded day in db + Go 2 days back as the data is not always available for the current day + 1 day back is not enough + """ + + _query = f""" + SELECT TIMESTAMP_ADD(MAX(part_time), INTERVAL -2 DAY) as last_loaded_day + FROM `{self.table_name}` + WHERE part_time > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + """ + + query_parameters = [ + time_optimisation_parameter(), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return str(query_job_result[0].last_loaded_day) + + return None + + def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): + """prepare daily cost subquery""" + + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + gcp_billing_optimise_filter = """ + AND part_time >= TIMESTAMP(@last_loaded_day) + AND part_time <= TIMESTAMP_ADD( + TIMESTAMP(@last_loaded_day), INTERVAL 7 DAY + ) + """ + + daily_cost_field = ', day.cost as daily_cost' + daily_cost_join = f"""LEFT JOIN ( + SELECT + {field.value} as field, + cost_category, + SUM(cost) as cost + FROM + `{self.get_table_name()}` + WHERE day = TIMESTAMP(@last_loaded_day) + {gcp_billing_optimise_filter} + GROUP BY + field, + cost_category + ) day + ON month.field = day.field + AND month.cost_category = day.cost_category + """ + + query_params.append( + 
async def get_gcp_projects(self):
    """Get all GCP projects in database.

    Returns the distinct, non-null ``gcp_project`` values as strings in
    ascending order, or an empty list when the query yields nothing.
    """

    # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query
    # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL
    # this part_time > filter is to limit the amount of data scanned,
    # saving cost for running BQ
    _query = f"""
    SELECT DISTINCT gcp_project
    FROM `{self.table_name}`
    WHERE part_time > TIMESTAMP_ADD(
        CURRENT_TIMESTAMP(), INTERVAL @days DAY
    )
    AND gcp_project IS NOT NULL
    ORDER BY gcp_project ASC;
    """

    rows = self._execute_query(_query, [time_optimisation_parameter()])
    if not rows:
        # return empty list if no record found
        return []

    projects = []
    for row in rows:
        projects.append(str(dict(row)['gcp_project']))
    return projects
gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=datetime.strptime(query.end_date, '%Y-%m-%d') + if query.end_date + else None, + ) + return billing_filter diff --git a/db/python/tables/bq/function_bq_filter.py b/db/python/tables/bq/function_bq_filter.py new file mode 100644 index 000000000..f18f60211 --- /dev/null +++ b/db/python/tables/bq/function_bq_filter.py @@ -0,0 +1,109 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from google.cloud import bigquery + +from models.models import BillingColumn + + +class FunctionBQFilter: + """ + Function BigQuery filter where left site is a function call + In such case we need to parameterised values on both side of SQL + E.g. + + SELECT ... + FROM ... + WHERE getLabelValue(labels, 'batch_id') = '1234' + + In this case we have 2 string values which need to be parameterised + """ + + func_where = '' + func_sql_parameters: list[ + bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter + ] = [] + + def __init__(self, name: str, implementation: str): + self.func_name = name + self.fun_implementation = implementation + # param_id is a counter for parameterised values + self._param_id = 0 + + def to_sql( + self, + column_name: BillingColumn, + func_params: str | list[Any] | dict[Any, Any], + func_operator: str = None, + ) -> tuple[str, list[bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]]: + """ + creates the left side of where : FUN(column_name, @params) + each of func_params convert to BQ parameter + combined multiple calls with provided operator, + if func_operator is None then AND is assumed by default + """ + values = [] + conditionals = [] + + if not isinstance(func_params, dict): + # Ignore func_params which are not dictionary for the time being + return '', [] + + for param_key, param_value in func_params.items(): + # parameterised both param_key and param_value + # e.g. 
this is raw SQL example: + # getLabelValue(labels, {param_key}) = {param_value} + self._param_id += 1 + key = f'param{self._param_id}' + val = f'value{self._param_id}' + # add param_key as parameterised BQ value + values.append(FunctionBQFilter._sql_value_prep(key, param_key)) + + # add param_value as parameterised BQ value + values.append(FunctionBQFilter._sql_value_prep(val, param_value)) + + # format as FUN(column_name, @param) = @value + conditionals.append( + ( + f'{self.func_name}({column_name.value},@{key}) = ' + f'{FunctionBQFilter._sql_cond_prep(val, param_value)}' + ) + ) + + if func_operator and func_operator == 'OR': + condition = ' OR '.join(conditionals) + else: + condition = ' AND '.join(conditionals) + + # set the class variables for later use + self.func_where = f'({condition})' + self.func_sql_parameters = values + return self.func_where, self.func_sql_parameters + + @staticmethod + def _sql_cond_prep(key: str, value: Any) -> str: + """ + By default '{key}' is used, + but for datetime it has to be wrapped in TIMESTAMP({key}) + """ + if isinstance(value, datetime): + return f'TIMESTAMP(@{key})' + + # otherwise as default + return f'@{key}' + + @staticmethod + def _sql_value_prep(key: str, value: Any) -> bigquery.ScalarQueryParameter: + """ """ + if isinstance(value, Enum): + return FunctionBQFilter._sql_value_prep(key, value.value) + if isinstance(value, int): + return bigquery.ScalarQueryParameter(key, 'INT64', value) + if isinstance(value, float): + return bigquery.ScalarQueryParameter(key, 'FLOAT64', value) + if isinstance(value, datetime): + return bigquery.ScalarQueryParameter(key, 'STRING', value) + + # otherwise as string parameter + return bigquery.ScalarQueryParameter(key, 'STRING', value) diff --git a/db/python/tables/bq/generic_bq_filter.py b/db/python/tables/bq/generic_bq_filter.py new file mode 100644 index 000000000..b0bfba973 --- /dev/null +++ b/db/python/tables/bq/generic_bq_filter.py @@ -0,0 +1,101 @@ +from datetime import datetime 
+from enum import Enum +from typing import Any + +from google.cloud import bigquery + +from db.python.utils import GenericFilter, T + + +class GenericBQFilter(GenericFilter[T]): + """ + Generic BigQuery filter is BQ specific filter class, based on GenericFilter + """ + + def to_sql( + self, column: str, column_name: str = None + ) -> tuple[str, dict[str, T | list[T] | Any | list[Any]]]: + """ + Convert to SQL, and avoid SQL injection + + """ + conditionals = [] + values: dict[str, T | list[T] | Any | list[Any]] = {} + _column_name = column_name or column + + if not isinstance(column, str): + raise ValueError(f'Column {_column_name!r} must be a string') + if self.eq is not None: + k = self.generate_field_name(_column_name + '_eq') + conditionals.append(f'{column} = {self._sql_cond_prep(k, self.eq)}') + values[k] = self._sql_value_prep(k, self.eq) + if self.in_ is not None: + if not isinstance(self.in_, list): + raise ValueError('IN filter must be a list') + if len(self.in_) == 1: + k = self.generate_field_name(_column_name + '_in_eq') + conditionals.append(f'{column} = {self._sql_cond_prep(k, self.in_[0])}') + values[k] = self._sql_value_prep(k, self.in_[0]) + else: + k = self.generate_field_name(_column_name + '_in') + conditionals.append(f'{column} IN ({self._sql_cond_prep(k, self.in_)})') + values[k] = self._sql_value_prep(k, self.in_) + if self.nin is not None: + if not isinstance(self.nin, list): + raise ValueError('NIN filter must be a list') + k = self.generate_field_name(column + '_nin') + conditionals.append(f'{column} NOT IN ({self._sql_cond_prep(k, self.nin)})') + values[k] = self._sql_value_prep(k, self.nin) + if self.gt is not None: + k = self.generate_field_name(column + '_gt') + conditionals.append(f'{column} > {self._sql_cond_prep(k, self.gt)}') + values[k] = self._sql_value_prep(k, self.gt) + if self.gte is not None: + k = self.generate_field_name(column + '_gte') + conditionals.append(f'{column} >= {self._sql_cond_prep(k, self.gte)}') + values[k] = 
self._sql_value_prep(k, self.gte) + if self.lt is not None: + k = self.generate_field_name(column + '_lt') + conditionals.append(f'{column} < {self._sql_cond_prep(k, self.lt)}') + values[k] = self._sql_value_prep(k, self.lt) + if self.lte is not None: + k = self.generate_field_name(column + '_lte') + conditionals.append(f'{column} <= {self._sql_cond_prep(k, self.lte)}') + values[k] = self._sql_value_prep(k, self.lte) + + return ' AND '.join(conditionals), values + + @staticmethod + def _sql_cond_prep(key, value) -> str: + """ + By default '@{key}' is used, + but for datetime it has to be wrapped in TIMESTAMP(@{k}) + """ + if isinstance(value, datetime): + return f'TIMESTAMP(@{key})' + + # otherwise as default + return f'@{key}' + + @staticmethod + def _sql_value_prep(key, value): + """ + Overrides the default _sql_value_prep to handle BQ parameters + """ + if isinstance(value, list): + return bigquery.ArrayQueryParameter( + key, 'STRING', ','.join([str(v) for v in value]) + ) + if isinstance(value, Enum): + return GenericBQFilter._sql_value_prep(key, value.value) + if isinstance(value, int): + return bigquery.ScalarQueryParameter(key, 'INT64', value) + if isinstance(value, float): + return bigquery.ScalarQueryParameter(key, 'FLOAT64', value) + if isinstance(value, datetime): + return bigquery.ScalarQueryParameter( + key, 'STRING', value.strftime('%Y-%m-%d %H:%M:%S') + ) + + # otherwise as string parameter + return bigquery.ScalarQueryParameter(key, 'STRING', value) diff --git a/db/python/tables/bq/generic_bq_filter_model.py b/db/python/tables/bq/generic_bq_filter_model.py new file mode 100644 index 000000000..c2736cc3a --- /dev/null +++ b/db/python/tables/bq/generic_bq_filter_model.py @@ -0,0 +1,111 @@ +import dataclasses +from typing import Any + +from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from db.python.utils import GenericFilterModel + + +def prepare_bq_query_from_dict_field( + filter_, field_name, column_name +) -> tuple[list[str], 
dict[str, Any]]: + """ + Prepare a SQL query from a dict field, which is a dict of GenericFilters. + Usually this is a JSON field in the database that we want to query on. + """ + conditionals: list[str] = [] + values: dict[str, Any] = {} + for key, value in filter_.items(): + if not isinstance(value, GenericBQFilter): + raise ValueError(f'Filter {field_name} must be a GenericFilter') + if '"' in key: + raise ValueError('Meta key contains " character, which is not allowed') + if "'" in key: + raise ValueError("Meta key contains ' character, which is not allowed") + fconditionals, fvalues = value.to_sql( + f"JSON_EXTRACT({column_name}, '$.{key}')", + column_name=f'{column_name}_{key}', + ) + conditionals.append(fconditionals) + values.update(fvalues) + + return conditionals, values + + +@dataclasses.dataclass(kw_only=True) +class GenericBQFilterModel(GenericFilterModel): + """ + Class that contains fields of GenericBQFilters that can be used to filter + """ + + def __post_init__(self): + for field in dataclasses.fields(self): + value = getattr(self, field.name) + if value is None: + continue + + if isinstance(value, tuple) and len(value) == 1 and value[0] is None: + raise ValueError( + 'There is very likely a trailing comma on the end of ' + f'{self.__class__.__name__}.{field.name}. 
If you actually want a ' + 'tuple of length one with the value = (None,), then use ' + 'dataclasses.field(default_factory=lambda: (None,))' + ) + if isinstance(value, GenericBQFilter): + continue + + if isinstance(value, dict): + # make sure each field is a GenericFilter, or set it to be one, + # in this case it's always 'eq', never automatically in_ + new_value = { + k: v if isinstance(v, GenericBQFilter) else GenericBQFilter(eq=v) + for k, v in value.items() + } + setattr(self, field.name, new_value) + continue + + # lazily provided a value, which we'll correct + if isinstance(value, list): + setattr(self, field.name, GenericBQFilter(in_=value)) + else: + setattr(self, field.name, GenericBQFilter(eq=value)) + + def to_sql( + self, field_overrides: dict[str, Any] = None + ) -> tuple[str, dict[str, Any]]: + """Convert the model to SQL, and avoid SQL injection""" + _foverrides = field_overrides or {} + + # check for bad field_overrides + bad_field_overrides = set(_foverrides.keys()) - set( + f.name for f in dataclasses.fields(self) + ) + if bad_field_overrides: + raise ValueError( + f'Specified field overrides that were not used: {bad_field_overrides}' + ) + + fields = dataclasses.fields(self) + conditionals, values = [], {} + for field in fields: + fcolumn = _foverrides.get(field.name, field.name) + if filter_ := getattr(self, field.name): + if isinstance(filter_, dict): + meta_conditionals, meta_values = prepare_bq_query_from_dict_field( + filter_=filter_, field_name=field.name, column_name=fcolumn + ) + conditionals.extend(meta_conditionals) + values.update(meta_values) + elif isinstance(filter_, GenericBQFilter): + fconditionals, fvalues = filter_.to_sql(fcolumn) + conditionals.append(fconditionals) + values.update(fvalues) + else: + raise ValueError( + f'Filter {field.name} must be a GenericBQFilter or ' + 'dict[str, GenericBQFilter]' + ) + + if not conditionals: + return 'True', {} + + return ' AND '.join(filter(None, conditionals)), values diff --git 
a/models/enums/billing.py b/models/enums/billing.py new file mode 100644 index 000000000..efcff271c --- /dev/null +++ b/models/enums/billing.py @@ -0,0 +1,31 @@ +from enum import Enum + + +class BillingSource(str, Enum): + """List of billing sources""" + + RAW = 'raw' + AGGREGATE = 'aggregate' + EXTENDED = 'extended' + BUDGET = 'budget' + GCP_BILLING = 'gcp_billing' + BATCHES = 'batches' + + +class BillingTimePeriods(str, Enum): + """List of billing grouping time periods""" + + # grouping time periods + DAY = 'day' + WEEK = 'week' + MONTH = 'month' + INVOICE_MONTH = 'invoice_month' + + +class BillingTimeColumn(str, Enum): + """List of billing time columns""" + + DAY = 'day' + USAGE_START_TIME = 'usage_start_time' + USAGE_END_TIME = 'usage_end_time' + EXPORT_TIME = 'export_time' diff --git a/models/models/__init__.py b/models/models/__init__.py index 4e52b2bd2..d3b836e9a 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -15,7 +15,11 @@ BillingColumn, BillingCostBudgetRecord, BillingCostDetailsRecord, - BillingRowRecord, + BillingHailBatchCostRecord, + BillingInternal, + BillingSource, + BillingTimeColumn, + BillingTimePeriods, BillingTotalCostQueryModel, BillingTotalCostRecord, ) diff --git a/models/models/billing.py b/models/models/billing.py index 481ea77ce..9587ed44c 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -1,135 +1,75 @@ import datetime from enum import Enum -from db.python.tables.billing import BillingFilter -from db.python.utils import GenericFilter - +from db.python.tables.bq.billing_filter import BillingFilter +from db.python.tables.bq.generic_bq_filter import GenericBQFilter from models.base import SMBase +from models.enums.billing import BillingSource, BillingTimeColumn, BillingTimePeriods -class BillingQueryModel(SMBase): - """Used to query for billing""" - - # topic is cluster index, provide some values to make it more efficient - topic: list[str] | None = None - - # make date required, to avoid 
full table scan - date: str +class BillingInternal(SMBase): + """Model for Analysis""" - cost_category: list[str] | None = None - - def to_filter(self) -> BillingFilter: - """Convert to internal analysis filter""" - return BillingFilter( - topic=GenericFilter(in_=self.topic) if self.topic else None, - date=GenericFilter(eq=self.date), - cost_category=GenericFilter(in_=self.cost_category) - if self.cost_category - else None, - ) - - def __hash__(self): - """Create hash for this object to use in caching""" - return hash(self.json()) - - -class BillingRowRecord(SMBase): - """Return class for the Billing record""" - - id: str + id: str | None + ar_guid: str | None + gcp_project: str | None topic: str | None - service_id: str | None - service_description: str | None - - sku_id: str | None - sku_description: str | None - - usage_start_time: datetime.datetime | None - usage_end_time: datetime.datetime | None - - gcp_project_id: str | None - gcp_project_number: str | None - gcp_project_name: str | None - - # labels - dataset: str | None batch_id: str | None - job_id: str | None - batch_name: str | None - sequencing_type: str | None - stage: str | None - sequencing_group: str | None - - export_time: datetime.datetime | None - cost: str | None - currency: str | None - currency_conversion_rate: str | None - invoice_month: str | None - cost_type: str | None - - class Config: - """Config for BillingRowRecord Response""" - - orm_mode = True + cost_category: str | None + cost: float | None + day: datetime.date | None @staticmethod - def from_json(record): - """Create BillingRowRecord from json""" - - record['service'] = record['service'] if record['service'] else {} - record['project'] = record['project'] if record['project'] else {} - record['invoice'] = record['invoice'] if record['invoice'] else {} - record['sku'] = record['sku'] if record['sku'] else {} - - labels = {} - - if record['labels']: - for lbl in record['labels']: - labels[lbl['key']] = lbl['value'] - - 
record['labels'] = labels - - return BillingRowRecord( - id=record['id'], - topic=record['topic'], - service_id=record['service'].get('id'), - service_description=record['service'].get('description'), - sku_id=record['sku'].get('id'), - sku_description=record['sku'].get('description'), - usage_start_time=record['usage_start_time'], - usage_end_time=record['usage_end_time'], - gcp_project_id=record['project'].get('id'), - gcp_project_number=record['project'].get('number'), - gcp_project_name=record['project'].get('name'), - # labels - dataset=record['labels'].get('dataset'), - batch_id=record['labels'].get('batch_id'), - job_id=record['labels'].get('job_id'), - batch_name=record['labels'].get('batch_name'), - sequencing_type=record['labels'].get('sequencing_type'), - stage=record['labels'].get('stage'), - sequencing_group=record['labels'].get('sequencing_group'), - export_time=record['export_time'], - cost=record['cost'], - currency=record['currency'], - currency_conversion_rate=record['currency_conversion_rate'], - invoice_month=record['invoice'].get('month', ''), - cost_type=record['cost_type'], + def from_db(**kwargs): + """ + Convert from db keys, mainly converting id to id_ + """ + return BillingInternal( + id=kwargs.get('id'), + ar_guid=kwargs.get('ar_guid', kwargs.get('ar-guid')), + gcp_project=kwargs.get('gcp_project'), + topic=kwargs.get('topic'), + batch_id=kwargs.get('batch_id'), + cost_category=kwargs.get('cost_category'), + cost=kwargs.get('cost'), + day=kwargs.get('day'), ) class BillingColumn(str, Enum): """List of billing columns""" - # base view columns + # raw view columns + ID = 'id' TOPIC = 'topic' - PROJECT = 'gcp_project' + SERVICE = 'service' + SKU = 'sku' + USAGE_START_TIME = 'usage_start_time' + USAGE_END_TIME = 'usage_end_time' + PROJECT = 'project' + LABELS = 'labels' + SYSTEM_LABELS = 'system_labels' + LOCATION = 'location' + EXPORT_TIME = 'export_time' + COST = 'cost' + CURRENCY = 'currency' + CURRENCY_CONVERSION_RATE = 
'currency_conversion_rate' + USAGE = 'usage' + CREDITS = 'credits' + INVOICE = 'invoice' + COST_TYPE = 'cost_type' + ADJUSTMENT_INFO = 'adjustment_info' + + # base view columns + # TOPIC = 'topic' + # SKU = 'sku' + # CURRENCY = 'currency' + # COST = 'cost' + # LABELS = 'labels' + GCP_PROJECT = 'gcp_project' DAY = 'day' COST_CATEGORY = 'cost_category' - SKU = 'sku' AR_GUID = 'ar_guid' - CURRENCY = 'currency' - COST = 'cost' INVOICE_MONTH = 'invoice_month' # extended, filtered view columns @@ -138,23 +78,122 @@ class BillingColumn(str, Enum): SEQUENCING_TYPE = 'sequencing_type' STAGE = 'stage' SEQUENCING_GROUP = 'sequencing_group' + COMPUTE_CATEGORY = 'compute_category' + CROMWELL_SUB_WORKFLOW_NAME = 'cromwell_sub_workflow_name' + CROMWELL_WORKFLOW_ID = 'cromwell_workflow_id' + GOOG_PIPELINES_WORKER = 'goog_pipelines_worker' + WDL_TASK_NAME = 'wdl_task_name' + NAMESPACE = 'namespace' + + @classmethod + def can_group_by(cls, value: 'BillingColumn') -> bool: + """ + Return True if column can be grouped by + TODO: If any new columns are added above and cannot be in a group by, add them here + This could be record, array or struct type + """ + return value not in ( + BillingColumn.COST, + BillingColumn.SERVICE, + # BillingColumn.SKU, + BillingColumn.PROJECT, + BillingColumn.LABELS, + BillingColumn.SYSTEM_LABELS, + BillingColumn.LOCATION, + BillingColumn.USAGE, + BillingColumn.CREDITS, + BillingColumn.INVOICE, + BillingColumn.ADJUSTMENT_INFO, + ) + + @classmethod + def is_extended_column(cls, value: 'BillingColumn') -> bool: + """Return True if column is extended""" + return value in ( + BillingColumn.DATASET, + BillingColumn.BATCH_ID, + BillingColumn.SEQUENCING_TYPE, + BillingColumn.STAGE, + BillingColumn.SEQUENCING_GROUP, + BillingColumn.COMPUTE_CATEGORY, + BillingColumn.CROMWELL_SUB_WORKFLOW_NAME, + BillingColumn.CROMWELL_WORKFLOW_ID, + BillingColumn.GOOG_PIPELINES_WORKER, + BillingColumn.WDL_TASK_NAME, + BillingColumn.NAMESPACE, + ) + + @classmethod + def 
str_to_enum(cls, value: str) -> 'BillingColumn': + """Convert string to enum""" + # all column names have underscore in SQL, but dash in UI / stored data + adjusted_value = value.replace('-', '_') + str_to_enum = {v.value: v for k, v in BillingColumn.__members__.items()} + return str_to_enum[adjusted_value] + + @classmethod + def raw_cols(cls) -> list[str]: + """Return list of raw column names""" + return [ + BillingColumn.ID.value, + BillingColumn.TOPIC.value, + BillingColumn.SERVICE.value, + BillingColumn.SKU.value, + BillingColumn.USAGE_START_TIME.value, + BillingColumn.USAGE_END_TIME.value, + BillingColumn.PROJECT.value, + BillingColumn.LABELS.value, + BillingColumn.SYSTEM_LABELS.value, + BillingColumn.LOCATION.value, + BillingColumn.EXPORT_TIME.value, + BillingColumn.COST.value, + BillingColumn.CURRENCY.value, + BillingColumn.CURRENCY_CONVERSION_RATE.value, + BillingColumn.USAGE.value, + BillingColumn.CREDITS.value, + BillingColumn.INVOICE.value, + BillingColumn.COST_TYPE.value, + BillingColumn.ADJUSTMENT_INFO.value, + ] + + @classmethod + def standard_cols(cls) -> list[str]: + """Return list of standard column names""" + return [ + BillingColumn.TOPIC.value, + BillingColumn.GCP_PROJECT.value, + BillingColumn.SKU.value, + BillingColumn.CURRENCY.value, + BillingColumn.COST.value, + BillingColumn.LABELS.value, + BillingColumn.DAY.value, + BillingColumn.COST_CATEGORY.value, + BillingColumn.AR_GUID.value, + BillingColumn.INVOICE_MONTH.value, + ] @classmethod def extended_cols(cls) -> list[str]: """Return list of extended column names""" return [ - 'dataset', - 'batch_id', - 'sequencing_type', - 'stage', - 'sequencing_group', - 'ar_guid' + BillingColumn.DATASET.value, + BillingColumn.BATCH_ID.value, + BillingColumn.SEQUENCING_TYPE.value, + BillingColumn.STAGE.value, + BillingColumn.SEQUENCING_GROUP.value, + BillingColumn.AR_GUID.value, + BillingColumn.COMPUTE_CATEGORY.value, + BillingColumn.CROMWELL_SUB_WORKFLOW_NAME.value, + 
BillingColumn.CROMWELL_WORKFLOW_ID.value, + BillingColumn.GOOG_PIPELINES_WORKER.value, + BillingColumn.WDL_TASK_NAME.value, + BillingColumn.NAMESPACE.value, ] @staticmethod def generate_all_title(record) -> str: """Generate Column as All Title""" - if record == BillingColumn.PROJECT: + if record == BillingColumn.GCP_PROJECT: return 'All GCP Projects' return f'All {record.title()}s' @@ -170,20 +209,44 @@ class BillingTotalCostQueryModel(SMBase): fields: list[BillingColumn] start_date: str end_date: str - # optional, can be aggregate or gcp_billing - source: str | None = None + # optional, can be raw, aggregate or gcp_billing + source: BillingSource | None = None # optional - filters: dict[BillingColumn, str] | None = None + filters: dict[BillingColumn, str | list | dict] | None = None + # optional, AND or OR + filters_op: str | None = None + group_by: bool = True + # order by, reverse= TRUE for DESC, FALSE for ASC order_by: dict[BillingColumn, bool] | None = None limit: int | None = None offset: int | None = None + # default to day, can be day, week, month, invoice_month + time_column: BillingTimeColumn | None = None + time_periods: BillingTimePeriods | None = None + + # optional, show the min cost, e.g. 
0.01, if not set, will show all + min_cost: float | None = None + def __hash__(self): """Create hash for this object to use in caching""" return hash(self.json()) + def to_filter(self) -> BillingFilter: + """ + Convert to internal analysis filter + """ + billing_filter = BillingFilter() + if self.filters: + # add filters as attributes + for fk, fv in self.filters.items(): + # fk is BillColumn, fv is value + setattr(billing_filter, fk.value, GenericBQFilter(eq=fv)) + + return billing_filter + class BillingTotalCostRecord(SMBase): """Return class for the Billing Total Cost record""" @@ -192,14 +255,22 @@ class BillingTotalCostRecord(SMBase): topic: str | None gcp_project: str | None cost_category: str | None - sku: str | None + sku: str | dict | None + invoice_month: str | None ar_guid: str | None + # extended columns dataset: str | None batch_id: str | None sequencing_type: str | None stage: str | None sequencing_group: str | None + compute_category: str | None + cromwell_sub_workflow_name: str | None + cromwell_workflow_id: str | None + goog_pipelines_worker: str | None + wdl_task_name: str | None + namespace: str | None cost: float currency: str | None @@ -213,12 +284,19 @@ def from_json(record): gcp_project=record.get('gcp_project'), cost_category=record.get('cost_category'), sku=record.get('sku'), + invoice_month=record.get('invoice_month'), ar_guid=record.get('ar_guid'), dataset=record.get('dataset'), batch_id=record.get('batch_id'), sequencing_type=record.get('sequencing_type'), stage=record.get('stage'), sequencing_group=record.get('sequencing_group'), + compute_category=record.get('compute_category'), + cromwell_sub_workflow_name=record.get('cromwell_sub_workflow_name'), + cromwell_workflow_id=record.get('cromwell_workflow_id'), + goog_pipelines_worker=record.get('goog_pipelines_worker'), + wdl_task_name=record.get('wdl_task_name'), + namespace=record.get('namespace'), cost=record.get('cost'), currency=record.get('currency'), ) @@ -256,6 +334,7 @@ class 
BillingCostBudgetRecord(SMBase): storage_daily: float | None details: list[BillingCostDetailsRecord] | None budget_spent: float | None + budget: float | None last_loaded_day: str | None @@ -274,5 +353,14 @@ def from_json(record): BillingCostDetailsRecord.from_json(row) for row in record.get('details') ], budget_spent=record.get('budget_spent'), + budget=record.get('budget'), last_loaded_day=record.get('last_loaded_day'), ) + + +class BillingHailBatchCostRecord(SMBase): + """Return class for the Billing Cost by batch_id/ar_guid""" + + ar_guid: str | None + batch_ids: list[str] | None + costs: list[dict] | None diff --git a/web/src/Routes.tsx b/web/src/Routes.tsx index 5abaa6d8d..a307355e6 100644 --- a/web/src/Routes.tsx +++ b/web/src/Routes.tsx @@ -6,7 +6,9 @@ import { BillingHome, BillingSeqrProp, BillingCostByTime, + BillingCostByAnalysis, BillingInvoiceMonthCost, + BillingCostByCategory, } from './pages/billing' import DocumentationArticle from './pages/docs/Documentation' import SampleView from './pages/sample/SampleView' @@ -37,7 +39,6 @@ const Routes: React.FunctionComponent = () => ( } /> - ( } /> + + + + } + /> + + + + } + /> input { + color: var(--color-text-primary); + background: var(--color-bg-card); +} + +.ui.toggle.checkbox label { + color: var(--color-text-primary); +} + +.ui.toggle.checkbox input:checked ~ .box, +.ui.toggle.checkbox input:checked ~ label { + color: var(--color-text-primary) !important; +} + +/* charts */ +.tooltip { + background-color: var(--color-bg-card); + color: var(--color-text-primary); +} + +.chart-card { + background-color: var(--color-bg-card); + color: var(--color-text-primary) !important; +} + +.chart-label { + color: var(--color-text-primary) !important; +} + +.hb-chart-grid { + color: var(--color-divider) !important; +} + +.billing-over-budget { + background-color: var(--color-bg-over-budget) !important; +} + +.billing-half-budget { + background-color: var(--color-bg-half-budget) !important; +} + +.billing-under-budget { 
+ background-color: var(--color-bg-under-budget) !important; +} + +.billing-href a { + color: var(--color-text-href) !important; +} + +.bold-text { + font-weight: bold; +} diff --git a/web/src/pages/admin/ProjectsAdmin.tsx b/web/src/pages/admin/ProjectsAdmin.tsx index d1f3ae543..5f1d7702a 100644 --- a/web/src/pages/admin/ProjectsAdmin.tsx +++ b/web/src/pages/admin/ProjectsAdmin.tsx @@ -46,7 +46,7 @@ const ProjectsAdmin = () => { {error}
-
diff --git a/web/src/pages/billing/Billing.css b/web/src/pages/billing/Billing.css deleted file mode 100644 index 68d62cfe0..000000000 --- a/web/src/pages/billing/Billing.css +++ /dev/null @@ -1,15 +0,0 @@ -#group-by-dropdown .menu { - background: #ffffff !important; -} - -.field-selector-label { - width: 200px !important; -} - -.field-selector-dropdown { - width: 80% !important; -} - -.donut-chart { - margin-top: 20px; -} diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx new file mode 100644 index 000000000..87e1ef153 --- /dev/null +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -0,0 +1,253 @@ +import * as React from 'react' +import { useLocation, useNavigate, useSearchParams } from 'react-router-dom' +import { Button, Card, Grid, Input, Message, Select, Dropdown } from 'semantic-ui-react' +import SearchIcon from '@mui/icons-material/Search' + +import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' +import { BillingApi, BillingTotalCostRecord } from '../../sm-api' +import HailBatchGrid from './components/HailBatchGrid' +import { getMonthStartDate } from '../../shared/utilities/monthStartEndDate' +import generateUrl from '../../shared/utilities/generateUrl' + +enum SearchType { + Ar_guid, + Batch_id, +} + +const BillingCostByAnalysis: React.FunctionComponent = () => { + const [searchParams] = useSearchParams() + + // Data loading + const [isLoading, setIsLoading] = React.useState(true) + const [error, setError] = React.useState() + + const [start, setStart] = React.useState( + searchParams.get('start') ?? getMonthStartDate() + ) + + const [data, setData] = React.useState(undefined) + + const [searchTxt, setSearchTxt] = React.useState(searchParams.get('searchTxt') ?? 
'') + + const searchOptions: string[] = Object.keys(SearchType).filter((item) => isNaN(Number(item))) + const dropdownOptions = searchOptions.map((item) => ({ + text: item.replaceAll('_', ' '), + value: item, + })) + + const [searchByType, setSearchByType] = React.useState( + SearchType[searchParams.get('searchType')] ?? SearchType[0] + ) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = (sType: SearchType, sTxt: string | undefined) => { + const url = generateUrl(location, { + searchType: SearchType[sType], + searchTxt: sTxt, + }) + navigate(url) + } + + const getData = (sType: SearchType | undefined | string, sTxt: string) => { + if ((sType === undefined || sTxt === undefined) && sTxt.length < 6) { + // Seaarch text is not large enough + setIsLoading(false) + return + } + setIsLoading(true) + setError(undefined) + + // convert sType to enum + const convertedType: SearchType = SearchType[sType as keyof typeof SearchType] + + if (convertedType === SearchType.Ar_guid) { + new BillingApi() + .costByArGuid(sTxt) + .then((response) => { + setIsLoading(false) + setData(response.data) + }) + .catch((er) => setError(er.message)) + } else if (convertedType === SearchType.Batch_id) { + new BillingApi() + .costByBatchId(sTxt) + .then((response) => { + setIsLoading(false) + setData(response.data) + }) + .catch((er) => setError(er.message)) + } else { + setIsLoading(false) + } + } + + const handleSearch = () => { + if (searchByType === undefined || searchTxt === undefined || searchTxt.length < 6) { + // Seaarch text is not large enough + setIsLoading(false) + return + } + getData(searchByType, searchTxt) + } + + const handleSearchChange = (event: any, dt: any) => { + setSearchTxt(dt.value) + } + + const handleSearchTypeChange = (event: any, dt: any) => { + setSearchByType(dt.value) + } + + const handleSearchClick = () => { + updateNav(searchByType, searchTxt) + handleSearch() + } + + const 
getDefaultSearchType = () => { + if (searchByType !== undefined) { + return searchByType + } + return dropdownOptions[0].value + } + + React.useEffect(() => { + handleSearch() + }, []) + + const errorComponent = () => { + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + + // if no error return null + return null + } + + const loadingComponent = () => { + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // otherwise return null + return null + } + + const searchCard = () => ( + +

+ Billing Cost By Analysis +

+ + + + {/* There is a Dropdown inside the search Input control to select searchType */} + + } + labelPosition="right" + placeholder="Search..." + onChange={handleSearchChange} + value={searchTxt} + /> + + + + + ) + } + + return ( + <> + +

+ Billing Cost By Category +

+ + + + + + + + + + + + + + + + + + + + + + changeDate('start', e.target.value)} + value={start} + /> + + + + + + + setAccumulate(!accumulate)} + /> + + + + + + + + +
+ + ) +} + +export default BillingCostByCategory diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 629b3ee08..ba191d335 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -6,19 +6,21 @@ import FieldSelector from './components/FieldSelector' import { BillingApi, BillingColumn, + BillingSource, BillingTotalCostQueryModel, BillingTotalCostRecord, } from '../../sm-api' import { convertFieldName } from '../../shared/utilities/fieldName' +import { getMonthStartDate, getMonthEndDate } from '../../shared/utilities/monthStartEndDate' import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' import BillingCostByTimeTable from './components/BillingCostByTimeTable' import { BarChart, IData } from '../../shared/components/Graphs/BarChart' import { DonutChart } from '../../shared/components/Graphs/DonutChart' +import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' +import generateUrl from '../../shared/utilities/generateUrl' const BillingCostByTime: React.FunctionComponent = () => { - const now = new Date() - const [searchParams] = useSearchParams() const inputGroupBy: string | undefined = searchParams.get('groupBy') ?? undefined @@ -28,11 +30,9 @@ const BillingCostByTime: React.FunctionComponent = () => { const inputSelectedData: string | undefined = searchParams.get('selectedData') ?? undefined const [start, setStart] = React.useState( - searchParams.get('start') ?? `${now.getFullYear()}-${now.getMonth() + 1}-01` - ) - const [end, setEnd] = React.useState( - searchParams.get('end') ?? `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + searchParams.get('start') ?? getMonthStartDate() ) + const [end, setEnd] = React.useState(searchParams.get('end') ?? getMonthEndDate()) const [groupBy, setGroupBy] = React.useState( fixedGroupBy ?? 
BillingColumn.GcpProject ) @@ -44,6 +44,7 @@ const BillingCostByTime: React.FunctionComponent = () => { // Data loading const [isLoading, setIsLoading] = React.useState(true) const [error, setError] = React.useState() + const [message, setMessage] = React.useState() const [groups, setGroups] = React.useState([]) const [data, setData] = React.useState([]) const [aggregatedData, setAggregatedData] = React.useState([]) @@ -54,20 +55,16 @@ const BillingCostByTime: React.FunctionComponent = () => { const updateNav = ( grp: string | undefined, - data: string | undefined, - start: string, - end: string + selData: string | undefined, + st: string, + ed: string ) => { - let url = `${location.pathname}` - if (grp || data) url += '?' - - let params: string[] = [] - if (grp) params.push(`groupBy=${grp}`) - if (data) params.push(`selectedData=${data}`) - if (start) params.push(`start=${start}`) - if (end) params.push(`end=${end}`) - - url += params.join('&') + const url = generateUrl(location, { + groupBy: grp, + selectedData: selData, + start: st, + end: ed, + }) navigate(url) } @@ -95,50 +92,39 @@ const BillingCostByTime: React.FunctionComponent = () => { const getData = (query: BillingTotalCostQueryModel) => { setIsLoading(true) setError(undefined) + setMessage(undefined) new BillingApi() .getTotalCost(query) .then((response) => { setIsLoading(false) // calc totals per cost_category - const recTotals = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { cost_category, cost } = item - if (!acc[cost_category]) { - acc[cost_category] = 0 - } - acc[cost_category] += cost - return acc - }, - {} - ) + const recTotals: { [key: string]: number } = {} + response.data.forEach((item: BillingTotalCostRecord) => { + const { cost_category, cost } = item + if (!recTotals[cost_category]) { + recTotals[cost_category] = 0 + } + recTotals[cost_category] += cost + }) const sortedRecTotals: { [key: string]: number } = 
Object.fromEntries( Object.entries(recTotals).sort(([, a], [, b]) => b - a) ) const rec_grps = Object.keys(sortedRecTotals) - const records = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { day, cost_category, cost } = item - if (day !== undefined) { - if (!acc[day]) { - // initialise day structure - acc[day] = {} - rec_grps.forEach((k) => { - acc[day][k] = 0 - }) - } - acc[day][cost_category] = cost + const records: { [key: string]: { [key: string]: number } } = {} + response.data.forEach((item: BillingTotalCostRecord) => { + const { day, cost_category, cost } = item + if (day !== undefined) { + if (!records[day]) { + // initial day structure + records[day] = {} + rec_grps.forEach((k) => { + records[day][k] = 0 + }) } - return acc - }, - {} - ) + records[day][cost_category] = cost + } + }) const no_undefined: string[] = rec_grps.filter( (item): item is string => item !== undefined ) @@ -159,7 +145,7 @@ const BillingCostByTime: React.FunctionComponent = () => { .slice(index) .reduce((sum, { value }) => sum + value, 0) - if (acc.length == maxDataPoints) { + if (acc.length === maxDataPoints) { acc.push({ label: 'Rest*', value: restValue }) } else { acc[maxDataPoints].value += restValue @@ -173,11 +159,128 @@ const BillingCostByTime: React.FunctionComponent = () => { .catch((er) => setError(er.message)) } + const messageComponent = () => { + if (message) { + return ( + setError(undefined)}> + {message} + + ) + } + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + return null + } + + const dataComponent = () => { + if (message || error || isLoading) { + return null + } + + if (!message && !error && !isLoading && (!data || data.length === 0)) { + return ( + + No Data + + ) + } + + return ( + <> + + + + + + + + + + + + + + + + + + + + + + ) + } + React.useEffect(() => { - if (selectedData !== undefined && selectedData !== '' && selectedData !== null) { - let source = 'aggregate' + if ( + selectedData !== undefined && + selectedData !== '' && + selectedData !== null && + start !== undefined && + start !== '' && + start !== null && + end !== undefined && + end !== '' && + end !== null && + groupBy !== undefined && + groupBy !== null + ) { + // valid selection, retrieve data + let source = BillingSource.Aggregate if (groupBy === BillingColumn.GcpProject) { - source = 'gcp_billing' + source = BillingSource.GcpBilling } if (selectedData.startsWith('All ')) { getData({ @@ -197,21 +300,28 @@ const BillingCostByTime: React.FunctionComponent = () => { source: source, }) } + } else { + // invalid selection, + setIsLoading(false) + setError(undefined) + + if (groupBy === undefined || groupBy === null) { + // Group By not selected + setMessage('Please select Group By') + } else if (selectedData === undefined || selectedData === null || selectedData === '') { + // Top Level not selected + setMessage(`Please select ${groupBy}`) + } else if (start === undefined || start === null || start === '') { + setMessage('Please select Start date') + } else if (end === undefined || end === null || end === '') { + setMessage('Please select End date') + } else { + // generic message + setMessage('Please make selection') + } } }, [start, end, groupBy, selectedData]) - if (error) { - return ( - setError(undefined)}> - {error} -
- -
- ) - } - return ( <> @@ -223,13 +333,14 @@ const BillingCostByTime: React.FunctionComponent = () => { Billing Cost By Time - + @@ -240,11 +351,12 @@ const BillingCostByTime: React.FunctionComponent = () => { onClickFunction={onSelect} selected={selectedData} includeAll={true} + autoSelect={true} />
- + { /> +
- - - - - - - - - + {messageComponent()} - - - - - - - - - + {dataComponent()} ) } diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index d7793d265..3dcfc9e48 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -2,15 +2,13 @@ import * as React from 'react' import { Link, useSearchParams, useNavigate, useLocation } from 'react-router-dom' import { Table as SUITable, Message, Button, Checkbox, Dropdown, Grid } from 'semantic-ui-react' import _ from 'lodash' - import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import Table from '../../shared/components/Table' import { BillingApi, BillingColumn, BillingCostBudgetRecord } from '../../sm-api' - -import './Billing.css' import FieldSelector from './components/FieldSelector' - import { convertFieldName } from '../../shared/utilities/fieldName' +import { HorizontalStackedBarChart } from '../../shared/components/Graphs/HorizontalStackedBarChart' +import generateUrl from '../../shared/utilities/generateUrl' const BillingCurrentCost = () => { const [isLoading, setIsLoading] = React.useState(true) @@ -23,6 +21,8 @@ const BillingCurrentCost = () => { direction: 'undefined', }) + const [showAsChart, setShowAsChart] = React.useState(true) + // Pull search params for use in the component const [searchParams] = useSearchParams() const inputGroupBy: string | null = searchParams.get('groupBy') @@ -36,10 +36,10 @@ const BillingCurrentCost = () => { const navigate = useNavigate() const updateNav = (grp: BillingColumn, invoiceMonth: string | undefined) => { - let url = `${location.pathname}?groupBy=${grp}` - if (invoiceMonth) { - url += `&invoiceMonth=${invoiceMonth}` - } + const url = generateUrl(location, { + groupBy: grp, + invoiceMonth: invoiceMonth, + }) navigate(url) } @@ -56,7 +56,7 @@ const BillingCurrentCost = () => { const [lastLoadedDay, setLastLoadedDay] = 
React.useState() const getCosts = (grp: BillingColumn, invoiceMth: string | undefined) => { - updateNav(groupBy, invoiceMth) + updateNav(grp, invoiceMth) setIsLoading(true) setError(undefined) let source = 'aggregate' @@ -126,21 +126,24 @@ const BillingCurrentCost = () => { {error}
-
) - if (isLoading) - return ( -
- -

- This query takes a while... -

-
- ) + const rowColor = (p: BillingCostBudgetRecord) => { + if (p.budget_spent === undefined || p.budget_spent === null) { + return '' + } + if (p.budget_spent > 90) { + return 'billing-over-budget' + } + if (p.budget_spent > 50) { + return 'billing-half-budget' + } + return 'billing-under-budget' + } const handleSort = (clickedColumn: string) => { if (sort.column !== clickedColumn) { @@ -182,7 +185,7 @@ const BillingCurrentCost = () => { <>

Billing By Invoice Month

- + { selected={invoiceMonth} /> + + + setShowAsChart(!showAsChart)} + /> + - - - - - - - - {invoiceMonth === thisMonth ? ( - - 24H (day UTC {lastLoadedDay}) - - ) : null} - - {groupBy === BillingColumn.GcpProject ? ( - - Invoice Month (Acc) - - ) : ( - - Invoice Month (Acc) - - )} - - - - - {HEADER_FIELDS.map((k) => { - switch (k.show_always || invoiceMonth === thisMonth) { - case true: - return ( - handleSort(k.category)} + {(() => { + if (!showAsChart) return null + if (String(invoiceMonth) === String(thisMonth)) { + return ( + + + + + + + + + ) + } + return ( + + + + + + ) + })()} + + {!showAsChart ? ( +
+ + + + + + + {invoiceMonth === thisMonth ? ( + + 24H (day UTC {lastLoadedDay}) + + ) : null} + + {groupBy === BillingColumn.GcpProject ? ( + + Invoice Month (Acc) + + ) : ( + + Invoice Month (Acc) + + )} + + + + + {HEADER_FIELDS.map((k) => { + switch (k.show_always || invoiceMonth === thisMonth) { + case true: + return ( + handleSort(k.category)} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + {convertFieldName(k.title)} + + ) + default: + return null + } + })} + + {groupBy === BillingColumn.GcpProject && invoiceMonth === thisMonth ? ( + handleSort('budget_spent')} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + Budget Spend % + + ) : null} + + + + {_.orderBy( + costRecords, + [sort.column], + sort.direction === 'ascending' ? ['asc'] : ['desc'] + ).map((p) => ( + + + + handleToggle(p.field)} + /> + + {HEADER_FIELDS.map((k) => { + switch (k.category) { + case 'field': + return ( + + + + {p[k.category]} + + + + ) + default: + switch ( + k.show_always || + invoiceMonth === thisMonth + ) { + case true: + return ( + + {currencyFormat(p[k.category])} + + ) + default: + return null + } + } + })} + + {groupBy === BillingColumn.GcpProject && + invoiceMonth === thisMonth ? ( + {percFormat(p.budget_spent)} + ) : null} + + {typeof p === 'object' && + 'details' in p && + _.orderBy(p?.details, ['monthly_cost'], ['desc']).map((dk) => ( + - {convertFieldName(k.title)} - - ) - default: - return null - } - })} - - {groupBy === BillingColumn.GcpProject && invoiceMonth === thisMonth ? ( - handleSort('budget_spent')} - style={{ - borderBottom: 'none', - position: 'sticky', - resize: 'horizontal', - }} - > - Budget Spend % - - ) : null} - - - - {_.orderBy( - costRecords, - [sort.column], - sort.direction === 'ascending' ? 
['asc'] : ['desc'] - ).map((p) => ( - - - - handleToggle(p.field)} - /> - - {HEADER_FIELDS.map((k) => { - switch (k.category) { - case 'field': - return ( - - - - {p[k.category]} - - - - ) - default: - switch (k.show_always || invoiceMonth === thisMonth) { - case true: - return ( - - {currencyFormat(p[k.category])} - - ) - default: - return null - } - } - })} - - {groupBy === BillingColumn.GcpProject && - invoiceMonth === thisMonth ? ( - {percFormat(p.budget_spent)} - ) : null} - - {typeof p === 'object' && - 'details' in p && - _.orderBy(p?.details, ['monthly_cost'], ['desc']).map((dk) => ( - - - {dk.cost_category} - - {dk.cost_group === 'C' ? ( - - {invoiceMonth === thisMonth ? ( - - - {currencyFormat(dk.daily_cost)} - - - - - ) : null} - - {currencyFormat(dk.monthly_cost)} - - - - ) : ( - - - {invoiceMonth === thisMonth ? ( - - - {currencyFormat(dk.daily_cost)} - - - - - ) : null} - - {currencyFormat(dk.monthly_cost)} - + + {dk.cost_category} + + {dk.cost_group === 'C' ? ( + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + ) : ( + + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + )} + + {groupBy === BillingColumn.GcpProject ? ( - - )} - - {groupBy === BillingColumn.GcpProject ? ( - - ) : null} - - ))} - - ))} - -
+ ) : null} + + ))} + + ))} + + + ) : null} ) } diff --git a/web/src/pages/billing/BillingSeqrProp.tsx b/web/src/pages/billing/BillingSeqrProp.tsx index cca54ed43..80493e2cc 100644 --- a/web/src/pages/billing/BillingSeqrProp.tsx +++ b/web/src/pages/billing/BillingSeqrProp.tsx @@ -2,27 +2,23 @@ import * as React from 'react' import { useLocation, useNavigate } from 'react-router-dom' import { Grid, Card, Input } from 'semantic-ui-react' import SeqrProportionalMapGraph from './components/SeqrProportionalMapGraph' +import { getMonthEndDate } from '../../shared/utilities/monthStartEndDate' +import generateUrl from '../../shared/utilities/generateUrl' const BillingSeqrProp: React.FunctionComponent = () => { const now = new Date() const [start, setStart] = React.useState(`${now.getFullYear()}-01-01`) - const [end, setEnd] = React.useState( - `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` - ) + const [end, setEnd] = React.useState(getMonthEndDate()) // use navigate and update url params const location = useLocation() const navigate = useNavigate() - const updateNav = (start: string, end: string) => { - let url = `${location.pathname}` - if (start || end) url += '?' - - let params: string[] = [] - if (start) params.push(`start=${start}`) - if (end) params.push(`end=${end}`) - - url += params.join('&') + const updateNav = (st: string, ed: string) => { + const url = generateUrl(location, { + start: st, + end: ed, + }) navigate(url) } diff --git a/web/src/pages/billing/components/BillingCostByTimeTable.tsx b/web/src/pages/billing/components/BillingCostByTimeTable.tsx index 992ccaeca..14ccd267d 100644 --- a/web/src/pages/billing/components/BillingCostByTimeTable.tsx +++ b/web/src/pages/billing/components/BillingCostByTimeTable.tsx @@ -120,9 +120,6 @@ const BillingCostByTimeTable: React.FC = ({ return (
-

- This query takes a while... -

) } @@ -161,11 +158,13 @@ const BillingCostByTimeTable: React.FC = ({ ).map((p) => ( - + {p.date.toLocaleDateString()} {headerFields().map((k) => ( - {currencyFormat(p.values[k.category])} + + {currencyFormat(p.values[k.category])} + ))} @@ -187,7 +186,6 @@ const BillingCostByTimeTable: React.FC = ({ fitted toggle checked={expandCompute} - slider onChange={() => setExpandCompute(!expandCompute)} /> @@ -227,7 +225,7 @@ const BillingCostByTimeTable: React.FC = ({ All Time Total {headerFields().map((k) => ( - + {currencyFormat( internalData.reduce( diff --git a/web/src/pages/billing/components/CostByTimeBarChart.tsx b/web/src/pages/billing/components/CostByTimeBarChart.tsx new file mode 100644 index 000000000..0035fc3f5 --- /dev/null +++ b/web/src/pages/billing/components/CostByTimeBarChart.tsx @@ -0,0 +1,37 @@ +import * as React from 'react' +import _ from 'lodash' + +import LoadingDucks from '../../../shared/components/LoadingDucks/LoadingDucks' +import { StackedBarChart } from '../../../shared/components/Graphs/StackedBarChart' +import { BillingCostBudgetRecord } from '../../../sm-api' + +interface ICostByTimeBarChartProps { + accumulate: boolean + isLoading: boolean + data: BillingCostBudgetRecord[] +} + +const CostByTimeBarChart: React.FunctionComponent = ({ + accumulate, + isLoading, + data, +}) => { + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + return ( + <> + + + ) +} + +export default CostByTimeBarChart diff --git a/web/src/pages/billing/components/CostByTimeChart.tsx b/web/src/pages/billing/components/CostByTimeChart.tsx index 4eeff8e90..a68a28699 100644 --- a/web/src/pages/billing/components/CostByTimeChart.tsx +++ b/web/src/pages/billing/components/CostByTimeChart.tsx @@ -26,9 +26,6 @@ const CostByTimeChart: React.FunctionComponent = ({ return (
-

- This query takes a while... -

) } diff --git a/web/src/pages/billing/components/FieldSelector.tsx b/web/src/pages/billing/components/FieldSelector.tsx index e6e86450a..3b20b1294 100644 --- a/web/src/pages/billing/components/FieldSelector.tsx +++ b/web/src/pages/billing/components/FieldSelector.tsx @@ -1,11 +1,6 @@ import * as React from 'react' - import { Dropdown, Input, Message } from 'semantic-ui-react' - -import { BillingApi, BillingColumn } from '../../../sm-api' - -import '../Billing.css' - +import { BillingApi, BillingColumn, BillingTimePeriods } from '../../../sm-api' import { convertFieldName } from '../../../shared/utilities/fieldName' interface FieldSelectorProps { @@ -13,6 +8,7 @@ interface FieldSelectorProps { fieldName: string selected?: string includeAll?: boolean + autoSelect?: boolean onClickFunction: (_: any, { value }: any) => void } @@ -21,20 +17,32 @@ const FieldSelector: React.FunctionComponent = ({ fieldName, selected, includeAll, + autoSelect, onClickFunction, }) => { const [loading, setLoading] = React.useState(true) const [error, setError] = React.useState() const [records, setRecords] = React.useState([]) - const extendRecords = (records: string[]) => { + const extendRecords = (recs: string[]) => { if (includeAll) { - if (fieldName === 'GCP-Project') { - return [`All ${convertFieldName(fieldName)}`, ...records] + let fname = convertFieldName(fieldName) + if (fname.endsWith('y')) { + fname = fname.substring(0, fname.length - 1) + 'ie' } - return [`All ${convertFieldName(fieldName)}s`, ...records] + return [`All ${fname}s`, ...recs] + } + return recs + } + + const processResponse = (response_data: string[]) => { + setLoading(false) + const extRecords = extendRecords(response_data) + setRecords(extRecords) + if (!selected && autoSelect) { + // set the first option as the default + onClickFunction(undefined, { value: extRecords[0] }) } - return records } const getTopics = () => { @@ -43,8 +51,7 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() 
.getTopics() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) }) .catch((er) => setError(er.message)) } @@ -55,8 +62,7 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() .getGcpProjects() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) }) .catch((er) => setError(er.message)) } @@ -67,23 +73,54 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() .getInvoiceMonths() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) + }) + .catch((er) => setError(er.message)) + } + + const getStages = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getStages() + .then((response) => { + processResponse(response.data) + }) + .catch((er) => setError(er.message)) + } + + const getCostCategories = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getCostCategories() + .then((response) => { + processResponse(response.data) }) .catch((er) => setError(er.message)) } React.useEffect(() => { if (fieldName === BillingColumn.Topic) getTopics() + else if (fieldName === BillingColumn.GcpProject) getGcpProjects() else if (fieldName === BillingColumn.InvoiceMonth) getInvoiceMonths() + else if (fieldName === BillingColumn.Stage) getStages() + else if (fieldName === BillingColumn.CostCategory) getCostCategories() else if (fieldName === 'Group') { - setRecords([BillingColumn.GcpProject, BillingColumn.Topic]) + setRecords([BillingColumn.GcpProject, BillingColumn.Topic, BillingColumn.Stage]) + setLoading(false) + } else if (fieldName === 'Period') { + setRecords([ + BillingTimePeriods.Day, + BillingTimePeriods.Week, + BillingTimePeriods.Month, + BillingTimePeriods.InvoiceMonth, + ]) setLoading(false) - } else if (fieldName === BillingColumn.GcpProject) getGcpProjects() - else { + } else { setError(`Could not load records 
for ${fieldName}`) } - }, [label, fieldName]) + }, [fieldName]) const capitalize = (str: string): string => { if (str === 'gcp_project') { diff --git a/web/src/pages/billing/components/HailBatchGrid.tsx b/web/src/pages/billing/components/HailBatchGrid.tsx new file mode 100644 index 000000000..d3e8e199c --- /dev/null +++ b/web/src/pages/billing/components/HailBatchGrid.tsx @@ -0,0 +1,499 @@ +import * as React from 'react' +import { Table as SUITable, Popup, Checkbox } from 'semantic-ui-react' +import _ from 'lodash' +import Table from '../../../shared/components/Table' +import sanitiseValue from '../../../shared/utilities/sanitiseValue' +import '../../project/AnalysisRunnerView/AnalysisGrid.css' + +interface Field { + category: string + title: string + width?: string + className?: string + dataMap?: (data: any, value: string) => any +} + +const HailBatchGrid: React.FunctionComponent<{ + data: any[] +}> = ({ data }) => { + // prepare aggregated data by ar_guid, batch_id, job_id and coresponding batch_resource + const aggArGUIDData: any[] = [] + data.forEach((curr) => { + const { cost, topic, usage_start_time, usage_end_time } = curr + const ar_guid = curr['ar-guid'] + const usageStartDate = new Date(usage_start_time) + const usageEndDate = new Date(usage_end_time) + const idx = aggArGUIDData.findIndex((d) => d.ar_guid === ar_guid && d.topic === topic) + if (cost >= 0) { + // do not include credits, should be filter out at API? 
+ if (idx === -1) { + aggArGUIDData.push({ + type: 'ar_guid', + key: ar_guid, + ar_guid, + batch_id: undefined, + job_id: undefined, + topic, + cost, + start_time: usageStartDate, + end_time: usageEndDate, + }) + } else { + aggArGUIDData[idx].cost += cost + aggArGUIDData[idx].start_time = new Date( + Math.min(usageStartDate.getTime(), aggArGUIDData[idx].start_time.getTime()) + ) + aggArGUIDData[idx].end_time = new Date( + Math.max(usageEndDate.getTime(), aggArGUIDData[idx].end_time.getTime()) + ) + } + } + }) + + const aggArGUIDResource: any[] = [] + data.forEach((curr) => { + const { cost, batch_resource } = curr + const ar_guid = curr['ar-guid'] + const idx = aggArGUIDResource.findIndex( + (d) => d.ar_guid === ar_guid && d.batch_resource === batch_resource + ) + if (cost >= 0) { + // do not include credits, should be filter out at API? + if (idx === -1) { + aggArGUIDResource.push({ + type: 'ar_guid', + key: ar_guid, + ar_guid, + batch_resource, + cost, + }) + } else { + aggArGUIDResource[idx].cost += cost + } + } + }) + const aggBatchData: any[] = [] + data.forEach((curr) => { + const { + batch_id, + url, + topic, + namespace, + batch_name, + cost, + usage_start_time, + usage_end_time, + } = curr + const ar_guid = curr['ar-guid'] + const usageStartDate = new Date(usage_start_time) + const usageEndDate = new Date(usage_end_time) + const idx = aggBatchData.findIndex( + (d) => + d.batch_id === batch_id && + d.batch_name === batch_name && + d.topic === topic && + d.namespace === namespace + ) + if (cost >= 0) { + // do not include credits, should be filter out at API? 
+ if (idx === -1) { + aggBatchData.push({ + type: 'batch_id', + key: batch_id, + ar_guid, + batch_id, + url, + topic, + namespace, + batch_name, + job_id: undefined, + cost, + start_time: usageStartDate, + end_time: usageEndDate, + }) + } else { + aggBatchData[idx].cost += cost + aggBatchData[idx].start_time = new Date( + Math.min(usageStartDate.getTime(), aggBatchData[idx].start_time.getTime()) + ) + aggBatchData[idx].end_time = new Date( + Math.max(usageEndDate.getTime(), aggBatchData[idx].end_time.getTime()) + ) + } + } + }) + + const aggBatchResource: any[] = [] + data.forEach((curr) => { + const { batch_id, batch_resource, topic, namespace, batch_name, cost } = curr + const ar_guid = curr['ar-guid'] + const idx = aggBatchResource.findIndex( + (d) => + d.batch_id === batch_id && + d.batch_name === batch_name && + d.batch_resource === batch_resource && + d.topic === topic && + d.namespace === namespace + ) + if (cost >= 0) { + // do not include credits, should be filter out at API? + if (idx === -1) { + aggBatchResource.push({ + type: 'batch_id', + key: batch_id, + ar_guid, + batch_id, + batch_resource, + topic, + namespace, + batch_name, + cost, + }) + } else { + aggBatchResource[idx].cost += cost + } + } + }) + + const aggBatchJobData: any[] = [] + data.forEach((curr) => { + const { batch_id, url, cost, topic, namespace, job_id, usage_start_time, usage_end_time } = + curr + const ar_guid = curr['ar-guid'] + const usageStartDate = new Date(usage_start_time) + const usageEndDate = new Date(usage_end_time) + const idx = aggBatchJobData.findIndex( + (d) => + d.batch_id === batch_id && + d.job_id === job_id && + d.topic === topic && + d.namespace === namespace + ) + if (cost >= 0) { + if (idx === -1) { + aggBatchJobData.push({ + type: 'batch_id/job_id', + key: `${batch_id}/${job_id}`, + batch_id, + job_id, + ar_guid, + url, + topic, + namespace, + cost, + start_time: usageStartDate, + end_time: usageEndDate, + }) + } else { + aggBatchJobData[idx].cost += cost + 
aggBatchJobData[idx].start_time = new Date( + Math.min(usageStartDate.getTime(), aggBatchJobData[idx].start_time.getTime()) + ) + aggBatchJobData[idx].end_time = new Date( + Math.max(usageEndDate.getTime(), aggBatchJobData[idx].end_time.getTime()) + ) + } + } + }) + + const aggBatchJobResource: any[] = [] + data.forEach((curr) => { + const { batch_id, batch_resource, topic, namespace, cost, job_id, job_name } = curr + const ar_guid = curr['ar-guid'] + const idx = aggBatchJobResource.findIndex( + (d) => + d.batch_id === batch_id && + d.job_id === job_id && + d.batch_resource === batch_resource && + d.topic === topic && + d.namespace === namespace + ) + if (cost >= 0) { + if (idx === -1) { + aggBatchJobResource.push({ + type: 'batch_id/job_id', + key: `${batch_id}/${job_id}`, + batch_id, + job_id, + ar_guid, + batch_resource, + topic, + namespace, + cost, + job_name, + }) + } else { + aggBatchJobResource[idx].cost += cost + } + } + }) + + const aggData = [...aggArGUIDData, ...aggBatchData, ...aggBatchJobData] + const aggResource = [...aggArGUIDResource, ...aggBatchResource, ...aggBatchJobResource] + + // combine data and resource for each ar_guid, batch_id, job_id + const combinedData = aggData.map((dataItem) => { + const details = aggResource.filter( + (resourceItem) => + resourceItem.key === dataItem.key && resourceItem.type === dataItem.type + ) + return { ...dataItem, details } + }) + + const [openRows, setOpenRows] = React.useState([]) + + const handleToggle = (position: number) => { + if (!openRows.includes(position)) { + setOpenRows([...openRows, position]) + } else { + setOpenRows(openRows.filter((i) => i !== position)) + } + } + + const prepareBatchUrl = (url: string, txt: string) => ( + + {txt} + + ) + + const prepareBgColor = (log: any) => { + if (log.batch_id === undefined) { + return 'var(--color-border-color)' + } + if (log.job_id === undefined) { + return 'var(--color-border-default)' + } + return 'var(--color-bg)' + } + + const MAIN_FIELDS: Field[] = 
[ + { + category: 'job_id', + title: 'ID', + dataMap: (dataItem: any, value: string) => { + if (dataItem.batch_id === undefined) { + return `AR GUID: ${dataItem.ar_guid}` + } + if (dataItem.job_id === undefined) { + return prepareBatchUrl(dataItem.url, `BATCH ID: ${dataItem.batch_id}`) + } + return prepareBatchUrl(dataItem.url, `JOB: ${value}`) + }, + }, + { + category: 'start_time', + title: 'TIME STARTED', + dataMap: (dataItem: any, value: string) => { + const dateValue = new Date(value) + return ( + + {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + + ) + }, + }, + { + category: 'end_time', + title: 'TIME COMPLETED', + dataMap: (dataItem: any, value: string) => { + const dateValue = new Date(value) + return ( + + {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + + ) + }, + }, + { + category: 'duration', + title: 'DURATION', + dataMap: (dataItem: any, _value: string) => { + const duration = new Date( + dataItem.end_time.getTime() - dataItem.start_time.getTime() + ) + const seconds = Math.floor((duration / 1000) % 60) + const minutes = Math.floor((duration / (1000 * 60)) % 60) + const hours = Math.floor((duration / (1000 * 60 * 60)) % 24) + const formattedDuration = `${hours}h ${minutes}m ${seconds}s` + return {formattedDuration} + }, + }, + { + category: 'cost', + title: 'COST', + dataMap: (dataItem: any, _value: string) => ( + ${dataItem.cost.toFixed(4)}} + position="top center" + /> + ), + }, + ] + + const DETAIL_FIELDS: Field[] = [ + { + category: 'topic', + title: 'TOPIC', + }, + { + category: 'namespace', + title: 'NAMESPACE', + }, + { + category: 'batch_name', + title: 'NAME/SCRIPT', + }, + { + category: 'job_name', + title: 'NAME', + }, + ] + + const expandedRow = (log: any, idx: any) => + MAIN_FIELDS.map(({ category, dataMap, className }) => ( + + {dataMap ? 
dataMap(log, log[category]) : sanitiseValue(log[category])} + + )) + + return ( + + + + + {MAIN_FIELDS.map(({ category, title }, i) => ( + + {title} + + ))} + + + + {MAIN_FIELDS.map(({ category }, i) => ( + + ))} + + + + {combinedData + .sort((a, b) => { + // Sorts an array of objects first by 'batch_id' and then by 'job_id' in ascending order. + if (a.batch_id < b.batch_id) { + return -1 + } + if (a.batch_id > b.batch_id) { + return 1 + } + if (a.job_id < b.job_id) { + return -1 + } + if (a.job_id > b.job_id) { + return 1 + } + return 0 + }) + .map((log, idx) => ( + + + + handleToggle(log.key)} + /> + + {expandedRow(log, idx)} + + {Object.entries(log) + .filter(([c]) => + DETAIL_FIELDS.map(({ category }) => category).includes(c) + ) + .map(([k, v]) => { + const detailField = DETAIL_FIELDS.find( + ({ category }) => category === k + ) + const title = detailField ? detailField.title : k + return ( + + + + {title} + + {v} + + ) + })} + + + + COST BREAKDOWN + + + {typeof log === 'object' && + 'details' in log && + _.orderBy(log?.details, ['cost'], ['desc']).map((dk) => ( + + + + {dk.batch_resource} + + ${dk.cost.toFixed(4)} + + ))} + + ))} + +
+ ) +} + +export default HailBatchGrid diff --git a/web/src/pages/billing/index.ts b/web/src/pages/billing/index.ts index cdb6832fb..037d0b871 100644 --- a/web/src/pages/billing/index.ts +++ b/web/src/pages/billing/index.ts @@ -1,4 +1,6 @@ export { default as BillingHome } from "./BillingHome"; export { default as BillingSeqrProp } from "./BillingSeqrProp"; export { default as BillingCostByTime } from "./BillingCostByTime"; +export { default as BillingCostByAnalysis } from "./BillingCostByAnalysis"; +export { default as BillingCostByCategory } from "./BillingCostByCategory"; export { default as BillingInvoiceMonthCost } from "./BillingInvoiceMonthCost"; diff --git a/web/src/shared/components/Graphs/BarChart.tsx b/web/src/shared/components/Graphs/BarChart.tsx index 67d88fff7..ad66af92b 100644 --- a/web/src/shared/components/Graphs/BarChart.tsx +++ b/web/src/shared/components/Graphs/BarChart.tsx @@ -11,13 +11,21 @@ export interface IData { interface BarChartProps { data: IData[] maxSlices: number - colors: (t: number) => string | undefined + colors?: (t: number) => string | undefined isLoading: boolean } export const BarChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + if (isLoading) { + return ( +
+ +
+ ) + } + if (!data || data.length === 0) { - return
No data available
+ return <>No Data } const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow @@ -55,17 +63,6 @@ export const BarChart: React.FC = ({ data, maxSlices, colors, isL // reset svg contDiv.innerHTML = '' - if (isLoading) { - return ( -
- -

- This query takes a while... -

-
- ) - } - // construct svg const svg = select(contDiv) .append('svg') diff --git a/web/src/shared/components/Graphs/DonutChart.tsx b/web/src/shared/components/Graphs/DonutChart.tsx index 2a6fc4713..208b8a0df 100644 --- a/web/src/shared/components/Graphs/DonutChart.tsx +++ b/web/src/shared/components/Graphs/DonutChart.tsx @@ -11,7 +11,7 @@ export interface IDonutChartData { export interface IDonutChartProps { data?: IDonutChartData[] maxSlices: number - colors: (t: number) => string | undefined + colors?: (t: number) => string | undefined isLoading: boolean } @@ -30,9 +30,18 @@ function calcTranslate(data: IDonutChartPreparadData, move = 4) { } export const DonutChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + if (isLoading) { + return ( +
+ +
+ ) + } + if (!data || data.length === 0) { - return
No data available
+ return <>No Data } + const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow const duration = 250 const containerDivRef = React.useRef() @@ -105,17 +114,6 @@ export const DonutChart: React.FC = ({ data, maxSlices, colors // reset svg contDiv.innerHTML = '' - if (isLoading) { - return ( -
- -

- This query takes a while... -

-
- ) - } - // construct svg const svg = select(contDiv) .append('svg') diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx new file mode 100644 index 000000000..204d7cc7a --- /dev/null +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -0,0 +1,350 @@ +import React from 'react' +import * as d3 from 'd3' +import LoadingDucks from '../LoadingDucks/LoadingDucks' +import { BillingCostBudgetRecord } from '../../../sm-api' +import { grey } from '@mui/material/colors' + +interface HorizontalStackedBarChartProps { + data: BillingCostBudgetRecord[] + title: string + series: string[] + labels: string[] + total_series: string + threshold_values: number[] + threshold_series: string + sorted_by: string + colors?: (t: number) => string | undefined + isLoading: boolean + showLegend: boolean +} + +const HorizontalStackedBarChart: React.FC = ({ + data, + title, + series, + labels, + total_series, + threshold_values, + threshold_series, + sorted_by, + colors, + isLoading, + showLegend, +}) => { + if (!isLoading && (!data || data.length === 0)) { + return
No data available
+ } + + const colorFunc: (t: number) => string | undefined = colors ?? d3.interpolateRainbow + + // set the dimensions and margins of the graph + const margin = { top: 80, right: 20, bottom: 50, left: 250 } + const width = 650 - margin.left - margin.right + const outsideHeight = 2850 + const height = outsideHeight - margin.top - margin.bottom + + const containerDivRef = React.useRef() + + const [clientWidth, setClientWidth] = React.useState(650) + + React.useEffect(() => { + function updateWindowWidth() { + setClientWidth(containerDivRef.current?.clientWidth ?? 650) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + + return () => { + window.removeEventListener('resize', updateWindowWidth) + } + }, []) + + const contDiv = containerDivRef.current + if (contDiv) { + // reset svg + contDiv.innerHTML = '' + + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // prepare data + let maxTotalSeries = Math.max(...data.map((item) => item[total_series])) + const typeKeys = data.map((d) => d.field) + data.sort((a, b) => b[sorted_by] - a[sorted_by]) + + // stack the data + const stack_fnc = d3 + .stack() + .keys(series) + .order(d3.stackOrderNone) + .offset(d3.stackOffsetNone) + + const stackedData = stack_fnc(data) + const indexedData = stackedData.map((innerArray, outerIdx) => + innerArray.map((d, innerIdx) => ({ outerIdx, innerIdx, data: d })) + ) + const budgetData = {} + data.forEach((d) => { + budgetData[d.field] = d.budget + }) + + const maxBudget = Math.max(...data.map((item) => item.budget)) + + if (showLegend) { + if (maxBudget > maxTotalSeries) { + maxTotalSeries = maxBudget * 1.01 + } + } + + // construct svg + const svg = d3 + .select(contDiv) + .append('svg') + .attr('width', '100%') + .attr('height', '100%') + .attr('viewBox', `0 0 650 ${outsideHeight}`) + .attr('preserveAspectRatio', 'xMinYMin') + .append('g') + .attr('transform', `translate(${margin.left}, ${margin.top})`) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern0') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('stroke', 'var(--color-text-primary') + .attr('stroke-width', 1) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern1') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('d', 'M-1,1 l2,-2 M0,4 l4,-4 M3,5 l2,-2') + .attr('stroke', 'var(--color-text-primary') + .attr('stroke-width', 1) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern2') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('d', 'M 2 0 L 2 4') + .attr('stroke', 'var(--color-text-primary') + .attr('stroke-width', 1) + + // X scale and Axis + const formater = d3.format('.1s') + const xScale = d3.scaleSqrt().domain([0, maxTotalSeries]).range([0, 
width]) + + svg.append('g') + .attr('transform', `translate(0, ${height})`) + .call(d3.axisBottom(xScale).ticks(7).tickSize(0).tickPadding(6).tickFormat(formater)) + .call((d) => d.select('.domain').remove()) + + // Y scale and Axis + const yScale = d3 + .scaleBand() + .domain(data.map((d) => d.field)) + .range([0, height]) + .padding(0.2) + + svg.append('g') + .style('font-size', '18px') // make the axis labels bigger + .call(d3.axisLeft(yScale).tickSize(0).tickPadding(5)) + + // color palette + const color = d3.scaleOrdinal().domain(typeKeys).range(['url(#pattern0)', 'url(#pattern1)']) + + const color_fnc = (d) => { + if (threshold_series === undefined) { + // if not defiend trhesholds then use the color function + return colorFunc(d.innerIdx / typeKeys.length) + } + if (d.data.data[threshold_series] == null) { + // no threshold value defined for bar + return 'grey' + } + if (d.data.data[threshold_series] >= threshold_values[0]) { + return 'red' + } + if (d.data.data[threshold_series] >= threshold_values[1]) { + return 'orange' + } + return 'green' + } + + // set vertical grid line + const GridLine = () => d3.axisBottom().scale(xScale) + + svg.append('g') + .attr('class', 'hb-chart-grid') + .call(GridLine().tickSize(height, 0, 0).tickFormat('').ticks(8)) + .selectAll('line') + .style('stroke-dasharray', '5,5') + + // create a tooltip + const tooltip = d3.select('body').append('div').attr('id', 'chart').attr('class', 'tooltip') + + // tooltip events + const mouseover = (d) => { + tooltip.style('opacity', 0.8) + d3.select(this).style('opacity', 0.5) + } + const mousemove = (event, d) => { + const formater = d3.format(',.2f') + tooltip + .html(formater(d.data[1] - d.data[0]) + ' AUD') + .style('top', event.pageY - 10 + 'px') + .style('left', event.pageX + 10 + 'px') + } + const mouseleave = (d) => { + tooltip.style('opacity', 0) + d3.select(this).style('opacity', 1) + } + + // create bars + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + 
.selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => xScale(d.data[0])) + .attr('y', (d) => yScale(d.data.data.field)) + .attr('width', (d) => xScale(d.data[1]) - xScale(d.data[0])) + .attr('height', yScale.bandwidth()) + .attr('fill', (d) => color_fnc(d)) + + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + .attr('fill', (d) => color(d)) + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => xScale(d.data[0])) + .attr('y', (d) => yScale(d.data.data.field)) + .attr('width', (d) => xScale(d.data[1]) - xScale(d.data[0])) + .attr('height', yScale.bandwidth()) + .on('mouseover', mouseover) + .on('mousemove', mousemove) + .on('mouseleave', mouseleave) + + // create bidgetn line + const budgetFnc = (d) => { + if (showLegend) { + return xScale(budgetData[d.data.data.field]) + } + return 0 + } + + const budgetColor = (d) => { + const budgetVal = budgetData[d.data.data.field] + if (showLegend && budgetVal !== null && budgetVal !== undefined) { + return 'darkcyan' + } + return 'rgba(0, 0, 0, 0)' + } + + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => budgetFnc(d)) + .attr('y', (d) => yScale(d.data.data.field) - 5) + .attr('width', (d) => 3) + .attr('height', yScale.bandwidth() + 10) + .attr('fill', (d) => budgetColor(d)) + + // set title + svg.append('text') + .attr('class', 'chart-title') + .style('font-size', '18px') + .attr('x', 0) + .attr('y', -margin.top / 1.7) + .attr('text-anchor', 'start') + .attr('fill', 'currentColor') + .text(title) + + // set Y axis label + svg.append('text') + .attr('class', 'chart-label') + .style('font-size', '18px') + .attr('x', width / 2) + .attr('y', height + margin.bottom) + .attr('text-anchor', 'middle') + .attr('fill', 'currentColor') + .text('AUD') + + if (showLegend) { + // Legend + for (let i = 0; i < labels.length; i++) { + svg.append('rect') + .attr('x', 0 + i * 150) + .attr('y', 
-(margin.top / 2.5)) + .attr('width', 15) + .attr('height', 15) + .style('fill', `url(#pattern${i})`) + + if (i === 0) { + // add background + svg.append('rect') + .attr('x', 0 + i * 150) + .attr('y', -(margin.top / 2.5)) + .attr('width', 15) + .attr('height', 15) + .style('fill', 'grey') + } + + svg.append('text') + .attr('class', 'legend') + .attr('x', 20 + i * 150) + .attr('y', -(margin.top / 3.8)) + .attr('fill', 'currentColor') + .text(labels[i]) + } + + // add budget bar if defined + if (maxBudget !== undefined && maxBudget !== null && maxBudget > 0) { + svg.append('rect') + .attr('x', labels.length * 150) + .attr('y', -(margin.top / 2.5)) + .attr('width', 3) + .attr('height', 15) + .style('fill', 'darkcyan') + + svg.append('text') + .attr('class', 'legend') + .attr('x', 20 + labels.length * 150) + .attr('y', -(margin.top / 3.8)) + .attr('fill', 'currentColor') + .text('Budget') + } + } + } + return
+} + +export { HorizontalStackedBarChart } diff --git a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx index 08ebf3d34..f8256d454 100644 --- a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx +++ b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx @@ -76,7 +76,7 @@ export const StackedAreaByDateChart: React.FC = ({ colors, }) => { if (!data || data.length === 0) { - return + return <>No Data } const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow @@ -134,6 +134,10 @@ export const StackedAreaByDateChart: React.FC = ({ .domain(extent(data, (d) => d.date)) // date is a string, will this take a date object? Yes :) .range([0, width - margin.left - margin.right]) + if (stackedData.length === 0) { + return No Data + } + // use last stackData value to calculate max Y axis point const diffX = stackedData[stackedData.length - 1].flatMap((val) => val[1]) diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx new file mode 100644 index 000000000..00d8d79b0 --- /dev/null +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -0,0 +1,392 @@ +import * as d3 from 'd3' +import React from 'react' + +export interface IStackedBarChartData { + date: Date + values: { [key: string]: number } +} + +interface IStackedBarChartProps { + data?: IStackedBarChartData[] + accumulate: boolean +} + +function getSeries(data: IStackedBarChartData[] | undefined) { + if (!data || data.length === 0) { + return [] + } + + return Object.keys(data[0].values) +} + +function alignToStartOfMonth(date: Date): Date { + const year = date.getFullYear() + const month = date.getMonth() + return new Date(Date.UTC(year, month, 1)) +} + +/** + * Creates an array of three new dates, each incremented by a specified number of days from the given last date. 
+ * If the difference in days is greater than 28, the dates are aligned to the start of their respective months. + * + * @param lastDate - The last date from which the new dates will be calculated. + * @param differenceInDays - The number of days to increment for each new date. + * @returns An array of three new Date objects. + */ +function createNewDates(lastDate: Date, differenceInDays: number): Date[] { + const newDates: Date[] = [] + for (let i = 1; i <= 3; i++) { + const newDate = new Date(lastDate.getTime() + i * differenceInDays * 24 * 60 * 60 * 1000) + if (differenceInDays > 28) { + const alignedDate = alignToStartOfMonth(newDate) + newDates.push(alignedDate) + } else { + newDates.push(newDate) + } + } + return newDates +} + +function getNewDates(data: IStackedBarChartData[]) { + // need at least 2 days to extrapolate + if (!data || data.length < 2) { + return [] + } + + // Get the last date in the data array + const lastDate = data[data.length - 1].date + const prevDate = data[data.length - 2].date + + const timeDifference = Math.abs(lastDate.getTime() - prevDate.getTime()) + const differenceInDays = Math.ceil(timeDifference / (1000 * 3600 * 24)) + + // for monthly add 3 extra days so we get the next month + return createNewDates(lastDate, differenceInDays > 28 ? 
differenceInDays + 3 : differenceInDays) +} + +function prepareData( + series: string[], + data: IStackedBarChartData[], + accumulate: boolean, + newDates: Date[] +) { + if (!data || data.length === 0) { + return [] + } + + const predictedRatio = newDates.length / data.length + const firstDateData = data[0] + const lastDateData = data[data.length - 1] + + // Interpolate the values for the new dates + const newValues = newDates.map((date: Date, i: number) => { + return { + date, + values: series.reduce((acc: Record, key: string) => { + const values = { ...acc } + const interpolator = d3.interpolate( + firstDateData.values[key], + lastDateData.values[key] + ) + const predX = 1 + (i + 1) * predictedRatio + const predictedValue = interpolator(predX) + values[key] = predictedValue < 0 ? lastDateData.values[key] : predictedValue + return values + }, {}), + } + }) + + // Add the new values to the data array + let extData = data.concat(newValues) + extData = extData.filter((item) => item !== undefined) + + return extData +} + +export const StackedBarChart: React.FC = ({ data, accumulate }) => { + const svgRef = React.useRef(null) + const legendRef = React.useRef(null) + + const containerDivRef = React.useRef() + const tooltipDivRef = React.useRef() + + const colorFunc: (t: number) => string | undefined = d3.interpolateRainbow + const margin = { top: 0, right: 10, bottom: 200, left: 100 } + const height = 800 - margin.top - margin.bottom + const marginLegend = 10 + const minWidth = 1900 + + const [width, setWidth] = React.useState(minWidth) + const series = getSeries(data) + const seriesCount = series.length + + React.useEffect(() => { + if (!data || data.length === 0) { + return + } + + // Prepare all data structures and predicted data + const newDates = getNewDates(data) + const combinedData = prepareData(series, data, accumulate, newDates) + + // X - values + const x_vals = combinedData.map((d) => d.date.toISOString().substring(0, 10)) + + // prepare stacked data + let 
stackedData + if (accumulate) { + const accumulatedData = combinedData.reduce((acc: any[], curr) => { + const last = acc[acc.length - 1] + const accumulated = { + date: curr.date, + values: Object.keys(curr.values).reduce( + (accValues: Record, key) => { + return { + ...accValues, + [key]: (last ? last.values[key] : 0) + curr.values[key], + } + }, + {} + ), + } + return [...acc, accumulated] + }, []) + + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(accumulatedData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } else { + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(combinedData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } + + // find max values for the X axes + const y1Max = d3.max(stackedData, (y) => d3.max(y, (d) => d[1])) + + // tooltip events + const tooltip = d3.select(tooltipDivRef.current) + + const mouseover = (d) => { + tooltip.style('opacity', 0.8) + d3.select(this).style('opacity', 0.5) + } + const mousemove = (event, d) => { + const formater = d3.format(',.2f') + tooltip + .html(d.key + ' ' + formater(d[1] - d[0]) + ' AUD') + .style('top', event.layerY - 30 + 'px') + .style('left', event.layerX - 30 + 'px') + } + const mouseleave = (d) => { + tooltip.style('opacity', 0) + d3.select(this).style('opacity', 1) + } + + const x = d3 + .scaleBand() + .domain(d3.range(x_vals.length)) + .rangeRound([margin.left, minWidth - margin.right]) + .padding(0.08) + + // calculate opacity (for new dates) + const opacity = 0.3 + const calcOpacity = (d) => { + const idx = series.indexOf(d.key) + const color = d3.color(colorFunc(idx / seriesCount)) + if (newDates.includes(d.data.date)) { + return d3.rgb(color.r, color.g, color.b, opacity) + } + + return color + } + + // get SVG reference + const svg = d3.select(svgRef.current) + + // remove prevously rendered data + 
svg.selectAll('g').remove() + svg.selectAll('rect').remove() + + // generate bars + const g = svg + .selectAll('g') + .data(stackedData) + .enter() + .append('g') + .attr('fill', (d, i) => colorFunc(i / seriesCount)) + .attr('id', (d, i) => `path${i}`) + + const rect = g + .selectAll('rect') + .data((d) => d) + .enter() + .append('rect') + .attr('x', (d, i) => x(i)) + .attr('y', height - margin.bottom) + .attr('width', x.bandwidth()) + .attr('height', 0) + .attr('fill', (d) => calcOpacity(d)) + .on('mouseover', mouseover) + .on('mousemove', mousemove) + .on('mouseleave', mouseleave) + + // x-axis & labels + const formatX = (val: number): string => x_vals[val] + + let x_labels: d3.Selection = + svg.select('.x-axis') + + if (x_labels.empty()) { + x_labels = svg + .append('g') + .attr('class', 'x-axis') + .attr('transform', `translate(0,${height - margin.bottom})`) + .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) + } else { + x_labels.call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) + } + + // rotate x labels, if too many + if (x_vals.length > 10) { + x_labels + .selectAll('text') + .attr('transform', 'rotate(-90)') + .attr('text-anchor', 'end') + .attr('dy', '-0.55em') + .attr('dx', '-1em') + } else { + x_labels + .selectAll('text') + .attr('transform', 'rotate(0)') + .attr('text-anchor', 'middle') + .attr('dy', '0.55em') + .attr('dx', '0em') + } + + // y-axis & labels + const y = d3 + .scaleLinear() + .domain([0, y1Max]) + .range([height - margin.bottom, margin.top]) + + let y_labels: d3.Selection = + svg.select('.y-axis') + + if (y_labels.empty()) { + y_labels = svg + .append('g') + .attr('class', 'y-axis') + .attr('transform', `translate(${margin.left},0)`) + .call(d3.axisLeft(y)) + } else { + y_labels.call(d3.axisLeft(y)) + } + + // animate bars + rect.transition() + .duration(200) + .delay((d, i) => i * 5) + .attr('y', (d) => y(d[1]) || 0) + .attr('height', (d) => y(d[0]) - y(d[1])) + .transition() + .attr('x', (d, i) => x(i) || 0) + 
.attr('width', x.bandwidth()) + + // on Hover + const onHoverOver = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 0.5) + d3.select(tg).selectAll('circle').style('fill-opacity', 0.5) + d3.select(tg).selectAll('text').attr('font-weight', 'bold') + } + + const onHoverOut = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 1) + d3.select(tg).selectAll('circle').style('fill-opacity', 1) + d3.select(tg).selectAll('text').attr('font-weight', 'normal') + } + + const svgLegend = d3.select(legendRef.current) + + svgLegend + .selectAll('g.legend') + .data(series) + .join('g') + .attr('class', 'legend') + .attr('transform', `translate(0, ${margin.top})`) + .attr('id', (d, i) => `legend${i}`) + .attr('transform', (d, i) => `translate(${marginLegend},${marginLegend + i * 20})`) + .each(function (d, i) { + d3.select(this) + .selectAll('circle') // Replace append with selectAll + .data([d]) // Use data to bind a single data element + .join('circle') // Use join to handle enter/update/exit selections + .attr('r', 8) + .attr('fill', (d) => colorFunc(i / seriesCount)) + d3.select(this) + .selectAll('text') // Replace append with selectAll + .data([d]) // Use data to bind a single data element + .join('text') // Use join to handle enter/update/exit selections + .attr('text-anchor', 'start') + .attr('x', 10) + .attr('y', 0) + .attr('dy', '0.5em') + .text(d) + .attr('font-size', '0.8em') + d3.select(this) + .on('mouseover', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOver(element.node(), i) + }) + .on('mouseout', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOut(element.node(), i) + }) + }) + + // set all text to 2.5em + svg.selectAll('text').style('font-size', '2.5em') + + function updateWindowWidth() { + setWidth(containerDivRef.current?.clientWidth || 768) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + }, [data, 
accumulate]) + + if (!data || data.length === 0) { + return <>No Data + } + + return ( + <> +
+ + +
+
+ + ) +} diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 420371ced..8ee0f52b4 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -42,6 +42,16 @@ const billingPages = { url: '/billing/costByTime', icon: , }, + { + title: 'Cost By Analysis', + url: '/billing/costByAnalysis', + icon: , + }, + { + title: 'Cost By Category', + url: '/billing/costByCategory', + icon: , + }, { title: 'Seqr Prop Map', url: '/billing/seqrPropMap', @@ -138,8 +148,8 @@ const NavBar: React.FC = ({ fixed }) => { ]) React.useEffect(() => { - new BillingApi().getTopics().then((response) => { - if (response.status === 200) { + new BillingApi().isBillingEnabled().then((response) => { + if (response.status === 200 && response.data === true) { setMenuItems([...menuItems.slice(0, 2), billingPages, ...menuItems.slice(2)]) } }) diff --git a/web/src/shared/utilities/generateUrl.ts b/web/src/shared/utilities/generateUrl.ts new file mode 100644 index 000000000..6279c5d69 --- /dev/null +++ b/web/src/shared/utilities/generateUrl.ts @@ -0,0 +1,15 @@ +import { Dictionary } from 'lodash' + +const generateUrl = (location: Location, params: Dictionary): string => { + let paramsArray: string[] = [] + paramsArray = Object.entries(params) + .filter(([_, value]) => value !== null && value !== undefined) + .map(([key, value]) => `${key}=${value}`) + + if (paramsArray.length === 0) + return `${location.pathname}` + + return `${location.pathname}?${paramsArray.join('&')}` +} + +export default generateUrl diff --git a/web/src/shared/utilities/monthStartEndDate.ts b/web/src/shared/utilities/monthStartEndDate.ts new file mode 100644 index 000000000..089409332 --- /dev/null +++ b/web/src/shared/utilities/monthStartEndDate.ts @@ -0,0 +1,15 @@ +const getMonthStartDate = (): string => { + const now = new Date() + return `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-01` +} + +const 
getMonthEndDate = (): string => { + const now = new Date() + return [ + now.getFullYear(), + (now.getMonth() + 1).toString().padStart(2, '0'), + now.getDate().toString().padStart(2, '0') + ].join('-') +} + +export {getMonthStartDate, getMonthEndDate} From e63bda615bcabe574dd41a0e8ba020528d03b965 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 25 Jan 2024 16:13:37 +1100 Subject: [PATCH 3/7] Billing Enums & Filter fixes (#663) * Fixing Billing Enum imports. * Adding missing Billing Filter for day for gcp_billing_daily_cost table. --- api/routes/billing.py | 2 +- db/python/layers/billing.py | 4 +--- db/python/tables/bq/billing_base.py | 2 +- db/python/tables/bq/billing_gcp_daily.py | 9 +++++++++ models/enums/__init__.py | 1 + models/models/__init__.py | 3 --- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/api/routes/billing.py b/api/routes/billing.py index be1fb46ff..7d93a599c 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -7,11 +7,11 @@ from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW from api.utils.db import BqConnection, get_author from db.python.layers.billing import BillingLayer +from models.enums import BillingSource from models.models import ( BillingColumn, BillingCostBudgetRecord, BillingHailBatchCostRecord, - BillingSource, BillingTotalCostQueryModel, BillingTotalCostRecord, ) diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index 8f991c728..737be6857 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -4,13 +4,11 @@ from db.python.tables.bq.billing_daily_extended import BillingDailyExtendedTable from db.python.tables.bq.billing_gcp_daily import BillingGcpDailyTable from db.python.tables.bq.billing_raw import BillingRawTable +from models.enums import BillingSource, BillingTimeColumn, BillingTimePeriods from models.models import ( BillingColumn, BillingCostBudgetRecord, BillingHailBatchCostRecord, - BillingSource, - BillingTimeColumn, - 
BillingTimePeriods, BillingTotalCostQueryModel, ) diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py index e953c7013..cfa505ada 100644 --- a/db/python/tables/bq/billing_base.py +++ b/db/python/tables/bq/billing_base.py @@ -12,11 +12,11 @@ from db.python.tables.bq.billing_filter import BillingFilter from db.python.tables.bq.function_bq_filter import FunctionBQFilter from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from models.enums import BillingTimePeriods from models.models import ( BillingColumn, BillingCostBudgetRecord, BillingCostDetailsRecord, - BillingTimePeriods, BillingTotalCostQueryModel, ) diff --git a/db/python/tables/bq/billing_gcp_daily.py b/db/python/tables/bq/billing_gcp_daily.py index 037636993..b765547c3 100644 --- a/db/python/tables/bq/billing_gcp_daily.py +++ b/db/python/tables/bq/billing_gcp_daily.py @@ -42,6 +42,15 @@ def _query_to_partitioned_filter( if query.end_date else None, ) + # add day filter after partition filter is applied + billing_filter.day = GenericBQFilter[datetime]( + gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=datetime.strptime(query.end_date, '%Y-%m-%d') + if query.end_date + else None, + ) return billing_filter async def _last_loaded_day(self): diff --git a/models/enums/__init__.py b/models/enums/__init__.py index 14bcb9d5a..14f047786 100644 --- a/models/enums/__init__.py +++ b/models/enums/__init__.py @@ -1,3 +1,4 @@ from models.enums.analysis import AnalysisStatus +from models.enums.billing import BillingSource, BillingTimeColumn, BillingTimePeriods from models.enums.search import SearchResponseType from models.enums.web import MetaSearchEntityPrefix diff --git a/models/models/__init__.py b/models/models/__init__.py index d3b836e9a..ce5868cb1 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -17,9 +17,6 @@ BillingCostDetailsRecord, BillingHailBatchCostRecord, BillingInternal, - BillingSource, - 
BillingTimeColumn, - BillingTimePeriods, BillingTotalCostQueryModel, BillingTotalCostRecord, ) From 6530d46398c4f1400205e62ba77e5b09dd85dfc5 Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Fri, 26 Jan 2024 15:12:45 +1100 Subject: [PATCH 4/7] Added the ability to transfer assay files across during the process. Needed to pass the 'project' variable through various function calls in order to provide copy_files_in_dict the correct project id to create paths to copy files to (#660) --- scripts/create_test_subset.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index cc1adf52b..14f8ccd00 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -276,7 +276,7 @@ def transfer_samples_sgs_assays( type=sample_type or None, meta=(copy_files_in_dict(s['meta'], project) or {}), participant_id=existing_pid, - sequencing_groups=upsert_sequencing_groups(s, existing_data), + sequencing_groups=upsert_sequencing_groups(s, existing_data, project), id=existing_sid, ) @@ -289,7 +289,7 @@ def transfer_samples_sgs_assays( def upsert_sequencing_groups( - sample: dict, existing_data: dict + sample: dict, existing_data: dict, project: str ) -> list[SequencingGroupUpsert]: """Create SG Upsert Objects for a sample""" sgs_to_upsert: list[SequencingGroupUpsert] = [] @@ -306,7 +306,7 @@ def upsert_sequencing_groups( technology=sg.get('technology'), type=sg.get('type'), assays=upsert_assays( - sg, existing_sgid, existing_data, sample.get('externalId') + sg, existing_sgid, existing_data, sample.get('externalId'), project ), ) sgs_to_upsert.append(sg_upsert) @@ -315,7 +315,11 @@ def upsert_sequencing_groups( def upsert_assays( - sg: dict, existing_sgid: str | None, existing_data: dict, sample_external_id + sg: dict, + existing_sgid: str | None, + existing_data: dict, + sample_external_id, + project: str, ) -> 
list[AssayUpsert]: """Create Assay Upsert Objects for a sequencing group""" print(sg) @@ -325,17 +329,14 @@ def upsert_assays( # Check if assay exists if existing_sgid: _existing_assay = get_existing_assay( - existing_data, - sample_external_id, - existing_sgid, - assay + existing_data, sample_external_id, existing_sgid, assay ) existing_assay_id = _existing_assay.get('id') if _existing_assay else None assay_upsert = AssayUpsert( type=assay.get('type'), id=existing_assay_id, external_ids=assay.get('externalIds') or {}, - meta=assay.get('meta'), + meta=copy_files_in_dict(assay.get('meta'), project), ) assays_to_upsert.append(assay_upsert) From 733330cbe2da19449efd791cccdb64568f3c1fc7 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 29 Jan 2024 09:55:35 +1100 Subject: [PATCH 5/7] Billing fixing enum to string issue. (#665) --- db/python/tables/bq/billing_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py index cfa505ada..335603c3b 100644 --- a/db/python/tables/bq/billing_base.py +++ b/db/python/tables/bq/billing_base.py @@ -12,7 +12,7 @@ from db.python.tables.bq.billing_filter import BillingFilter from db.python.tables.bq.function_bq_filter import FunctionBQFilter from db.python.tables.bq.generic_bq_filter import GenericBQFilter -from models.enums import BillingTimePeriods +from models.enums import BillingTimeColumn, BillingTimePeriods from models.models import ( BillingColumn, BillingCostBudgetRecord, @@ -44,26 +44,26 @@ def prepare_time_periods( query: BillingTotalCostQueryModel, ) -> TimeGroupingDetails: """Prepare Time periods grouping and parsing formulas""" - time_column = query.time_column or BillingTimePeriods.DAY + time_column: BillingTimeColumn = query.time_column or BillingTimeColumn.DAY # Based on specified time period, add the corresponding column if query.time_periods == BillingTimePeriods.DAY: return TimeGroupingDetails( - 
field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day', + field=f'FORMAT_DATE("%Y-%m-%d", {time_column.value}) as day', formula='PARSE_DATE("%Y-%m-%d", day) as day', separator=',', ) if query.time_periods == BillingTimePeriods.WEEK: return TimeGroupingDetails( - field=f'FORMAT_DATE("%Y%W", {time_column}) as day', + field=f'FORMAT_DATE("%Y%W", {time_column.value}) as day', formula='PARSE_DATE("%Y%W", day) as day', separator=',', ) if query.time_periods == BillingTimePeriods.MONTH: return TimeGroupingDetails( - field=f'FORMAT_DATE("%Y%m", {time_column}) as day', + field=f'FORMAT_DATE("%Y%m", {time_column.value}) as day', formula='PARSE_DATE("%Y%m", day) as day', separator=',', ) From 91b5ff78658e10a7735d20caeb8c2889df99effe Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Mon, 29 Jan 2024 10:36:45 +1100 Subject: [PATCH 6/7] Functionality: Adding sequencing-type as a parameter to allow ingestion of exome data (#666) * adding sequencing-type as a parameter for future-proofing when exome data comes along and sequencing groups can be ingested as exomes instead of defaulting to genome * removing requirment to have sequencing_type as input param as we have default set to genome * adding sequencing_type as a parameter to the tests * This commit adds a new unit test to verify that the sequencing type is set correctly in the ExistingCohortParser class when the --sequencing-type flag is set to 'genome' or 'exome'. The test checks both 'genome' and 'exome' options. 
* Add unit test for validating sequencing type in assay metadata * splitting test for 'genome' and 'exome' sequencing_type's into two separate tests --- scripts/parse_existing_cohort.py | 9 +++ test/test_parse_existing_cohort.py | 119 +++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index 34d64f370..2603a8d60 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -111,6 +111,7 @@ def __init__( batch_number, include_participant_column, allow_missing_files, + sequencing_type, ): if include_participant_column: participant_column = Columns.PARTICIPANT_COLUMN @@ -131,6 +132,7 @@ def __init__( assay_meta_map=Columns.sequence_meta_map(), batch_number=batch_number, allow_extra_files_in_search_path=True, + default_sequencing_type=sequencing_type, ) def _get_dict_reader(self, file_pointer, delimiter: str): @@ -210,6 +212,11 @@ def get_existing_external_sequence_ids(self, participant_map: dict[str, dict]): '--project', help='The metamist project to import manifest into', ) +@click.option( + '--sequencing-type', + type=click.Choice(['genome', 'exome']), + help='Sequencing type: genome or exome', +) @click.option('--search-location', 'search_locations', multiple=True) @click.option( '--confirm', is_flag=True, help='Confirm with user input before updating server' @@ -236,6 +243,7 @@ async def main( dry_run=False, include_participant_column=False, allow_missing_files=False, + sequencing_type: str = 'genome', ): """Run script from CLI arguments""" @@ -245,6 +253,7 @@ async def main( batch_number=batch_number, include_participant_column=include_participant_column, allow_missing_files=allow_missing_files, + sequencing_type=sequencing_type, ) for manifest_path in manifests: diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index d8e755bf7..8fb169803 100644 --- a/test/test_parse_existing_cohort.py +++ 
b/test/test_parse_existing_cohort.py @@ -45,6 +45,7 @@ async def test_single_row( search_locations=[], project=self.project_name, allow_missing_files=False, + sequencing_type='genome', ) parser.filename_map = { @@ -116,6 +117,7 @@ async def test_no_header(self): search_locations=[], project=self.project_name, allow_missing_files=False, + sequencing_type='genome', ) parser.filename_map = { @@ -217,6 +219,7 @@ async def test_existing_row( search_locations=[], project=self.project_name, allow_missing_files=False, + sequencing_type='genome', ) parser.filename_map = { @@ -248,6 +251,7 @@ async def test_get_read_filenames_no_reads_fail(self): search_locations=[], project=self.project_name, allow_missing_files=False, + sequencing_type='genome', ) parser.filename_map = {} @@ -268,6 +272,7 @@ async def test_get_read_filenames_no_reads_pass(self): search_locations=[], project=self.project_name, allow_missing_files=True, + sequencing_type='genome', ) parser.filename_map = {} @@ -280,3 +285,117 @@ async def test_get_read_filenames_no_reads_pass(self): self.assertIn('No read files found for ', cm.output[0]) self.assertEqual(len(read_filenames), 0) + + @run_as_sync + async def test_genome_sequencing_type(self): + """Test that the sequencing type is set correctly when the --sequencing-type flag is set to 'genome''""" + + # Test with 'genome' + parser = ExistingCohortParser( + include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + allow_missing_files=True, + sequencing_type='genome', + ) + self.assertEqual(parser.default_sequencing_type, 'genome') + + @run_as_sync + async def test_exome_sequencing_type(self): + """Test that the sequencing type is set correctly when the --sequencing-type flag is set to 'exome'""" + + # Test with 'exome' + parser = ExistingCohortParser( + include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + allow_missing_files=True, + 
sequencing_type='exome', + ) + self.assertEqual(parser.default_sequencing_type, 'exome') + + @run_as_sync + @patch('metamist.parser.generic_parser.query_async') + @patch('metamist.parser.cloudhelper.CloudHelper.datetime_added') + @patch('metamist.parser.cloudhelper.CloudHelper.file_exists') + @patch('metamist.parser.cloudhelper.CloudHelper.file_size') + async def test_sequencing_type_in_assay_meta( + self, + mock_filesize, + mock_fileexists, + mock_datetime_added, + mock_graphql_query, + ): + """Test that the sequencing type is set correctly when the --sequencing-type flag is set to 'genome' or 'exome'""" + + mock_graphql_query.side_effect = self.run_graphql_query_async + + mock_filesize.return_value = 111 + mock_fileexists.return_value = False + mock_datetime_added.return_value = datetime.fromisoformat('2022-02-02T22:22:22') + + rows = [ + 'HEADER', + '""', + 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\tParticipant ID\t', + 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\tPID123', + ] + + for sequencing_type in ['genome', 'exome']: + with self.subTest(sequencing_type=sequencing_type): + parser = ExistingCohortParser( + include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + allow_missing_files=False, + sequencing_type=sequencing_type, + ) + parser.filename_map = { + 'HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq': '/path/to/HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', + 'HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq': '/path/to/HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', + } + + file_contents = '\n'.join(rows) + participants: list[ParsedParticipant] + _, participants = await parser.parse_manifest( + StringIO(file_contents), delimiter='\t', dry_run=True + ) + + sample_to_add = participants[0].samples[0] + 
expected_sequence_dict = { + 'reference_genome': 'hg38', + 'platform': 'App', + 'concentration': '100', + 'volume': '100', + 'fluid_x_tube_id': '220405_FLUIDX1234', + 'reads_type': 'fastq', + 'reads': [ + { + 'location': '/path/to/HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', + 'basename': 'HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', + 'class': 'File', + 'checksum': None, + 'size': 111, + 'datetime_added': '2022-02-02T22:22:22', + }, + { + 'location': '/path/to/HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', + 'basename': 'HG3F_2_220405_FLUIDX1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', + 'class': 'File', + 'checksum': None, + 'size': 111, + 'datetime_added': '2022-02-02T22:22:22', + }, + ], + 'sequencing_platform': 'illumina', + 'sequencing_technology': 'short-read', + 'sequencing_type': f'{sequencing_type}', + 'batch': 'M01', + } + assay = sample_to_add.sequencing_groups[0].assays[0] + self.maxDiff = None + self.assertDictEqual(expected_sequence_dict, assay.meta) + return From 6134af94fc0d132080416aa5a86805c7c217936e Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Tue, 30 Jan 2024 16:04:22 +1100 Subject: [PATCH 7/7] Billing - Small typescript fixes for Safari browser. (#670) * Small typescript fixes for Safari browser. 
--- web/src/pages/billing/BillingInvoiceMonthCost.tsx | 2 +- web/src/shared/components/Graphs/DonutChart.tsx | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index 3dcfc9e48..e06324670 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -169,7 +169,7 @@ const BillingCurrentCost = () => { const year = invoiceMonth.substring(0, 4) const month = invoiceMonth.substring(4, 6) let nextYear = year - let nextMonth = (parseInt(month, 10) + 1).toString() + let nextMonth = (parseInt(month, 10) + 1).toString().padStart(2, '0') if (month === '12') { nextYear = (parseInt(year, 10) + 1).toString() nextMonth = '01' diff --git a/web/src/shared/components/Graphs/DonutChart.tsx b/web/src/shared/components/Graphs/DonutChart.tsx index 208b8a0df..02f46292b 100644 --- a/web/src/shared/components/Graphs/DonutChart.tsx +++ b/web/src/shared/components/Graphs/DonutChart.tsx @@ -79,14 +79,15 @@ export const DonutChart: React.FC = ({ data, maxSlices, colors const margin = 15 const radius = Math.min(width, height) / 2 - margin - // keep order of the slices + // keep order of the slices, declare custom sort function to keep order of slices as passed in + // by default pie function starts from index 1 and sorts by value const pieFnc = pie() .value((d) => d.value) .sort((a) => { if (typeof a === 'object' && a.type === 'inc') { return 1 } - return -1 + return 0 // works both on Safari and Firefox, any other value will break one of them }) const data_ready = pieFnc(data) const innerRadius = radius / 1.75 // inner radius of pie, in pixels (non-zero for donut)