From e517bbaf61d8d3add686505bea5051e7d2eeb86a Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Wed, 22 Nov 2023 18:53:28 +1100 Subject: [PATCH 01/34] Billing api extra labels (#619) * Added compute_category, cromwell_sub_workflow_name, cromwell_workflow_id, goog_pipelines_worker and wdl_task_name to extended view and created relevant filters and API points. * Added labels to all BQ queries, refactoring billing layer. * Added examples to billing-total-cost API regarding the new filters. --- api/routes/billing.py | 117 +++++++++- db/python/layers/__init__.py | 2 + .../layers/{billing.py => billing_db.py} | 211 +++++------------- db/python/layers/billing_layer.py | 157 +++++++++++++ models/models/billing.py | 23 +- 5 files changed, 347 insertions(+), 163 deletions(-) rename db/python/layers/{billing.py => billing_db.py} (87%) create mode 100644 db/python/layers/billing_layer.py diff --git a/api/routes/billing.py b/api/routes/billing.py index bdc0d8b52..474af2c06 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -1,25 +1,21 @@ """ Billing routes """ -from fastapi import APIRouter from async_lru import alru_cache +from fastapi import APIRouter from api.settings import BILLING_CACHE_RESPONSE_TTL -from api.utils.db import ( - BqConnection, - get_author, -) -from db.python.layers.billing import BillingLayer +from api.utils.db import BqConnection, get_author +from db.python.layers.billing_layer import BillingLayer from models.models.billing import ( BillingColumn, BillingCostBudgetRecord, BillingQueryModel, BillingRowRecord, - BillingTotalCostRecord, BillingTotalCostQueryModel, + BillingTotalCostRecord, ) - router = APIRouter(prefix='/billing', tags=['billing']) @@ -169,6 +165,63 @@ async def get_sequencing_groups( return records +@router.get( + '/compute-categories', + response_model=list[str], + operation_id='getComputeCategories', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_compute_categories( + author: str = get_author, +) -> list[str]: + """ + Get list of all compute categories in database + Results are sorted ASC + """ + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + records = await billing_layer.get_compute_categories() + return records + + +@router.get( + '/cromwell-sub-workflow-names', + response_model=list[str], + operation_id='getCromwellSubWorkflowNames', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cromwell_sub_workflow_names( + author: str = get_author, +) -> list[str]: + """ + Get list of all cromwell_sub_workflow_names in database + Results are sorted ASC + """ + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + records = await billing_layer.get_cromwell_sub_workflow_names() + return records + + +@router.get( + '/wdl-task-names', + response_model=list[str], + operation_id='getWdlTaskNames', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_wdl_task_names( + author: str = get_author, +) -> list[str]: + """ + Get list of all wdl_task_names in database + Results are sorted ASC + """ + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + records = await billing_layer.get_wdl_task_names() + return records + + @router.get( '/invoice-months', response_model=list[str], @@ -341,6 +394,52 @@ async def get_total_cost( "order_by": {"cost": true} } + 12. Get total cost by compute_category order by cost DESC: + + { + "fields": ["compute_category"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 13. 
Get total cost by cromwell_sub_workflow_name, order by cost DESC: + + { + "fields": ["cromwell_sub_workflow_name"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 14. Get total cost by sku for given cromwell_workflow_id, order by cost DESC: + + { + "fields": ["sku"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "filters": {"cromwell_workflow_id": "cromwell-00448f7b-8ef3-4d22-80ab-e302acdb2d28"}, + "order_by": {"cost": true} + } + + 15. Get total cost by sku for given goog_pipelines_worker, order by cost DESC: + + { + "fields": ["goog_pipelines_worker"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + + 16. Get total cost by sku for given wdl_task_name, order by cost DESC: + + { + "fields": ["wdl_task_name"], + "start_date": "2023-11-10", + "end_date": "2023-11-10", + "order_by": {"cost": true} + } + """ connection = BqConnection(author) @@ -363,7 +462,7 @@ async def get_running_costs( ) -> list[BillingCostBudgetRecord]: """ Get running cost for specified fields in database - e.g. fields = ['gcp_project', 'topic'] + e.g. fields = ['gcp_project', 'topic', 'wdl_task_names', 'cromwell_sub_workflow_name', 'compute_category'] """ # TODO replace alru_cache with async-cache? diff --git a/db/python/layers/__init__.py b/db/python/layers/__init__.py index f67ab78c6..43d474fcc 100644 --- a/db/python/layers/__init__.py +++ b/db/python/layers/__init__.py @@ -1,6 +1,8 @@ from db.python.layers.analysis import AnalysisLayer from db.python.layers.assay import AssayLayer from db.python.layers.base import BaseLayer +from db.python.layers.billing_db import BillingDb +from db.python.layers.billing_layer import BillingLayer from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer diff --git a/db/python/layers/billing.py b/db/python/layers/billing_db.py similarity index 87% rename from db/python/layers/billing.py rename to db/python/layers/billing_db.py index 93ce3cfc7..108512e85 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing_db.py @@ -1,31 +1,30 @@ import re - -from typing import Any -from datetime import datetime from collections import Counter, defaultdict -from google.cloud import bigquery - -from models.models import ( - BillingRowRecord, - BillingTotalCostRecord, - BillingTotalCostQueryModel, - BillingColumn, - BillingCostBudgetRecord, -) +from datetime import datetime +from typing import Any -from db.python.gcp_connect import BqDbBase -from db.python.layers.bq_base import BqBaseLayer -from db.python.tables.billing import BillingFilter +from google.cloud import bigquery from api.settings import ( - BQ_DAYS_BACK_OPTIMAL, - BQ_AGGREG_VIEW, - BQ_AGGREG_RAW, BQ_AGGREG_EXT_VIEW, + BQ_AGGREG_RAW, + BQ_AGGREG_VIEW, BQ_BUDGET_VIEW, + BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW, ) from api.utils.dates import get_invoice_month_range, reformat_datetime +from db.python.gcp_connect import BqDbBase +from db.python.tables.billing import BillingFilter +from models.models import ( + BillingColumn, + BillingCostBudgetRecord, + BillingRowRecord, + BillingTotalCostQueryModel, + BillingTotalCostRecord, +) + +BQ_LABELS = {'source': 'metamist-api'} def abbrev_cost_category(cost_category: str) -> str: @@ -33,126 +32,6 @@ def abbrev_cost_category(cost_category: str) -> str: return 'S' if cost_category == 'Cloud Storage' else 'C' -class BillingLayer(BqBaseLayer): - """Billing layer""" - - async def 
get_gcp_projects( - self, - ) -> list[str] | None: - """ - Get All GCP projects in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_gcp_projects() - - async def get_topics( - self, - ) -> list[str] | None: - """ - Get All topics in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_topics() - - async def get_cost_categories( - self, - ) -> list[str] | None: - """ - Get All service description / cost categories in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_cost_categories() - - async def get_skus( - self, - limit: int | None = None, - offset: int | None = None, - ) -> list[str] | None: - """ - Get All SKUs in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_skus(limit, offset) - - async def get_datasets( - self, - ) -> list[str] | None: - """ - Get All datasets in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('dataset') - - async def get_stages( - self, - ) -> list[str] | None: - """ - Get All stages in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('stage') - - async def get_sequencing_types( - self, - ) -> list[str] | None: - """ - Get All sequencing_types in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_type') - - async def get_sequencing_groups( - self, - ) -> list[str] | None: - """ - Get All sequencing_groups in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_group') - - async def get_invoice_months( - self, - ) -> list[str] | None: - """ - Get All invoice months in database - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_invoice_months() - - async def query( - self, - _filter: BillingFilter, - limit: int = 10, - ) -> list[BillingRowRecord] | None: - """ - Get Billing record for the given gilter - """ - billing_db = BillingDb(self.connection) - return await billing_db.query(_filter, limit) - - async def get_total_cost( - self, - query: BillingTotalCostQueryModel, - ) -> list[BillingTotalCostRecord] | None: - """ - Get Total cost of selected fields for requested time interval - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_total_cost(query) - - async def get_running_cost( - self, - field: BillingColumn, - invoice_month: str | None = None, - source: str | None = None, - ) -> list[BillingCostBudgetRecord]: - """ - Get Running costs including monthly budget - """ - billing_db = BillingDb(self.connection) - return await billing_db.get_running_cost(field, invoice_month, source) - - class BillingDb(BqDbBase): """Db layer for billing related routes""" @@ -178,7 +57,8 @@ async def get_gcp_projects(self): bigquery.ScalarQueryParameter( 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) ), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -212,7 +92,8 @@ async def get_topics(self): bigquery.ScalarQueryParameter( 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) ), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -234,7 +115,11 @@ async def get_invoice_months(self): ORDER BY invoice_month DESC; """ - query_job_result = list(self._connection.connection.query(_query).result()) + job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) + + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() 
+ ) if query_job_result: return [str(dict(row)['invoice_month']) for row in query_job_result] @@ -263,7 +148,8 @@ async def get_cost_categories(self): bigquery.ScalarQueryParameter( 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) ), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -309,7 +195,8 @@ async def get_skus( ), bigquery.ScalarQueryParameter('limit_val', 'INT64', limit), bigquery.ScalarQueryParameter('offset_val', 'INT64', offset), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -323,10 +210,13 @@ async def get_skus( async def get_extended_values(self, field: str): """ - Get all extended values in database, - e.g. dataset, stage, sequencing_type or sequencing_group + Get all extended values in database, for specified field. + Field is one of extended coumns. """ + if field not in BillingColumn.extended_cols(): + raise ValueError('Invalid field value') + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL # this day > filter is to limit the amount of data scanned, @@ -347,7 +237,8 @@ async def get_extended_values(self, field: str): bigquery.ScalarQueryParameter( 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) ), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -418,7 +309,9 @@ async def query( bigquery.ScalarQueryParameter('limit_val', 'INT64', limit) ) - job_config = bigquery.QueryJobConfig(query_parameters=query_parameters) + job_config = bigquery.QueryJobConfig( + query_parameters=query_parameters, labels=BQ_LABELS + ) query_job_result = list( self._connection.connection.query(_query, job_config=job_config).result() ) @@ -531,7 +424,9 @@ async def get_total_cost( bigquery.ScalarQueryParameter('offset_val', 'INT64', query.offset) ) - job_config = bigquery.QueryJobConfig(query_parameters=query_parameters) + job_config = bigquery.QueryJobConfig( + query_parameters=query_parameters, labels=BQ_LABELS + ) query_job_result = list( self._connection.connection.query(_query, job_config=job_config).result() ) @@ -565,7 +460,10 @@ async def get_budgets_by_gcp_project( ON d.gcp_project = t.gcp_project AND d.created_at = t.last_created_at """ - query_job_result = list(self._connection.connection.query(_query).result()) + job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() + ) if query_job_result: return {row.gcp_project: row.budget for row in query_job_result} @@ -591,7 +489,8 @@ async def get_last_loaded_day(self): bigquery.ScalarQueryParameter( 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) ), - ] + ], + labels=BQ_LABELS, ) query_job_result = list( @@ -764,7 +663,9 @@ async def execute_running_cost_query( list( self._connection.connection.query( _query, - job_config=bigquery.QueryJobConfig(query_parameters=query_params), + job_config=bigquery.QueryJobConfig( + query_parameters=query_params, labels=BQ_LABELS + ), ).result() ), ) @@ -893,8 +794,14 @@ async def get_running_cost( BillingColumn.TOPIC, BillingColumn.PROJECT, BillingColumn.DATASET, + BillingColumn.COMPUTE_CATEGORY, + BillingColumn.WDL_TASK_NAME, + BillingColumn.CROMWELL_SUB_WORKFLOW_NAME, ): - raise ValueError('Invalid field only topic, dataset or project allowed') + raise ValueError( + 'Invalid field only topic, dataset, gcp-project, compute_category, ' + 'wdl_task_name & cromwell_sub_workflow_name are allowed' + ) ( is_current_month, diff --git a/db/python/layers/billing_layer.py b/db/python/layers/billing_layer.py new file mode 100644 
index 000000000..9ba5883b3 --- /dev/null +++ b/db/python/layers/billing_layer.py @@ -0,0 +1,157 @@ +from db.python.layers.billing_db import BillingDb +from db.python.layers.bq_base import BqBaseLayer +from db.python.tables.billing import BillingFilter +from models.models import ( + BillingColumn, + BillingCostBudgetRecord, + BillingRowRecord, + BillingTotalCostQueryModel, + BillingTotalCostRecord, +) + + +class BillingLayer(BqBaseLayer): + """Billing layer""" + + async def get_gcp_projects( + self, + ) -> list[str] | None: + """ + Get All GCP projects in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_gcp_projects() + + async def get_topics( + self, + ) -> list[str] | None: + """ + Get All topics in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_topics() + + async def get_cost_categories( + self, + ) -> list[str] | None: + """ + Get All service description / cost categories in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_cost_categories() + + async def get_skus( + self, + limit: int | None = None, + offset: int | None = None, + ) -> list[str] | None: + """ + Get All SKUs in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_skus(limit, offset) + + async def get_datasets( + self, + ) -> list[str] | None: + """ + Get All datasets in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('dataset') + + async def get_stages( + self, + ) -> list[str] | None: + """ + Get All stages in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('stage') + + async def get_sequencing_types( + self, + ) -> list[str] | None: + """ + Get All sequencing_types in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('sequencing_type') + + async def get_sequencing_groups( + self, + ) -> list[str] | None: + """ + Get All sequencing_groups in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('sequencing_group') + + async def get_compute_categories( + self, + ) -> list[str] | None: + """ + Get All compute_category values in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('compute_category') + + async def get_cromwell_sub_workflow_names( + self, + ) -> list[str] | None: + """ + Get All cromwell_sub_workflow_name values in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('cromwell_sub_workflow_name') + + async def get_wdl_task_names( + self, + ) -> list[str] | None: + """ + Get All wdl_task_name values in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('wdl_task_name') + + async def get_invoice_months( + self, + ) -> list[str] | None: + """ + Get All invoice months in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_invoice_months() + + async def query( + self, + _filter: BillingFilter, + limit: int = 10, + ) -> list[BillingRowRecord] | None: + """ + Get Billing record for the given gilter + """ + billing_db = BillingDb(self.connection) + return await billing_db.query(_filter, limit) + + async def get_total_cost( + self, + query: BillingTotalCostQueryModel, + ) -> list[BillingTotalCostRecord] | None: + """ + Get Total cost of selected fields for requested time 
interval + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_total_cost(query) + + async def get_running_cost( + self, + field: BillingColumn, + invoice_month: str | None = None, + source: str | None = None, + ) -> list[BillingCostBudgetRecord]: + """ + Get Running costs including monthly budget + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_running_cost(field, invoice_month, source) diff --git a/models/models/billing.py b/models/models/billing.py index 481ea77ce..bc0efb2df 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -3,7 +3,6 @@ from db.python.tables.billing import BillingFilter from db.python.utils import GenericFilter - from models.base import SMBase @@ -138,6 +137,11 @@ class BillingColumn(str, Enum): SEQUENCING_TYPE = 'sequencing_type' STAGE = 'stage' SEQUENCING_GROUP = 'sequencing_group' + COMPUTE_CATEGORY = 'compute_category' + CROMWELL_SUB_WORKFLOW_NAME = 'cromwell_sub_workflow_name' + CROMWELL_WORKFLOW_ID = 'cromwell_workflow_id' + GOOG_PIPELINES_WORKER = 'goog_pipelines_worker' + WDL_TASK_NAME = 'wdl_task_name' @classmethod def extended_cols(cls) -> list[str]: @@ -148,7 +152,12 @@ def extended_cols(cls) -> list[str]: 'sequencing_type', 'stage', 'sequencing_group', - 'ar_guid' + 'ar_guid', + 'compute_category', + 'cromwell_sub_workflow_name', + 'cromwell_workflow_id', + 'goog_pipelines_worker', + 'wdl_task_name', ] @staticmethod @@ -200,6 +209,11 @@ class BillingTotalCostRecord(SMBase): sequencing_type: str | None stage: str | None sequencing_group: str | None + compute_category: str | None + cromwell_sub_workflow_name: str | None + cromwell_workflow_id: str | None + goog_pipelines_worker: str | None + wdl_task_name: str | None cost: float currency: str | None @@ -219,6 +233,11 @@ def from_json(record): sequencing_type=record.get('sequencing_type'), stage=record.get('stage'), sequencing_group=record.get('sequencing_group'), + compute_category=record.get('compute_category'), + cromwell_sub_workflow_name=record.get('cromwell_sub_workflow_name'), + cromwell_workflow_id=record.get('cromwell_workflow_id'), + goog_pipelines_worker=record.get('goog_pipelines_worker'), + wdl_task_name=record.get('wdl_task_name'), cost=record.get('cost'), currency=record.get('currency'), ) From e7116eb51decce3719d7e15476dc82776a790cb3 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 27 Nov 2023 14:20:42 +1100 Subject: [PATCH 02/34] Billing - fixing styling issues after the first Billing release (#624) * Temporarily disable seqr and hail from /topics API. * Autoselect 1st topic / 1st project value from the DDL. * Merging Billing.css into index.css * Small fix - reusing extRecords in FieldSelector component. * Refactoring duplicated code in FieldSelector. * Added Stages to the Group by DDL. 
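As an illustration of the auto-select behaviour described in the bullets above, here is a minimal standalone TypeScript sketch — not the PR's own code (the real logic lives in FieldSelector.tsx's processResponse further below); the helper name applyAutoSelect and the recs.length guard are assumptions, while the parameter names mirror FieldSelector's props:

    // Sketch only: when nothing is selected yet and autoSelect is enabled, push the first
    // returned record through the same handler the dropdown uses, so the page renders with a
    // sensible default instead of an empty selection.
    const applyAutoSelect = (
        recs: string[],
        selected: string | undefined,
        autoSelect: boolean | undefined,
        onClickFunction: (_: unknown, data: { value: string }) => void
    ): string[] => {
        if (!selected && autoSelect && recs.length > 0) {
            onClickFunction(undefined, { value: recs[0] })
        }
        return recs
    }

In the patch itself this happens inside processResponse, right after the records are extended with the optional "All …" entry, so when includeAll and autoSelect are both set the "All" option becomes the default selection.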
--- db/python/layers/billing_db.py | 3 + web/src/index.css | 41 ++++++++++++++ web/src/pages/billing/Billing.css | 15 ----- web/src/pages/billing/BillingCostByTime.tsx | 2 + .../pages/billing/BillingInvoiceMonthCost.tsx | 6 +- .../billing/components/FieldSelector.tsx | 55 ++++++++++++------- 6 files changed, 81 insertions(+), 41 deletions(-) delete mode 100644 web/src/pages/billing/Billing.css diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index 108512e85..31191cf15 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -84,6 +84,8 @@ async def get_topics(self): WHERE day > TIMESTAMP_ADD( CURRENT_TIMESTAMP(), INTERVAL @days DAY ) + -- TODO put this back when reloading is fixed + AND NOT topic IN ('seqr', 'hail') ORDER BY topic ASC; """ @@ -794,6 +796,7 @@ async def get_running_cost( BillingColumn.TOPIC, BillingColumn.PROJECT, BillingColumn.DATASET, + BillingColumn.STAGE, BillingColumn.COMPUTE_CATEGORY, BillingColumn.WDL_TASK_NAME, BillingColumn.CROMWELL_SUB_WORKFLOW_NAME, diff --git a/web/src/index.css b/web/src/index.css index 65dd69ccd..4c28ca4f0 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -200,3 +200,44 @@ html[data-theme='dark-mode'] .ui.table { [class^='language-'] { border: none; } + +#group-by-dropdown .menu { + background: #ffffff !important; +} + +.field-selector-label { + width: 200px !important; +} + +.field-selector-dropdown { + width: 80% !important; +} + +.donut-chart { + margin-top: 20px; +} + +/* missing styles regarding dark/light theme */ +.ui.fluid.card { + color: var(--color-text-primary); + background: var(--color-bg); + fill: var(--color-text-primary); +} + +.ui.header { + color: var(--color-text-primary); +} + +.ui.input > input { + color: var(--color-text-primary); + background: var(--color-bg-card); +} + +.ui.toggle.checkbox label { + color: var(--color-text-primary); +} + +.ui.toggle.checkbox input:checked ~ .box, +.ui.toggle.checkbox input:checked ~ label { + color: var(--color-text-primary) !important; +} diff --git a/web/src/pages/billing/Billing.css b/web/src/pages/billing/Billing.css deleted file mode 100644 index 68d62cfe0..000000000 --- a/web/src/pages/billing/Billing.css +++ /dev/null @@ -1,15 +0,0 @@ -#group-by-dropdown .menu { - background: #ffffff !important; -} - -.field-selector-label { - width: 200px !important; -} - -.field-selector-dropdown { - width: 80% !important; -} - -.donut-chart { - margin-top: 20px; -} diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 629b3ee08..84d22f0e9 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -230,6 +230,7 @@ const BillingCostByTime: React.FunctionComponent = () => { fieldName="Group" onClickFunction={onGroupBySelect} selected={groupBy} + autoSelect={false} /> @@ -240,6 +241,7 @@ const BillingCostByTime: React.FunctionComponent = () => { onClickFunction={onSelect} selected={selectedData} includeAll={true} + autoSelect={true} /> diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index d7793d265..da6190b17 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -2,14 +2,10 @@ import * as React from 'react' import { Link, useSearchParams, useNavigate, useLocation } from 'react-router-dom' import { Table as SUITable, Message, Button, Checkbox, Dropdown, Grid } from 'semantic-ui-react' import _ from 
'lodash' - import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import Table from '../../shared/components/Table' import { BillingApi, BillingColumn, BillingCostBudgetRecord } from '../../sm-api' - -import './Billing.css' import FieldSelector from './components/FieldSelector' - import { convertFieldName } from '../../shared/utilities/fieldName' const BillingCurrentCost = () => { @@ -56,7 +52,7 @@ const BillingCurrentCost = () => { const [lastLoadedDay, setLastLoadedDay] = React.useState() const getCosts = (grp: BillingColumn, invoiceMth: string | undefined) => { - updateNav(groupBy, invoiceMth) + updateNav(grp, invoiceMth) setIsLoading(true) setError(undefined) let source = 'aggregate' diff --git a/web/src/pages/billing/components/FieldSelector.tsx b/web/src/pages/billing/components/FieldSelector.tsx index e6e86450a..438933d69 100644 --- a/web/src/pages/billing/components/FieldSelector.tsx +++ b/web/src/pages/billing/components/FieldSelector.tsx @@ -1,11 +1,6 @@ import * as React from 'react' - import { Dropdown, Input, Message } from 'semantic-ui-react' - import { BillingApi, BillingColumn } from '../../../sm-api' - -import '../Billing.css' - import { convertFieldName } from '../../../shared/utilities/fieldName' interface FieldSelectorProps { @@ -13,6 +8,7 @@ interface FieldSelectorProps { fieldName: string selected?: string includeAll?: boolean + autoSelect?: boolean onClickFunction: (_: any, { value }: any) => void } @@ -21,20 +17,28 @@ const FieldSelector: React.FunctionComponent = ({ fieldName, selected, includeAll, + autoSelect, onClickFunction, }) => { const [loading, setLoading] = React.useState(true) const [error, setError] = React.useState() const [records, setRecords] = React.useState([]) - const extendRecords = (records: string[]) => { + const extendRecords = (recs: string[]) => { if (includeAll) { - if (fieldName === 'GCP-Project') { - return [`All ${convertFieldName(fieldName)}`, ...records] - } - return [`All ${convertFieldName(fieldName)}s`, ...records] + return [`All ${convertFieldName(fieldName)}s`, ...recs] + } + return recs + } + + const processResponse = (response_data: string[]) => { + setLoading(false) + const extRecords = extendRecords(response_data) + setRecords(extRecords) + if (!selected && autoSelect) { + // set the first option as the default + onClickFunction(undefined, { value: extRecords[0] }) } - return records } const getTopics = () => { @@ -43,8 +47,7 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() .getTopics() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) }) .catch((er) => setError(er.message)) } @@ -55,8 +58,7 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() .getGcpProjects() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) }) .catch((er) => setError(er.message)) } @@ -67,23 +69,34 @@ const FieldSelector: React.FunctionComponent = ({ new BillingApi() .getInvoiceMonths() .then((response) => { - setLoading(false) - setRecords(extendRecords(response.data)) + processResponse(response.data) + }) + .catch((er) => setError(er.message)) + } + + const getStages = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getStages() + .then((response) => { + processResponse(response.data) }) .catch((er) => setError(er.message)) } React.useEffect(() => { if (fieldName === BillingColumn.Topic) getTopics() + else if (fieldName === 
BillingColumn.GcpProject) getGcpProjects() else if (fieldName === BillingColumn.InvoiceMonth) getInvoiceMonths() + else if (fieldName === BillingColumn.Stage) getStages() else if (fieldName === 'Group') { - setRecords([BillingColumn.GcpProject, BillingColumn.Topic]) + setRecords([BillingColumn.GcpProject, BillingColumn.Topic, BillingColumn.Stage]) setLoading(false) - } else if (fieldName === BillingColumn.GcpProject) getGcpProjects() - else { + } else { setError(`Could not load records for ${fieldName}`) } - }, [label, fieldName]) + }, [fieldName]) const capitalize = (str: string): string => { if (str === 'gcp_project') { From 2dafa63d1a7f190ef5e6d79ef3c2467678ddec38 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 27 Nov 2023 17:02:29 +1100 Subject: [PATCH 03/34] Billing API IsBillingEnabled (#626) * Added API point to check if billing is enabled. --- api/routes/billing.py | 73 +++++++++++---------- web/src/shared/components/Header/NavBar.tsx | 4 +- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/api/routes/billing.py b/api/routes/billing.py index 474af2c06..6f4751b26 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -4,7 +4,7 @@ from async_lru import alru_cache from fastapi import APIRouter -from api.settings import BILLING_CACHE_RESPONSE_TTL +from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW from api.utils.db import BqConnection, get_author from db.python.layers.billing_layer import BillingLayer from models.models.billing import ( @@ -19,6 +19,30 @@ router = APIRouter(prefix='/billing', tags=['billing']) +@router.get( + '/is-billing-enabled', + response_model=bool, + operation_id='isBillingEnabled', +) +def is_billing_enabled() -> bool: + """ + Return true if billing ie enabled, false otherwise + """ + return BQ_AGGREG_VIEW is not None + + +def initialise_billing_layer(author: str) -> BillingLayer: + """ + Initialise billing + """ + if not is_billing_enabled(): + raise ValueError('Billing is not enabled') + + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + return billing_layer + + @router.get( '/gcp-projects', response_model=list[str], @@ -29,8 +53,7 @@ async def get_gcp_projects( author: str = get_author, ) -> list[str]: """Get list of all GCP projects in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_gcp_projects() return records @@ -45,8 +68,7 @@ async def get_topics( author: str = get_author, ) -> list[str]: """Get list of all topics in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_topics() return records @@ -61,8 +83,7 @@ async def get_cost_categories( author: str = get_author, ) -> list[str]: """Get list of all service description / cost categories in database""" - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_cost_categories() return records @@ -83,8 +104,7 @@ async def get_skus( There is over 400 Skus so limit is required Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_skus(limit, offset) return records @@ -102,8 +122,7 @@ async def get_datasets( Get list of all datasets in database 
Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_datasets() return records @@ -121,8 +140,7 @@ async def get_sequencing_types( Get list of all sequencing_types in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_sequencing_types() return records @@ -140,8 +158,7 @@ async def get_stages( Get list of all stages in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_stages() return records @@ -159,8 +176,7 @@ async def get_sequencing_groups( Get list of all sequencing_groups in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_sequencing_groups() return records @@ -178,8 +194,7 @@ async def get_compute_categories( Get list of all compute categories in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_compute_categories() return records @@ -197,8 +212,7 @@ async def get_cromwell_sub_workflow_names( Get list of all cromwell_sub_workflow_names in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_cromwell_sub_workflow_names() return records @@ -216,8 +230,7 @@ async def get_wdl_task_names( Get list of all wdl_task_names in database Results are sorted ASC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_wdl_task_names() return records @@ -235,8 +248,7 @@ async def get_invoice_months( Get list of all invoice months in database Results are sorted DESC """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_invoice_months() return records @@ -262,8 +274,7 @@ async def query_billing( } """ - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.query(query.to_filter(), limit) return records @@ -441,9 +452,7 @@ async def get_total_cost( } """ - - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_total_cost(query) return records @@ -464,13 +473,11 @@ async def get_running_costs( Get running cost for specified fields in database e.g. fields = ['gcp_project', 'topic', 'wdl_task_names', 'cromwell_sub_workflow_name', 'compute_category'] """ - # TODO replace alru_cache with async-cache? # so we can skip author for caching? 
# pip install async-cache # @AsyncTTL(time_to_live=BILLING_CACHE_RESPONSE_TTL, maxsize=1024, skip_args=2) - connection = BqConnection(author) - billing_layer = BillingLayer(connection) + billing_layer = initialise_billing_layer(author) records = await billing_layer.get_running_cost(field, invoice_month, source) return records diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 420371ced..3600a827a 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -138,8 +138,8 @@ const NavBar: React.FC = ({ fixed }) => { ]) React.useEffect(() => { - new BillingApi().getTopics().then((response) => { - if (response.status === 200) { + new BillingApi().isBillingEnabled().then((response) => { + if (response.status === 200 && response.data === true) { setMenuItems([...menuItems.slice(0, 2), billingPages, ...menuItems.slice(2)]) } }) From 263b3661d020660a9dc1635bd1af84846768889e Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Tue, 28 Nov 2023 18:28:28 +1100 Subject: [PATCH 04/34] Added simple Total Cost By Batch Page. (#627) * Added simple Total Cost By Batch Page. --- api/routes/billing.py | 18 ++ db/python/layers/billing_db.py | 20 +- models/models/billing.py | 3 + web/src/Routes.tsx | 9 + web/src/pages/billing/BillingCostByBatch.tsx | 258 +++++++++++++++++++ web/src/pages/billing/index.ts | 1 + web/src/shared/components/Header/NavBar.tsx | 5 + 7 files changed, 308 insertions(+), 6 deletions(-) create mode 100644 web/src/pages/billing/BillingCostByBatch.tsx diff --git a/api/routes/billing.py b/api/routes/billing.py index 6f4751b26..d47a5cf0d 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -451,6 +451,24 @@ async def get_total_cost( "order_by": {"cost": true} } + 17. 
Get total cost by sku for provided ID, which can be any of + [ar_guid, batch_id, sequencing_group or cromwell_workflow_id], + order by cost DESC: + + { + "fields": ["sku", "ar_guid", "batch_id", "sequencing_group", "cromwell_workflow_id"], + "start_date": "2023-11-01", + "end_date": "2023-11-30", + "filters": { + "ar_guid": "855a6153-033c-4398-8000-46ed74c02fe8", + "batch_id": "429518", + "sequencing_group": "cpg246751", + "cromwell_workflow_id": "cromwell-e252f430-4143-47ec-a9c0-5f7face1b296" + }, + "filters_op": "OR", + "order_by": {"cost": true} + } + """ billing_layer = initialise_billing_layer(author) records = await billing_layer.get_total_cost(query) diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index 31191cf15..b9fc73e19 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -357,15 +357,15 @@ async def get_total_cost( fields_selected = ','.join(columns) # construct filters - filters = [] + and_filters = [] query_parameters = [] - filters.append('day >= TIMESTAMP(@start_date)') + and_filters.append('day >= TIMESTAMP(@start_date)') query_parameters.append( bigquery.ScalarQueryParameter('start_date', 'STRING', query.start_date) ) - filters.append('day <= TIMESTAMP(@end_date)') + and_filters.append('day <= TIMESTAMP(@end_date)') query_parameters.append( bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date) ) @@ -377,11 +377,12 @@ async def get_total_cost( # (part_time field in the view) # We are querying by day, # which can be up to a week behind regarding _PARTITIONTIME - filters.append('part_time >= TIMESTAMP(@start_date)') - filters.append( + and_filters.append('part_time >= TIMESTAMP(@start_date)') + and_filters.append( 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' ) + filters = [] if query.filters: for filter_key, filter_value in query.filters.items(): col_name = str(filter_key.value) @@ -394,7 +395,14 @@ async def get_total_cost( # the view has to be extended view_to_use = BQ_AGGREG_EXT_VIEW - filter_str = 'WHERE ' + ' AND '.join(filters) if filters else '' + if query.filters_op == 'OR': + if filters: + and_filters.append('(' + ' OR '.join(filters) + ')') + else: + # if not specified, default to AND + and_filters.extend(filters) + + filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' # construct order by order_by_cols = [] diff --git a/models/models/billing.py b/models/models/billing.py index bc0efb2df..4687e23ff 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -184,6 +184,9 @@ class BillingTotalCostQueryModel(SMBase): # optional filters: dict[BillingColumn, str] | None = None + # optional, AND or OR + filters_op: str | None = None + # order by, reverse= TRUE for DESC, FALSE for ASC order_by: dict[BillingColumn, bool] | None = None limit: int | None = None diff --git a/web/src/Routes.tsx b/web/src/Routes.tsx index 5abaa6d8d..298f303d8 100644 --- a/web/src/Routes.tsx +++ b/web/src/Routes.tsx @@ -6,6 +6,7 @@ import { BillingHome, BillingSeqrProp, BillingCostByTime, + BillingCostByBatch, BillingInvoiceMonthCost, } from './pages/billing' import DocumentationArticle from './pages/docs/Documentation' @@ -57,6 +58,14 @@ const Routes: React.FunctionComponent = () => ( } /> + + + + } + /> { + if (rec == null) { + return '' + } + if (rec.batch_id != null) { + return `Batch ID:${rec.batch_id}` + } + if (rec.ar_guid != null) { + return `AR GUID:${rec.ar_guid}` + } + if (rec.sequencing_group != null) { + return `Sequencing Group:${rec.sequencing_group}` + 
} + if (rec.cromwell_workflow_id != null) { + return `CROMWELL WORKFLOW ID:${rec.cromwell_workflow_id}` + } + return '' +} + +const BillingCostByBatch: React.FunctionComponent = () => { + const [searchParams] = useSearchParams() + + const now = new Date() + + // Data loading + const [isLoading, setIsLoading] = React.useState(true) + const [error, setError] = React.useState() + + const [start, setStart] = React.useState( + searchParams.get('start') ?? `${now.getFullYear()}-${now.getMonth() + 1}-01` + ) + const [end, setEnd] = React.useState( + searchParams.get('end') ?? `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + ) + + const [data, setData] = React.useState([]) + + const [searchTxt, setSearchTxt] = React.useState( + searchParams.get('searchBy') ?? undefined + ) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = (searchBy: string | undefined) => { + let url = `${location.pathname}` + if (searchBy) url += '?' + + const params: string[] = [] + if (searchBy) params.push(`searchBy=${searchBy}`) + + url += params.join('&') + navigate(url) + } + + const getData = (query: BillingTotalCostQueryModel) => { + setIsLoading(true) + setError(undefined) + new BillingApi() + .getTotalCost(query) + .then((response) => { + setIsLoading(false) + setData(response.data) + }) + .catch((er) => setError(er.message)) + } + + const handleSearch = () => { + updateNav(searchTxt) + getData({ + fields: [ + BillingColumn.CostCategory, + BillingColumn.Sku, + BillingColumn.ArGuid, + BillingColumn.BatchId, + BillingColumn.CromwellWorkflowId, + BillingColumn.SequencingGroup, + ], + start_date: start, + end_date: end, + filters: { + ar_guid: searchTxt, + batch_id: searchTxt, + sequencing_group: searchTxt, + cromwell_workflow_id: searchTxt, + }, + filters_op: 'OR', + order_by: { cost: true }, + }) + } + + const handleSearchChange = (event: any, dt: any) => { + setSearchTxt(dt.value) + } + + React.useEffect(() => {}, []) + + React.useEffect(() => { + handleSearch() + }, [searchTxt]) + + const searchCard = () => { + return ( + +

+ {/* [Card markup stripped in extraction] Title: "Billing Cost By Batch", with the search
+     input (handleSearchChange / handleSearch) and example values:
+         ar_guid: 855a6153-033c-4398-8000-46ed74c02fe8
+         batch_id: 429518
+         sequencing_group: cpg246751
+         cromwell_workflow_id: cromwell-e252f430-4143-47ec-a9c0-5f7face1b296 */}
+ )
+ }
+
+ if (error) {
+ return (
+ {/* [markup stripped] dismissible Message with onDismiss={() => setError(undefined)}, showing {error} */}
+ )
+ }
+
+ if (isLoading) {
+ return (
+ {/* [markup stripped] {searchCard()} followed by a loading state: "This query takes a while..." */}
+ )
+ }
+
+ if (data.length === 0) {
+ return (
+ {/* [markup stripped] {searchCard()} followed by "No data found." */}
+ )
+ }
+
+ return (
+ <>
+ {searchCard()}
+ {extractHeader(data[0])}
+ {/* [Table markup stripped] columns: Cost Category, SKU, Cost; one row per record k in data,
+     rendering k.cost_category, k.sku and currencyFormat(k.cost) */}
+ </>
+ + ) +} + +export default BillingCostByBatch diff --git a/web/src/pages/billing/index.ts b/web/src/pages/billing/index.ts index cdb6832fb..8b9a44ced 100644 --- a/web/src/pages/billing/index.ts +++ b/web/src/pages/billing/index.ts @@ -1,4 +1,5 @@ export { default as BillingHome } from "./BillingHome"; export { default as BillingSeqrProp } from "./BillingSeqrProp"; export { default as BillingCostByTime } from "./BillingCostByTime"; +export { default as BillingCostByBatch } from "./BillingCostByBatch"; export { default as BillingInvoiceMonthCost } from "./BillingInvoiceMonthCost"; diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 3600a827a..73c378442 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -42,6 +42,11 @@ const billingPages = { url: '/billing/costByTime', icon: , }, + { + title: 'Cost By Batch', + url: '/billing/costByBatch', + icon: , + }, { title: 'Seqr Prop Map', url: '/billing/seqrPropMap', From 0f7ca2e7612615363babc380b9429029d02397e0 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 11 Dec 2023 14:07:11 +1100 Subject: [PATCH 05/34] Billing cost by category (#629) * Added simple Total Cost By Batch Page. * Fixed autoselect day format. * Fixing day format for autoselect (missing leading 0) * Added first draft of billing page to show detail SKU per selected cost category over selected time periods (day, week, month or invoice month) * Small fix for BillingCostByBatch page, disable search if searchBy is empty or < 6 chars. * New: Billing API GET namespaces, added namespace to allowed fields for total cost. * Implemented HorizontalStackedBarChart, updated Billing By Invoice Month page to enable toggle between chart and table view. --- api/routes/billing.py | 31 ++ db/python/layers/billing_db.py | 54 ++- db/python/layers/billing_layer.py | 9 + models/models/__init__.py | 22 +- models/models/billing.py | 19 + web/src/Routes.tsx | 9 + web/src/index.css | 6 + web/src/pages/billing/BillingCostByBatch.tsx | 25 +- .../pages/billing/BillingCostByCategory.tsx | 255 +++++++++++ web/src/pages/billing/BillingCostByTime.tsx | 9 +- .../pages/billing/BillingInvoiceMonthCost.tsx | 403 +++++++++++------- web/src/pages/billing/BillingSeqrProp.tsx | 5 +- .../billing/components/FieldSelector.tsx | 22 +- web/src/pages/billing/index.ts | 1 + .../Graphs/HorizontalStackedBarChart.tsx | 288 +++++++++++++ web/src/shared/components/Header/NavBar.tsx | 5 + 16 files changed, 965 insertions(+), 198 deletions(-) create mode 100644 web/src/pages/billing/BillingCostByCategory.tsx create mode 100644 web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx diff --git a/api/routes/billing.py b/api/routes/billing.py index d47a5cf0d..f80e3199a 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -253,6 +253,24 @@ async def get_invoice_months( return records +@router.get( + '/namespaces', + response_model=list[str], + operation_id='getNamespaces', +) +@alru_cache(ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_namespaces( + author: str = get_author, +) -> list[str]: + """ + Get list of all namespaces in database + Results are sorted DESC + """ + billing_layer = initialise_billing_layer(author) + records = await billing_layer.get_namespaces() + return records + + @router.post( '/query', response_model=list[BillingRowRecord], operation_id='queryBilling' ) @@ -469,6 +487,19 @@ async def get_total_cost( "order_by": {"cost": true} } + 18. 
Get weekly total cost by sku for selected cost_category, order by day ASC: + + { + "fields": ["sku"], + "start_date": "2022-11-01", + "end_date": "2023-12-07", + "filters": { + "cost_category": "Cloud Storage" + }, + "order_by": {"day": false}, + "time_periods": "week" + } + """ billing_layer = initialise_billing_layer(author) records = await billing_layer.get_total_cost(query) diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index b9fc73e19..e36c7f540 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -1,7 +1,7 @@ import re from collections import Counter, defaultdict from datetime import datetime -from typing import Any +from typing import Any, Tuple from google.cloud import bigquery @@ -20,6 +20,7 @@ BillingColumn, BillingCostBudgetRecord, BillingRowRecord, + BillingTimePeriods, BillingTotalCostQueryModel, BillingTotalCostRecord, ) @@ -32,6 +33,29 @@ def abbrev_cost_category(cost_category: str) -> str: return 'S' if cost_category == 'Cloud Storage' else 'C' +def prepare_time_periods(query: BillingTotalCostQueryModel) -> Tuple[str, str, str]: + """Prepare Time periods grouping and parsing formulas""" + day_parse_formula = '' + day_field = '' + day_grp = 'day, ' + + # Based on specified time period, add the corresponding column + if query.time_periods == BillingTimePeriods.DAY: + day_field = 'day, ' + day_parse_formula = 'day, ' + elif query.time_periods == BillingTimePeriods.WEEK: + day_field = 'FORMAT_DATE("%Y%W", day) as day, ' + day_parse_formula = 'PARSE_DATE("%Y%W", day) as day, ' + elif query.time_periods == BillingTimePeriods.MONTH: + day_field = 'FORMAT_DATE("%Y%m", day) as day, ' + day_parse_formula = 'PARSE_DATE("%Y%m", day) as day, ' + elif query.time_periods == BillingTimePeriods.INVOICE_MONTH: + day_field = 'invoice_month as day, ' + day_parse_formula = 'PARSE_DATE("%Y%m", day) as day, ' + + return day_field, day_grp, day_parse_formula + + class BillingDb(BqDbBase): """Db layer for billing related routes""" @@ -356,6 +380,18 @@ async def get_total_cost( fields_selected = ','.join(columns) + # prepare grouping by time periods + day_parse_formula = '' + day_field = '' + day_grp = '' + if query.time_periods: + # remove existing day column, if added to fields + # this is to prevent duplicating various time periods in one query + if BillingColumn.DAY in query.fields: + columns.remove(BillingColumn.DAY) + + day_field, day_grp, day_parse_formula = prepare_time_periods(query) + # construct filters and_filters = [] query_parameters = [] @@ -415,11 +451,14 @@ async def get_total_cost( order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' _query = f""" - SELECT {fields_selected}, SUM(cost) as cost - FROM `{view_to_use}` - {filter_str} - GROUP BY {fields_selected} - {order_by_str} + WITH t AS ( + SELECT {day_field}{fields_selected}, SUM(cost) as cost + FROM `{view_to_use}` + {filter_str} + GROUP BY {day_grp}{fields_selected} + {order_by_str} + ) + SELECT {day_parse_formula}{fields_selected}, cost FROM t """ # append LIMIT and OFFSET if present @@ -808,10 +847,11 @@ async def get_running_cost( BillingColumn.COMPUTE_CATEGORY, BillingColumn.WDL_TASK_NAME, BillingColumn.CROMWELL_SUB_WORKFLOW_NAME, + BillingColumn.NAMESPACE, ): raise ValueError( 'Invalid field only topic, dataset, gcp-project, compute_category, ' - 'wdl_task_name & cromwell_sub_workflow_name are allowed' + 'wdl_task_name, cromwell_sub_workflow_name & namespace are allowed' ) ( diff --git a/db/python/layers/billing_layer.py 
b/db/python/layers/billing_layer.py index 9ba5883b3..632b345ea 100644 --- a/db/python/layers/billing_layer.py +++ b/db/python/layers/billing_layer.py @@ -123,6 +123,15 @@ async def get_invoice_months( billing_db = BillingDb(self.connection) return await billing_db.get_invoice_months() + async def get_namespaces( + self, + ) -> list[str] | None: + """ + Get All namespaces values in database + """ + billing_db = BillingDb(self.connection) + return await billing_db.get_extended_values('namespace') + async def query( self, _filter: BillingFilter, diff --git a/models/models/__init__.py b/models/models/__init__.py index aa7cf5324..5754b4239 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -8,11 +8,15 @@ ProportionalDateTemporalMethod, SequencingGroupSizeModel, ) -from models.models.assay import ( - Assay, - AssayInternal, - AssayUpsert, - AssayUpsertInternal, +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal +from models.models.billing import ( + BillingColumn, + BillingCostBudgetRecord, + BillingCostDetailsRecord, + BillingRowRecord, + BillingTimePeriods, + BillingTotalCostQueryModel, + BillingTotalCostRecord, ) from models.models.family import ( Family, @@ -62,11 +66,3 @@ ProjectSummaryInternal, WebProject, ) -from models.models.billing import ( - BillingRowRecord, - BillingTotalCostRecord, - BillingTotalCostQueryModel, - BillingColumn, - BillingCostBudgetRecord, - BillingCostDetailsRecord, -) diff --git a/models/models/billing.py b/models/models/billing.py index 4687e23ff..bd8bab129 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -142,6 +142,7 @@ class BillingColumn(str, Enum): CROMWELL_WORKFLOW_ID = 'cromwell_workflow_id' GOOG_PIPELINES_WORKER = 'goog_pipelines_worker' WDL_TASK_NAME = 'wdl_task_name' + NAMESPACE = 'namespace' @classmethod def extended_cols(cls) -> list[str]: @@ -158,6 +159,7 @@ def extended_cols(cls) -> list[str]: 'cromwell_workflow_id', 'goog_pipelines_worker', 'wdl_task_name', + 'namespace', ] @staticmethod @@ -169,6 +171,16 @@ def generate_all_title(record) -> str: return f'All {record.title()}s' +class BillingTimePeriods(str, Enum): + """List of billing grouping time periods""" + + # grouping time periods + DAY = 'day' + WEEK = 'week' + MONTH = 'month' + INVOICE_MONTH = 'invoice_month' + + class BillingTotalCostQueryModel(SMBase): """ Used to query for billing total cost @@ -192,6 +204,9 @@ class BillingTotalCostQueryModel(SMBase): limit: int | None = None offset: int | None = None + # default to day, can be day, week, month, invoice_month + time_periods: BillingTimePeriods | None = None + def __hash__(self): """Create hash for this object to use in caching""" return hash(self.json()) @@ -205,6 +220,7 @@ class BillingTotalCostRecord(SMBase): gcp_project: str | None cost_category: str | None sku: str | None + invoice_month: str | None ar_guid: str | None # extended columns dataset: str | None @@ -217,6 +233,7 @@ class BillingTotalCostRecord(SMBase): cromwell_workflow_id: str | None goog_pipelines_worker: str | None wdl_task_name: str | None + namespace: str | None cost: float currency: str | None @@ -230,6 +247,7 @@ def from_json(record): gcp_project=record.get('gcp_project'), cost_category=record.get('cost_category'), sku=record.get('sku'), + invoice_month=record.get('invoice_month'), ar_guid=record.get('ar_guid'), dataset=record.get('dataset'), batch_id=record.get('batch_id'), @@ -241,6 +259,7 @@ def from_json(record): cromwell_workflow_id=record.get('cromwell_workflow_id'), 
goog_pipelines_worker=record.get('goog_pipelines_worker'), wdl_task_name=record.get('wdl_task_name'), + namespace=record.get('namespace'), cost=record.get('cost'), currency=record.get('currency'), ) diff --git a/web/src/Routes.tsx b/web/src/Routes.tsx index 298f303d8..2e5bfffcb 100644 --- a/web/src/Routes.tsx +++ b/web/src/Routes.tsx @@ -8,6 +8,7 @@ import { BillingCostByTime, BillingCostByBatch, BillingInvoiceMonthCost, + BillingCostByCategory, } from './pages/billing' import DocumentationArticle from './pages/docs/Documentation' import SampleView from './pages/sample/SampleView' @@ -66,6 +67,14 @@ const Routes: React.FunctionComponent = () => ( } /> + + + + } + /> { const [error, setError] = React.useState() const [start, setStart] = React.useState( - searchParams.get('start') ?? `${now.getFullYear()}-${now.getMonth() + 1}-01` + searchParams.get('start') ?? + `${now.getFullYear()}-${now.getMonth().toString().padStart(2, '0')}-01` ) const [end, setEnd] = React.useState( - searchParams.get('end') ?? `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + searchParams.get('end') ?? + `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now + .getDate() + .toString() + .padStart(2, '0')}` ) const [data, setData] = React.useState([]) @@ -67,13 +72,10 @@ const BillingCostByBatch: React.FunctionComponent = () => { const updateNav = (searchBy: string | undefined) => { let url = `${location.pathname}` - if (searchBy) url += '?' - - const params: string[] = [] - if (searchBy) params.push(`searchBy=${searchBy}`) - - url += params.join('&') - navigate(url) + if (searchBy) { + url += `?searchBy=${searchBy}` + navigate(url) + } } const getData = (query: BillingTotalCostQueryModel) => { @@ -89,6 +91,11 @@ const BillingCostByBatch: React.FunctionComponent = () => { } const handleSearch = () => { + if (searchTxt === undefined || searchTxt.length < 6) { + // Seaarch text is not large enough + setIsLoading(false) + return + } updateNav(searchTxt) getData({ fields: [ diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx new file mode 100644 index 000000000..096831fc1 --- /dev/null +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -0,0 +1,255 @@ +import * as React from 'react' +import { useLocation, useNavigate, useSearchParams } from 'react-router-dom' +import { Button, Card, Grid, Input, Message, Table as SUITable } from 'semantic-ui-react' +import CostByTimeChart from './components/CostByTimeChart' +import FieldSelector from './components/FieldSelector' +import { + BillingApi, + BillingColumn, + BillingTotalCostQueryModel, + BillingTotalCostRecord, +} from '../../sm-api' + +import { convertFieldName } from '../../shared/utilities/fieldName' +import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' +import BillingCostByTimeTable from './components/BillingCostByTimeTable' +import { BarChart, IData } from '../../shared/components/Graphs/BarChart' +import { DonutChart } from '../../shared/components/Graphs/DonutChart' + +const BillingCostByCategory: React.FunctionComponent = () => { + const now = new Date() + + const [searchParams] = useSearchParams() + + const inputCostCategory: string | undefined = searchParams.get('costCategory') ?? undefined + + const [start, setStart] = React.useState( + searchParams.get('start') ?? + `${now.getFullYear() - 1}-${now.getMonth().toString().padStart(2, '0')}-01` + ) + const [end, setEnd] = React.useState( + searchParams.get('end') ?? 
+ `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now + .getDate() + .toString() + .padStart(2, '0')}` + ) + const [selectedData, setCostCategory] = React.useState(inputCostCategory) + + const [selectedPeriod, setPeriod] = React.useState(undefined) + + // Max Aggregated Data Points, rest will be aggregated into "Rest" + const maxDataPoints = 7 + + // Data loading + const [isLoading, setIsLoading] = React.useState(true) + const [error, setError] = React.useState() + const [groups, setGroups] = React.useState([]) + const [data, setData] = React.useState([]) + const [aggregatedData, setAggregatedData] = React.useState([]) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = ( + category: string | undefined, + period: string | undefined, + start: string, + end: string + ) => { + let url = `${location.pathname}` + + if (category && period) { + url += '?' + + let params: string[] = [] + if (category) params.push(`costCategory=${category}`) + if (period) params.push(`period=${period}`) + if (start) params.push(`start=${start}`) + if (end) params.push(`end=${end}`) + + url += params.join('&') + navigate(url) + } + } + + const onSelect = (event: any, recs: any) => { + setCostCategory(recs.value) + updateNav(recs.value, selectedPeriod, start, end) + } + + const onSelectPeriod = (event: any, recs: any) => { + setPeriod(recs.value) + updateNav(selectedData, recs.value, start, end) + } + + const changeDate = (name: string, value: string) => { + let start_update = start + let end_update = end + if (name === 'start') start_update = value + if (name === 'end') end_update = value + setStart(start_update) + setEnd(end_update) + updateNav(selectedData, selectedPeriod, start_update, end_update) + } + + const getData = (query: BillingTotalCostQueryModel) => { + setIsLoading(true) + setError(undefined) + new BillingApi() + .getTotalCost(query) + .then((response) => { + setIsLoading(false) + + // calc totals per sku + const recTotals = response.data.reduce( + ( + acc: { [key: string]: { [key: string]: number } }, + item: BillingTotalCostRecord + ) => { + const { sku, cost } = item + if (!acc[sku]) { + acc[sku] = 0 + } + acc[sku] += cost + return acc + }, + {} + ) + const sortedRecTotals: { [key: string]: number } = Object.fromEntries( + Object.entries(recTotals).sort(([, a], [, b]) => b - a) + ) + const rec_grps = Object.keys(sortedRecTotals) + const records = response.data.reduce( + ( + acc: { [key: string]: { [key: string]: number } }, + item: BillingTotalCostRecord + ) => { + const { day, sku, cost } = item + if (day !== undefined) { + if (!acc[day]) { + // initialise day structure + acc[day] = {} + rec_grps.forEach((k) => { + acc[day][k] = 0 + }) + } + acc[day][sku] = cost + } + return acc + }, + {} + ) + const no_undefined: string[] = rec_grps.filter( + (item): item is string => item !== undefined + ) + setGroups(no_undefined) + setData( + Object.keys(records).map((key) => ({ + date: new Date(key), + values: records[key], + })) + ) + }) + .catch((er) => setError(er.message)) + } + + React.useEffect(() => { + if ( + selectedData !== undefined && + selectedData !== '' && + selectedData !== null && + selectedPeriod !== undefined && + selectedPeriod !== '' && + selectedPeriod !== null + ) { + getData({ + fields: [BillingColumn.Sku], + start_date: start, + end_date: end, + filters: { cost_category: selectedData }, + order_by: { day: false }, + time_periods: selectedPeriod, + }) + } + }, [start, end, 
selectedData, selectedPeriod]) + + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + + return ( + <> + +

+ Billing Cost By Category +

+ + + + + + + + + + + + + + + + changeDate('start', e.target.value)} + value={start} + /> + + + + + + + + +
+ + ) +} + +export default BillingCostByCategory diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 84d22f0e9..0c6c2f337 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -28,10 +28,15 @@ const BillingCostByTime: React.FunctionComponent = () => { const inputSelectedData: string | undefined = searchParams.get('selectedData') ?? undefined const [start, setStart] = React.useState( - searchParams.get('start') ?? `${now.getFullYear()}-${now.getMonth() + 1}-01` + searchParams.get('start') ?? + `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-01` ) const [end, setEnd] = React.useState( - searchParams.get('end') ?? `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + searchParams.get('end') ?? + `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now + .getDate() + .toString() + .padStart(2, '0')}` ) const [groupBy, setGroupBy] = React.useState( fixedGroupBy ?? BillingColumn.GcpProject diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index da6190b17..68f7be778 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -7,6 +7,7 @@ import Table from '../../shared/components/Table' import { BillingApi, BillingColumn, BillingCostBudgetRecord } from '../../sm-api' import FieldSelector from './components/FieldSelector' import { convertFieldName } from '../../shared/utilities/fieldName' +import { HorizontalStackedBarChart } from '../../shared/components/Graphs/HorizontalStackedBarChart' const BillingCurrentCost = () => { const [isLoading, setIsLoading] = React.useState(true) @@ -19,6 +20,8 @@ const BillingCurrentCost = () => { direction: 'undefined', }) + const [showAsChart, setShowAsChart] = React.useState(true) + // Pull search params for use in the component const [searchParams] = useSearchParams() const inputGroupBy: string | null = searchParams.get('groupBy') @@ -195,177 +198,247 @@ const BillingCurrentCost = () => { selected={invoiceMonth} /> + + + setShowAsChart(!showAsChart)} + /> + - - - - - - - - {invoiceMonth === thisMonth ? ( - - 24H (day UTC {lastLoadedDay}) - - ) : null} - - {groupBy === BillingColumn.GcpProject ? ( - - Invoice Month (Acc) - - ) : ( - - Invoice Month (Acc) - - )} - - - - - {HEADER_FIELDS.map((k) => { - switch (k.show_always || invoiceMonth === thisMonth) { - case true: - return ( - handleSort(k.category)} + {(() => { + if (!showAsChart) return null + if (String(invoiceMonth) === String(thisMonth)) { + return ( + + + + + + + + + ) + } + return ( + + + + + + ) + })()} + + {!showAsChart ? ( +
+ + + + + + + {invoiceMonth === thisMonth ? ( + + 24H (day UTC {lastLoadedDay}) + + ) : null} + + {groupBy === BillingColumn.GcpProject ? ( + + Invoice Month (Acc) + + ) : ( + + Invoice Month (Acc) + + )} + + + + + {HEADER_FIELDS.map((k) => { + switch (k.show_always || invoiceMonth === thisMonth) { + case true: + return ( + handleSort(k.category)} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + {convertFieldName(k.title)} + + ) + default: + return null + } + })} + + {groupBy === BillingColumn.GcpProject && invoiceMonth === thisMonth ? ( + handleSort('budget_spent')} + style={{ + borderBottom: 'none', + position: 'sticky', + resize: 'horizontal', + }} + > + Budget Spend % + + ) : null} + + + + {_.orderBy( + costRecords, + [sort.column], + sort.direction === 'ascending' ? ['asc'] : ['desc'] + ).map((p) => ( + + + + handleToggle(p.field)} + /> + + {HEADER_FIELDS.map((k) => { + switch (k.category) { + case 'field': + return ( + + + + {p[k.category]} + + + + ) + default: + switch ( + k.show_always || + invoiceMonth === thisMonth + ) { + case true: + return ( + + {currencyFormat(p[k.category])} + + ) + default: + return null + } + } + })} + + {groupBy === BillingColumn.GcpProject && + invoiceMonth === thisMonth ? ( + {percFormat(p.budget_spent)} + ) : null} + + {typeof p === 'object' && + 'details' in p && + _.orderBy(p?.details, ['monthly_cost'], ['desc']).map((dk) => ( + - {convertFieldName(k.title)} - - ) - default: - return null - } - })} - - {groupBy === BillingColumn.GcpProject && invoiceMonth === thisMonth ? ( - handleSort('budget_spent')} - style={{ - borderBottom: 'none', - position: 'sticky', - resize: 'horizontal', - }} - > - Budget Spend % - - ) : null} - - - - {_.orderBy( - costRecords, - [sort.column], - sort.direction === 'ascending' ? ['asc'] : ['desc'] - ).map((p) => ( - - - - handleToggle(p.field)} - /> - - {HEADER_FIELDS.map((k) => { - switch (k.category) { - case 'field': - return ( - - - - {p[k.category]} - - - - ) - default: - switch (k.show_always || invoiceMonth === thisMonth) { - case true: - return ( - - {currencyFormat(p[k.category])} - - ) - default: - return null - } - } - })} - - {groupBy === BillingColumn.GcpProject && - invoiceMonth === thisMonth ? ( - {percFormat(p.budget_spent)} - ) : null} - - {typeof p === 'object' && - 'details' in p && - _.orderBy(p?.details, ['monthly_cost'], ['desc']).map((dk) => ( - - - {dk.cost_category} - - {dk.cost_group === 'C' ? ( - - {invoiceMonth === thisMonth ? ( - - - {currencyFormat(dk.daily_cost)} - - - - - ) : null} - - {currencyFormat(dk.monthly_cost)} - - - - ) : ( - - - {invoiceMonth === thisMonth ? ( - - - {currencyFormat(dk.daily_cost)} - - - - - ) : null} - - {currencyFormat(dk.monthly_cost)} - + + {dk.cost_category} + + {dk.cost_group === 'C' ? ( + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + ) : ( + + + {invoiceMonth === thisMonth ? ( + + + {currencyFormat(dk.daily_cost)} + + + + + ) : null} + + {currencyFormat(dk.monthly_cost)} + + + + )} + + {groupBy === BillingColumn.GcpProject ? ( - - )} - - {groupBy === BillingColumn.GcpProject ? ( - - ) : null} - - ))} - - ))} - -
+ ) : null} + + ))} + + ))} + + + ) : null} ) } diff --git a/web/src/pages/billing/BillingSeqrProp.tsx b/web/src/pages/billing/BillingSeqrProp.tsx index cca54ed43..2fc9ba0e6 100644 --- a/web/src/pages/billing/BillingSeqrProp.tsx +++ b/web/src/pages/billing/BillingSeqrProp.tsx @@ -7,7 +7,10 @@ const BillingSeqrProp: React.FunctionComponent = () => { const now = new Date() const [start, setStart] = React.useState(`${now.getFullYear()}-01-01`) const [end, setEnd] = React.useState( - `${now.getFullYear()}-${now.getMonth() + 1}-${now.getDate()}` + `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now + .getDate() + .toString() + .padStart(2, '0')}` ) // use navigate and update url params diff --git a/web/src/pages/billing/components/FieldSelector.tsx b/web/src/pages/billing/components/FieldSelector.tsx index 438933d69..83e256c9b 100644 --- a/web/src/pages/billing/components/FieldSelector.tsx +++ b/web/src/pages/billing/components/FieldSelector.tsx @@ -1,6 +1,6 @@ import * as React from 'react' import { Dropdown, Input, Message } from 'semantic-ui-react' -import { BillingApi, BillingColumn } from '../../../sm-api' +import { BillingApi, BillingColumn, BillingTimePeriods } from '../../../sm-api' import { convertFieldName } from '../../../shared/utilities/fieldName' interface FieldSelectorProps { @@ -85,14 +85,34 @@ const FieldSelector: React.FunctionComponent = ({ .catch((er) => setError(er.message)) } + const getCostCategories = () => { + setLoading(true) + setError(undefined) + new BillingApi() + .getCostCategories() + .then((response) => { + processResponse(response.data) + }) + .catch((er) => setError(er.message)) + } + React.useEffect(() => { if (fieldName === BillingColumn.Topic) getTopics() else if (fieldName === BillingColumn.GcpProject) getGcpProjects() else if (fieldName === BillingColumn.InvoiceMonth) getInvoiceMonths() else if (fieldName === BillingColumn.Stage) getStages() + else if (fieldName === BillingColumn.CostCategory) getCostCategories() else if (fieldName === 'Group') { setRecords([BillingColumn.GcpProject, BillingColumn.Topic, BillingColumn.Stage]) setLoading(false) + } else if (fieldName === 'Period') { + setRecords([ + BillingTimePeriods.Day, + BillingTimePeriods.Week, + BillingTimePeriods.Month, + BillingTimePeriods.InvoiceMonth, + ]) + setLoading(false) } else { setError(`Could not load records for ${fieldName}`) } diff --git a/web/src/pages/billing/index.ts b/web/src/pages/billing/index.ts index 8b9a44ced..3e072f0ba 100644 --- a/web/src/pages/billing/index.ts +++ b/web/src/pages/billing/index.ts @@ -2,4 +2,5 @@ export { default as BillingHome } from "./BillingHome"; export { default as BillingSeqrProp } from "./BillingSeqrProp"; export { default as BillingCostByTime } from "./BillingCostByTime"; export { default as BillingCostByBatch } from "./BillingCostByBatch"; +export { default as BillingCostByCategory } from "./BillingCostByCategory"; export { default as BillingInvoiceMonthCost } from "./BillingInvoiceMonthCost"; diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx new file mode 100644 index 000000000..82849d792 --- /dev/null +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -0,0 +1,288 @@ +import React from 'react' +import * as d3 from 'd3' +import LoadingDucks from '../LoadingDucks/LoadingDucks' +import { BillingCostBudgetRecord } from '../../../sm-api' + +interface HorizontalStackedBarChartProps { + data: 
BillingCostBudgetRecord[] + title: string + series: string[] + labels: string[] + total_series: string + threshold_values: number[] + threshold_series: string + sorted_by: string + colors: (t: number) => string | undefined + isLoading: boolean + showLegend: boolean +} + +const HorizontalStackedBarChart: React.FC = ({ + data, + title, + series, + labels, + total_series, + threshold_values, + threshold_series, + sorted_by, + colors, + isLoading, + showLegend, +}) => { + if (!data || data.length === 0) { + return
No data available
+ } + + const colorFunc: (t: number) => string | undefined = colors ?? d3.interpolateRainbow + + // set the dimensions and margins of the graph + const margin = { top: 80, right: 20, bottom: 50, left: 250 } + const width = 650 - margin.left - margin.right + const outsideHeight = 2850 + const height = outsideHeight - margin.top - margin.bottom + + const containerDivRef = React.useRef() + + const [clientWidth, setClientWidth] = React.useState(650) + + React.useEffect(() => { + function updateWindowWidth() { + setClientWidth(containerDivRef.current?.clientWidth ?? 650) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + + return () => { + window.removeEventListener('resize', updateWindowWidth) + } + }, []) + + const contDiv = containerDivRef.current + if (contDiv) { + // reset svg + contDiv.innerHTML = '' + + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // prepare data + const maxTotalSeries = Math.max(...data.map((item) => item[total_series])) + const typeKeys = data.map((d) => d.field) + data.sort((a, b) => b[sorted_by] - a[sorted_by]) + + // stack the data + const stack_fnc = d3 + .stack() + .keys(series) + .order(d3.stackOrderNone) + .offset(d3.stackOffsetNone) + + const stackedData = stack_fnc(data) + const indexedData = stackedData.map((innerArray, outerIdx) => + innerArray.map((d, innerIdx) => ({ outerIdx, innerIdx, data: d })) + ) + + // construct svg + const svg = d3 + .select(contDiv) + .append('svg') + .attr('width', '100%') + .attr('height', '100%') + .attr('viewBox', `0 0 650 ${outsideHeight}`) + .attr('preserveAspectRatio', 'xMinYMin') + .append('g') + .attr('transform', `translate(${margin.left}, ${margin.top})`) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern0') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('stroke', '#000000') + .attr('stroke-width', 1) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern1') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('d', 'M-1,1 l2,-2 M0,4 l4,-4 M3,5 l2,-2') + .attr('stroke', '#000000') + .attr('stroke-width', 1) + + svg.append('defs') + .append('pattern') + .attr('id', 'pattern2') + .attr('patternUnits', 'userSpaceOnUse') + .attr('width', 4) + .attr('height', 4) + .append('path') + .attr('d', 'M 2 0 L 2 4') + .attr('stroke', '#000000') + .attr('stroke-width', 1) + + // X scale and Axis + const formater = d3.format('.1s') + const xScale = d3.scaleSqrt().domain([0, maxTotalSeries]).range([0, width]) + + svg.append('g') + .attr('transform', `translate(0, ${height})`) + .call(d3.axisBottom(xScale).ticks(7).tickSize(0).tickPadding(6).tickFormat(formater)) + .call((d) => d.select('.domain').remove()) + + // Y scale and Axis + const yScale = d3 + .scaleBand() + .domain(data.map((d) => d.field)) + .range([0, height]) + .padding(0.2) + + svg.append('g') + .style('font-size', '18px') // make the axis labels bigger + .call(d3.axisLeft(yScale).tickSize(0).tickPadding(5)) + + // color palette + const color = d3.scaleOrdinal().domain(typeKeys).range(['url(#pattern0)', 'url(#pattern1)']) + + const color_fnc = (d) => { + if (threshold_series === undefined) { + // if not defiend trhesholds then use the color function + return colorFunc(d.innerIdx / typeKeys.length) + } + if (d.data.data[threshold_series] == null) { + // no threshold value defined for bar + return 'grey' + } + if (d.data.data[threshold_series] >= threshold_values[0]) { + return 'red' + } + if (d.data.data[threshold_series] >= threshold_values[1]) { + return 'orange' + } + return 'green' + } + + // set vertical grid line + const GridLine = () => { + return d3.axisBottom().scale(xScale) + } + + svg.append('g') + .attr('class', 'grid') + .call(GridLine().tickSize(height, 0, 0).tickFormat('').ticks(8)) + + // create a tooltip + const tooltip = d3.select('body').append('div').attr('id', 'chart').attr('class', 'tooltip') + + // tooltip events + const mouseover = (d) => { + tooltip.style('opacity', 0.8) + d3.select(this).style('opacity', 0.5) + } + const mousemove = (event, d) => { + const formater = d3.format(',.2f') + tooltip + .html(formater(d.data[1] - d.data[0]) + ' AUD') + .style('top', event.pageY - 10 + 'px') + .style('left', event.pageX + 10 + 'px') + } + const mouseleave = (d) => { + tooltip.style('opacity', 0) + d3.select(this).style('opacity', 1) + } + + // 
create bars + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => xScale(d.data[0])) + .attr('y', (d) => yScale(d.data.data.field)) + .attr('width', (d) => xScale(d.data[1]) - xScale(d.data[0])) + .attr('height', yScale.bandwidth()) + .attr('fill', (d) => color_fnc(d)) + + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + .attr('fill', (d) => color(d)) + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => xScale(d.data[0])) + .attr('y', (d) => yScale(d.data.data.field)) + .attr('width', (d) => xScale(d.data[1]) - xScale(d.data[0])) + .attr('height', yScale.bandwidth()) + .on('mouseover', mouseover) + .on('mousemove', mousemove) + .on('mouseleave', mouseleave) + + // set title + svg.append('text') + .attr('class', 'chart-title') + .style('font-size', '18px') + .attr('x', 0) + .attr('y', -margin.top / 1.7) + .attr('text-anchor', 'start') + .text(title) + + // set Y axis label + svg.append('text') + .attr('class', 'chart-label') + .style('font-size', '18px') + .attr('x', width / 2) + .attr('y', height + margin.bottom) + .attr('text-anchor', 'middle') + .text('AUD') + + if (showLegend) { + // Legend + for (let i = 0; i < labels.length; i++) { + svg.append('rect') + .attr('x', 0 + i * 150) + .attr('y', -(margin.top / 2.5)) + .attr('width', 15) + .attr('height', 15) + .style('fill', `url(#pattern${i})`) + + if (i === 0) { + // add background + svg.append('rect') + .attr('x', 0 + i * 150) + .attr('y', -(margin.top / 2.5)) + .attr('width', 15) + .attr('height', 15) + .style('fill', 'grey') + } + + svg.append('text') + .attr('class', 'legend') + .attr('x', 20 + i * 150) + .attr('y', -(margin.top / 3.8)) + .text(labels[i]) + } + } + } + return
+} + +export { HorizontalStackedBarChart } diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 73c378442..2b64d9ba3 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -47,6 +47,11 @@ const billingPages = { url: '/billing/costByBatch', icon: , }, + { + title: 'Cost By Category', + url: '/billing/costByCategory', + icon: , + }, { title: 'Seqr Prop Map', url: '/billing/seqrPropMap', From d97b5b7dcfa8ba304f2bd5de6d83f71124d3732b Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 18 Dec 2023 10:05:11 +1100 Subject: [PATCH 06/34] Stacked Bars Chart with option to accumulate data. (#634) * Implemented Stacked bars with option to accumulate data. * Added budget bar to billing horizontal bar chart, added background color for the billing table to reflect the chart colours. * Added simple prediction of billing stacked bar chart. --- db/python/layers/billing_db.py | 1 + models/models/billing.py | 2 + web/src/index.css | 27 +- .../pages/billing/BillingCostByCategory.tsx | 166 ++++++---- web/src/pages/billing/BillingCostByTime.tsx | 10 +- .../pages/billing/BillingInvoiceMonthCost.tsx | 29 +- .../billing/components/CostByTimeBarChart.tsx | 37 +++ .../billing/components/FieldSelector.tsx | 6 +- .../Graphs/HorizontalStackedBarChart.tsx | 68 ++++- .../Graphs/StackedAreaByDateChart.tsx | 6 +- .../components/Graphs/StackedBarChart.tsx | 287 ++++++++++++++++++ 11 files changed, 558 insertions(+), 81 deletions(-) create mode 100644 web/src/pages/billing/components/CostByTimeBarChart.tsx create mode 100644 web/src/shared/components/Graphs/StackedBarChart.tsx diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index e36c7f540..dbf9818a0 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -821,6 +821,7 @@ async def append_running_cost_records( 'budget_spent': 100 * monthly / budget_monthly if budget_monthly else None, + 'budget': budget_monthly, 'last_loaded_day': last_loaded_day, } ) diff --git a/models/models/billing.py b/models/models/billing.py index bd8bab129..861317529 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -297,6 +297,7 @@ class BillingCostBudgetRecord(SMBase): storage_daily: float | None details: list[BillingCostDetailsRecord] | None budget_spent: float | None + budget: float | None last_loaded_day: str | None @@ -315,5 +316,6 @@ def from_json(record): BillingCostDetailsRecord.from_json(row) for row in record.get('details') ], budget_spent=record.get('budget_spent'), + budget=record.get('budget'), last_loaded_day=record.get('last_loaded_day'), ) diff --git a/web/src/index.css b/web/src/index.css index 28b7b36ca..c0228c110 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -214,7 +214,7 @@ html[data-theme='dark-mode'] .ui.table { } .donut-chart { - margin-top: 20px; + padding-top: 20px; } /* missing styles regarding dark/light theme */ @@ -247,3 +247,28 @@ html[data-theme='dark-mode'] .ui.table { background-color: var(--color-bg-card); color: var(--color-text-primary); } + +.chart-card { + background-color: var(--color-bg-card); + color: var(--color-text-primary) !important; +} + +.chart-label { + color: var(--color-text-primary) !important; +} + +.hb-chart-grid { + color: var(--color-divider) !important; +} + +.billing-over-budget { + background-color: rgba(128, 0, 0, 0.2) !important; +} + +.billing-half-budget { + background-color: rgba(255, 165, 0, 0.2) !important; +} + +.billing-under-budget 
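/* the three billing-*-budget classes shade rows of the Billing By Invoice Month
   table according to the share of the monthly budget already spent */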
{ + background-color: rgba(0, 128, 0, 0.2) !important; +} diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx index 096831fc1..1c2daed1c 100644 --- a/web/src/pages/billing/BillingCostByCategory.tsx +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -1,27 +1,32 @@ import * as React from 'react' import { useLocation, useNavigate, useSearchParams } from 'react-router-dom' -import { Button, Card, Grid, Input, Message, Table as SUITable } from 'semantic-ui-react' -import CostByTimeChart from './components/CostByTimeChart' +import { Button, Card, Checkbox, Grid, Input, Message } from 'semantic-ui-react' +import CostByTimeBarChart from './components/CostByTimeBarChart' import FieldSelector from './components/FieldSelector' import { BillingApi, BillingColumn, BillingTotalCostQueryModel, BillingTotalCostRecord, + BillingTimePeriods, } from '../../sm-api' import { convertFieldName } from '../../shared/utilities/fieldName' import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' -import BillingCostByTimeTable from './components/BillingCostByTimeTable' -import { BarChart, IData } from '../../shared/components/Graphs/BarChart' -import { DonutChart } from '../../shared/components/Graphs/DonutChart' const BillingCostByCategory: React.FunctionComponent = () => { const now = new Date() const [searchParams] = useSearchParams() + const inputGroupBy: string | undefined = searchParams.get('groupBy') ?? undefined + const fixedGroupBy: BillingColumn = inputGroupBy + ? (inputGroupBy as BillingColumn) + : BillingColumn.GcpProject + + const inputSelectedGroup: string | undefined = searchParams.get('group') ?? undefined const inputCostCategory: string | undefined = searchParams.get('costCategory') ?? undefined + const inputPeriod: string | undefined = searchParams.get('period') ?? BillingTimePeriods.Month const [start, setStart] = React.useState( searchParams.get('start') ?? @@ -34,54 +39,69 @@ const BillingCostByCategory: React.FunctionComponent = () => { .toString() .padStart(2, '0')}` ) - const [selectedData, setCostCategory] = React.useState(inputCostCategory) - const [selectedPeriod, setPeriod] = React.useState(undefined) + const [selectedGroup, setSelectedGroup] = React.useState(inputSelectedGroup) + const [selectedCostCategory, setCostCategory] = React.useState( + inputCostCategory + ) - // Max Aggregated Data Points, rest will be aggregated into "Rest" - const maxDataPoints = 7 + const [selectedPeriod, setPeriod] = React.useState(inputPeriod) // Data loading const [isLoading, setIsLoading] = React.useState(true) const [error, setError] = React.useState() - const [groups, setGroups] = React.useState([]) const [data, setData] = React.useState([]) - const [aggregatedData, setAggregatedData] = React.useState([]) + + const [groupBy, setGroupBy] = React.useState( + fixedGroupBy ?? BillingColumn.GcpProject + ) + + const [accumulate, setAccumulate] = React.useState(true) // use navigate and update url params const location = useLocation() const navigate = useNavigate() const updateNav = ( + grpBy: BillingColumn, + grp: string | undefined, category: string | undefined, period: string | undefined, - start: string, - end: string + st: string, + ed: string ) => { let url = `${location.pathname}` + url += '?' 
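        // push every current selection (group-by, group, cost category, period
        // and date range) into the query string so the selected view can be
        // shared and is restored from the URL on reload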
+ let params: string[] = [] + if (grpBy) params.push(`groupBy=${grpBy}`) + if (grp) params.push(`group=${grp}`) + if (category) params.push(`costCategory=${category}`) + if (period) params.push(`period=${period}`) + if (st) params.push(`start=${st}`) + if (ed) params.push(`end=${ed}`) + url += params.join('&') + navigate(url) + } - if (category && period) { - url += '?' - - let params: string[] = [] - if (category) params.push(`costCategory=${category}`) - if (period) params.push(`period=${period}`) - if (start) params.push(`start=${start}`) - if (end) params.push(`end=${end}`) + const onGroupBySelect = (event: any, recs: any) => { + setGroupBy(recs.value) + setSelectedGroup(undefined) + updateNav(recs.value, undefined, selectedCostCategory, selectedPeriod, start, end) + } - url += params.join('&') - navigate(url) - } + const onSelectGroup = (event: any, recs: any) => { + setSelectedGroup(recs.value) + updateNav(groupBy, recs.value, selectedCostCategory, selectedPeriod, start, end) } - const onSelect = (event: any, recs: any) => { + const onSelectCategory = (event: any, recs: any) => { setCostCategory(recs.value) - updateNav(recs.value, selectedPeriod, start, end) + updateNav(groupBy, selectedGroup, recs.value, selectedPeriod, start, end) } const onSelectPeriod = (event: any, recs: any) => { setPeriod(recs.value) - updateNav(selectedData, recs.value, start, end) + updateNav(groupBy, selectedGroup, selectedCostCategory, recs.value, start, end) } const changeDate = (name: string, value: string) => { @@ -91,7 +111,14 @@ const BillingCostByCategory: React.FunctionComponent = () => { if (name === 'end') end_update = value setStart(start_update) setEnd(end_update) - updateNav(selectedData, selectedPeriod, start_update, end_update) + updateNav( + groupBy, + selectedGroup, + selectedCostCategory, + selectedPeriod, + start_update, + end_update + ) } const getData = (query: BillingTotalCostQueryModel) => { @@ -141,10 +168,6 @@ const BillingCostByCategory: React.FunctionComponent = () => { }, {} ) - const no_undefined: string[] = rec_grps.filter( - (item): item is string => item !== undefined - ) - setGroups(no_undefined) setData( Object.keys(records).map((key) => ({ date: new Date(key), @@ -156,24 +179,27 @@ const BillingCostByCategory: React.FunctionComponent = () => { } React.useEffect(() => { - if ( - selectedData !== undefined && - selectedData !== '' && - selectedData !== null && - selectedPeriod !== undefined && - selectedPeriod !== '' && - selectedPeriod !== null - ) { + // if selectedCostCategory is all + const selFilters: { [key: string]: string } = {} + + if (groupBy && selectedGroup && !selectedGroup.startsWith('All ')) { + selFilters[groupBy] = selectedGroup + } + if (selectedCostCategory && !selectedCostCategory.startsWith('All ')) { + selFilters.cost_category = selectedCostCategory + } + + if (selectedPeriod !== undefined && selectedPeriod !== '' && selectedPeriod !== null) { getData({ fields: [BillingColumn.Sku], start_date: start, end_date: end, - filters: { cost_category: selectedData }, + filters: selFilters, order_by: { day: false }, time_periods: selectedPeriod, }) } - }, [start, end, selectedData, selectedPeriod]) + }, [groupBy, selectedGroup, selectedCostCategory, selectedPeriod, start, end]) if (error) { return ( @@ -198,20 +224,41 @@ const BillingCostByCategory: React.FunctionComponent = () => { Billing Cost By Category - - + + + + + + + + + + - + { autoSelect={false} /> - - { + + + + setAccumulate(!accumulate)} + /> + + + - diff --git 
a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 0c6c2f337..b698a78c4 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -228,7 +228,7 @@ const BillingCostByTime: React.FunctionComponent = () => { Billing Cost By Time - + { - + { - + { /> - + { - + { ) - if (isLoading) - return ( -
- -

- This query takes a while... -

-
- ) + const rowColor = (p: BillingCostBudgetRecord) => { + if (p.budget_spent === undefined || p.budget_spent === null) { + return '' + } + if (p.budget_spent > 90) { + return 'billing-over-budget' + } + if (p.budget_spent > 50) { + return 'billing-half-budget' + } + return 'billing-under-budget' + } const handleSort = (clickedColumn: string) => { if (sort.column !== clickedColumn) { @@ -181,7 +184,7 @@ const BillingCurrentCost = () => { <>

Billing By Invoice Month

- + { if (String(invoiceMonth) === String(thisMonth)) { return ( - + { showLegend={false} /> - + { sort.direction === 'ascending' ? ['asc'] : ['desc'] ).map((p) => ( - + = ({ + accumulate, + isLoading, + data, +}) => { + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + return ( + <> + + + ) +} + +export default CostByTimeBarChart diff --git a/web/src/pages/billing/components/FieldSelector.tsx b/web/src/pages/billing/components/FieldSelector.tsx index 83e256c9b..3b20b1294 100644 --- a/web/src/pages/billing/components/FieldSelector.tsx +++ b/web/src/pages/billing/components/FieldSelector.tsx @@ -26,7 +26,11 @@ const FieldSelector: React.FunctionComponent = ({ const extendRecords = (recs: string[]) => { if (includeAll) { - return [`All ${convertFieldName(fieldName)}s`, ...recs] + let fname = convertFieldName(fieldName) + if (fname.endsWith('y')) { + fname = fname.substring(0, fname.length - 1) + 'ie' + } + return [`All ${fname}s`, ...recs] } return recs } diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx index 82849d792..ed84f9793 100644 --- a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -2,6 +2,7 @@ import React from 'react' import * as d3 from 'd3' import LoadingDucks from '../LoadingDucks/LoadingDucks' import { BillingCostBudgetRecord } from '../../../sm-api' +import { grey } from '@mui/material/colors' interface HorizontalStackedBarChartProps { data: BillingCostBudgetRecord[] @@ -77,7 +78,7 @@ const HorizontalStackedBarChart: React.FC = ({ } // prepare data - const maxTotalSeries = Math.max(...data.map((item) => item[total_series])) + let maxTotalSeries = Math.max(...data.map((item) => item[total_series])) const typeKeys = data.map((d) => d.field) data.sort((a, b) => b[sorted_by] - a[sorted_by]) @@ -92,6 +93,18 @@ const HorizontalStackedBarChart: React.FC = ({ const indexedData = stackedData.map((innerArray, outerIdx) => innerArray.map((d, innerIdx) => ({ outerIdx, innerIdx, data: d })) ) + const budgetData = data.reduce((acc, d) => { + acc[d.field] = d.budget + return acc + }, {}) + + const maxBudget = Math.max(...data.map((item) => item.budget)) + + if (showLegend) { + if (maxBudget > maxTotalSeries) { + maxTotalSeries = maxBudget * 1.01 + } + } // construct svg const svg = d3 @@ -178,13 +191,13 @@ const HorizontalStackedBarChart: React.FC = ({ } // set vertical grid line - const GridLine = () => { - return d3.axisBottom().scale(xScale) - } + const GridLine = () => d3.axisBottom().scale(xScale) svg.append('g') - .attr('class', 'grid') + .attr('class', 'hb-chart-grid') .call(GridLine().tickSize(height, 0, 0).tickFormat('').ticks(8)) + .selectAll('line') + .style('stroke-dasharray', '5,5') // create a tooltip const tooltip = d3.select('body').append('div').attr('id', 'chart').attr('class', 'tooltip') @@ -236,6 +249,35 @@ const HorizontalStackedBarChart: React.FC = ({ .on('mousemove', mousemove) .on('mouseleave', mouseleave) + // create bidgetn line + const budgetFnc = (d) => { + if (showLegend) { + return xScale(budgetData[d.data.data.field]) + } + return 0 + } + + const budgetColor = (d) => { + const budgetVal = budgetData[d.data.data.field] + if (showLegend && budgetVal !== null && budgetVal !== undefined) { + return 'darkcyan' + } + return 'rgba(0, 0, 0, 0)' + } + + svg.append('g') + .selectAll('g') + .data(indexedData) + .join('g') + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d) => budgetFnc(d)) + .attr('y', (d) => yScale(d.data.data.field) - 5) + .attr('width', (d) => 3) + .attr('height', yScale.bandwidth() + 10) + .attr('fill', (d) => budgetColor(d)) + // set title svg.append('text') .attr('class', 'chart-title') @@ -280,6 +322,22 @@ 
const HorizontalStackedBarChart: React.FC = ({ .attr('y', -(margin.top / 3.8)) .text(labels[i]) } + + // add budget bar if defined + if (maxBudget !== undefined && maxBudget !== null && maxBudget > 0) { + svg.append('rect') + .attr('x', labels.length * 150) + .attr('y', -(margin.top / 2.5)) + .attr('width', 3) + .attr('height', 15) + .style('fill', 'darkcyan') + + svg.append('text') + .attr('class', 'legend') + .attr('x', 20 + labels.length * 150) + .attr('y', -(margin.top / 3.8)) + .text('Budget') + } } } return
diff --git a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx index 08ebf3d34..aa6d731f8 100644 --- a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx +++ b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx @@ -76,7 +76,7 @@ export const StackedAreaByDateChart: React.FC = ({ colors, }) => { if (!data || data.length === 0) { - return + return No Data } const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow @@ -134,6 +134,10 @@ export const StackedAreaByDateChart: React.FC = ({ .domain(extent(data, (d) => d.date)) // date is a string, will this take a date object? Yes :) .range([0, width - margin.left - margin.right]) + if (stackedData.length === 0) { + return No Data + } + // use last stackData value to calculate max Y axis point const diffX = stackedData[stackedData.length - 1].flatMap((val) => val[1]) diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx new file mode 100644 index 000000000..901e26fa9 --- /dev/null +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -0,0 +1,287 @@ +import * as d3 from 'd3' +import _ from 'lodash' +import React from 'react' + +export interface IStackedBarChartData { + date: Date + values: { [key: string]: number } +} + +interface IStackedBarChartProps { + data?: IStackedBarChartData[] + accumulate: boolean +} + +export const StackedBarChart: React.FC = ({ data, accumulate }) => { + const colorFunc: (t: number) => string | undefined = d3.interpolateRainbow + const margin = { top: 50, right: 50, bottom: 100, left: 100 } + const height = 800 - margin.top - margin.bottom + const marginLegend = 10 + + const containerDivRef = React.useRef() + const [width, setWidth] = React.useState(768) + + React.useEffect(() => { + function updateWindowWidth() { + setWidth(containerDivRef.current?.clientWidth || 768) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) + + return () => { + window.removeEventListener('resize', updateWindowWidth) + } + }, []) + + if (!data || data.length === 0) { + return No Data + } + + const contDiv = containerDivRef.current + if (contDiv) { + // reset svg + contDiv.innerHTML = '' + } + + const series = Object.keys(data[0].values) + const seriesLength = series.length + + // Get the last date in the data array + const lastDate = data[data.length - 1].date + + // Create 3 new dates + // TODO make it as custom props + const newDates = d3 + .range(1, 4) + .map((day) => new Date(lastDate.getTime() + day * 24 * 60 * 60 * 1000)) + + // Interpolate the values for the new dates + const newValues = newDates.map((date, i) => { + if (i < data.length) { + const prevData = data[data.length - 1 - i] + const nextData = data[data.length - 1 - i] + return { + date, + values: series.reduce((values, key) => { + // TODO revisit how we extrapolate new data + const interpolator = d3.interpolate(prevData.values[key], nextData.values[key]) + values[key] = interpolator((i + 1) / 6) + return values + }, {}), + } + } + }) + + // Add the new values to the data array + let extData = data.concat(newValues) + extData = extData.filter((item) => item !== undefined) + + // X - values + const x_vals = extData.map((d) => d.date.toISOString().substring(0, 10)) + + // prepare stacked data + let stackedData + if (accumulate) { + const accumulatedData = extData.reduce((acc: any[], curr) => { + const last = acc[acc.length - 1] 
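                // `last` is the previously accumulated entry (undefined for the
                // first date); each series value below adds the current value to
                // its running total, so daily costs of e.g. {A: 1}, {A: 2}, {A: 3}
                // become {A: 1}, {A: 3}, {A: 6} when `accumulate` is enabled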
+ const accumulated = { + date: curr.date, + values: Object.keys(curr.values).reduce( + (accValues: Record, key) => { + return { + ...accValues, + [key]: (last ? last.values[key] : 0) + curr.values[key], + } + }, + {} + ), + } + return [...acc, accumulated] + }, []) + + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(accumulatedData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } else { + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(extData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } + + // find max values for the X axes + const y1Max = d3.max(stackedData, (y) => d3.max(y, (d) => d[1])) + + // tooltip events + const tooltip = d3.select('body').append('div').attr('id', 'chart').attr('class', 'tooltip') + + const mouseover = (d) => { + tooltip.style('opacity', 0.8) + d3.select(this).style('opacity', 0.5) + } + const mousemove = (event, d) => { + const formater = d3.format(',.2f') + tooltip + .html(d.key + ' ' + formater(d[1] - d[0]) + ' AUD') + .style('top', event.pageY - 10 + 'px') + .style('left', event.pageX + 10 + 'px') + } + const mouseleave = (d) => { + tooltip.style('opacity', 0) + d3.select(this).style('opacity', 1) + } + + const x = d3 + .scaleBand() + .domain(d3.range(x_vals.length)) + .rangeRound([margin.left, width - margin.right]) + .padding(0.08) + + // create root svg element + const svg = d3 + .select(contDiv) + .append('svg') + .attr('viewBox', [0, 0, width, height]) + .attr('height', height) + .attr('style', 'max-width: 100%; height: auto;') + + // calculate opacity (for new dates) + const opacity = 0.3 + const calcOpacity = (d) => { + const idx = series.indexOf(d.key) + const color = d3.color(colorFunc(idx / seriesLength)) + if (newDates.includes(d.data.date)) { + return d3.rgb(color.r, color.g, color.b, opacity) + } + + return color + } + + // bars + const rect = svg + .selectAll('g') + .data(stackedData) + .join('g') + .attr('fill', (d, i) => colorFunc(i / seriesLength)) + .attr('id', (d, i) => `path${i}`) + .selectAll('rect') + .data((d) => d) + .join('rect') + .attr('x', (d, i) => x(i)) + .attr('y', height - margin.bottom) + .attr('width', x.bandwidth()) + .attr('height', 0) + .attr('fill', (d) => calcOpacity(d)) + .on('mouseover', mouseover) + .on('mousemove', mousemove) + .on('mouseleave', mouseleave) + + // x-axis & labels + const formatX = (val: number): string => x_vals[val] + + const x_labels = svg + .append('g') + .attr('transform', `translate(0,${height - margin.bottom})`) + .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) + + if (x_vals.length > 10) { + // rotate x labels, if too many + x_labels + .selectAll('text') + .attr('transform', 'rotate(-90)') + .attr('text-anchor', 'end') + .attr('dy', '-0.55em') + .attr('dx', '-1em') + } + + // y-axis & labels + const y = d3 + .scaleLinear() + .domain([0, y1Max]) + .range([height - margin.bottom, margin.top]) + + const y_axis = d3.axisLeft().scale(y).ticks(10, '$.0f') + svg.append('g').attr('transform', `translate(${margin.left}, 0)`).call(y_axis) + + // animate bars + rect.transition() + .duration(200) + .delay((d, i) => i * 5) + .attr('y', (d) => y(d[1])) + .attr('height', (d) => y(d[0]) - y(d[1])) + .transition() + .attr('x', (d, i) => x(i)) + .attr('width', x.bandwidth()) + + // on Hover + const onHoverOver = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 0.5) + 
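        // bars for each series are grouped under <g id="path{i}">, so hovering a
        // legend entry fades that whole series; the legend circle is faded and
        // its label emboldened just below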
d3.select(tg).selectAll('circle').style('fill-opacity', 0.5) + d3.select(tg).selectAll('text').attr('font-weight', 'bold') + } + + const onHoverOut = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 1) + d3.select(tg).selectAll('circle').style('fill-opacity', 1) + d3.select(tg).selectAll('text').attr('font-weight', 'normal') + } + + // add legend + const svgLegend = d3 + .select(contDiv) + .append('svg') + .attr('height', height) + .attr('viewBox', `0 0 450 ${height}`) + + svgLegend + .selectAll('g.legend') + .attr('transform', `translate(0, ${margin.top})`) + .data(series) + .enter() + .append('g') + .attr('id', (d, i) => `legend${i}`) + .attr('transform', (d, i) => `translate(${marginLegend},${marginLegend + i * 20})`) + .each(function (d, i) { + d3.select(this) + .append('circle') + .attr('r', 8) + .attr('fill', (d) => colorFunc(i / seriesLength)) + d3.select(this) + .append('text') + .attr('text-anchor', 'start') + .attr('x', 10) + .attr('y', 0) + .attr('dy', '0.5em') + .text(d) + .attr('font-size', '0.8em') + d3.select(this) + .on('mouseover', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOver(element.node(), i) + }) + .on('mouseout', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOut(element.node(), i) + }) + }) + + // set all text to 15px + svg.selectAll('text').style('font-size', '20px') + + // Simple responsive, move legend to bottom if mobile + if (width < 1000) { + // if mobile / tablet size + svgLegend.attr('width', '100%') + svg.attr('width', '100%') + } else { + svgLegend.attr('width', '30%') + svg.attr('width', '70%') + } + + return
+} From 80dd3c1a7ffa91cb28b0794b25a10f50d140d44d Mon Sep 17 00:00:00 2001 From: Sabrina Yan <9669990+violetbrina@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:01:59 +1100 Subject: [PATCH 07/34] Billing hail batch layout (#633) * Added simple Total Cost By Batch Page. * Removing debug prints. * Fixed autoselect day format. * Fixing day format for autoselect (missing leading 0) * Added first draft of billing page to show detail SKU per selected cost category over selected time periods (day, week, month or invoice month) * Small fix for BillingCostByBatch page, disable search if searchBy is empty or < 6 chars. * New: Billing API GET namespaces, added namespace to allowed fields for total cost. * Implemented HorizontalStackedBarChart, updated Billing By Invoice Month page to enable toggle between chart and table view. * ADD: Cost by Analysis page * ADD: add start of Analysis grid * ADD: add start of Analysis grid * FIX: table fixes for the HailBatchGrid * API: api changes to enable query of the raw table * API: fixed and working with updated get_total_cost endpoint * API: fix typing of get_total_cost (default return is now a list[dict] and can be converted in the layer/route to a specific output type * API: add endpoint to get costs by batch_id * API: done * IN PROGRESS: modifying Cost By Analysis to use new endpoints * IN PROGRESS: changes to Cost By Analysis, linking with backend API. * IN PROGRESS: changes to Cost By Analysis, grid grouping by ar/batch/job. * NEW: finalising Cost By Analysis page * ADD: durations to Cost By Analysis page --------- Co-authored-by: Milo Hyben --- api/routes/billing.py | 35 +- api/settings.py | 5 +- db/python/layers/billing_db.py | 315 +++++++++--- db/python/layers/billing_layer.py | 98 +++- models/models/billing.py | 141 +++++- requirements.txt | 2 + web/src/Routes.tsx | 10 +- .../pages/billing/BillingCostByAnalysis.tsx | 213 ++++++++ web/src/pages/billing/BillingCostByTime.tsx | 7 +- .../billing/components/HailBatchGrid.tsx | 463 ++++++++++++++++++ web/src/pages/billing/index.ts | 1 + web/src/shared/components/Graphs/BarChart.tsx | 2 +- .../shared/components/Graphs/DonutChart.tsx | 2 +- .../Graphs/HorizontalStackedBarChart.tsx | 2 +- web/src/shared/components/Header/NavBar.tsx | 5 + 15 files changed, 1198 insertions(+), 103 deletions(-) create mode 100644 web/src/pages/billing/BillingCostByAnalysis.tsx create mode 100644 web/src/pages/billing/components/HailBatchGrid.tsx diff --git a/api/routes/billing.py b/api/routes/billing.py index f80e3199a..3c409dcb8 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -10,6 +10,7 @@ from models.models.billing import ( BillingColumn, BillingCostBudgetRecord, + BillingHailBatchCostRecord, BillingQueryModel, BillingRowRecord, BillingTotalCostQueryModel, @@ -297,6 +298,38 @@ async def query_billing( return records +@router.get( + '/cost-by-ar-guid/{ar_guid}', + response_model=BillingHailBatchCostRecord, + operation_id='costByArGuid', +) +@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cost_by_ar_guid( + author: str = get_author, + ar_guid: str = None, +) -> BillingHailBatchCostRecord: + """Get Hail Batch costs by AR GUID""" + billing_layer = initialise_billing_layer(author) + records = await billing_layer.get_cost_by_ar_guid(ar_guid) + return records + + +@router.get( + '/cost-by-batch-id/{batch_id}', + response_model=BillingHailBatchCostRecord, + operation_id='costByBatchId', +) +@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) +async def get_cost_by_batch_id( + author: str 
= get_author, + batch_id: str = None, +) -> BillingHailBatchCostRecord: + """Get Hail Batch costs by Batch ID""" + billing_layer = initialise_billing_layer(author) + records = await billing_layer.get_cost_by_batch_id(batch_id) + return records + + @router.post( '/total-cost', response_model=list[BillingTotalCostRecord], @@ -503,7 +536,7 @@ async def get_total_cost( """ billing_layer = initialise_billing_layer(author) records = await billing_layer.get_total_cost(query) - return records + return [BillingTotalCostRecord.from_json(record) for record in records] @router.get( diff --git a/api/settings.py b/api/settings.py index ab92c717c..98d11062f 100644 --- a/api/settings.py +++ b/api/settings.py @@ -42,10 +42,11 @@ BQ_AGGREG_EXT_VIEW = os.getenv('SM_GCP_BQ_AGGREG_EXT_VIEW') BQ_BUDGET_VIEW = os.getenv('SM_GCP_BQ_BUDGET_VIEW') BQ_GCP_BILLING_VIEW = os.getenv('SM_GCP_BQ_BILLING_VIEW') +BQ_BATCHES_VIEW = os.getenv('SM_GCP_BQ_BATCHES_VIEW') # This is to optimise BQ queries, DEV table has data only for Mar 2023 -BQ_DAYS_BACK_OPTIMAL = 30 # Look back 30 days for optimal query -BILLING_CACHE_RESPONSE_TTL = 3600 # 1 Hour +BQ_DAYS_BACK_OPTIMAL = 30 # Look back 30 days for optimal query +BILLING_CACHE_RESPONSE_TTL = 3600 # 1 Hour def get_default_user() -> str | None: diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index dbf9818a0..4a38208f9 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -1,7 +1,8 @@ +# pylint: disable=too-many-lines import re from collections import Counter, defaultdict -from datetime import datetime -from typing import Any, Tuple +from datetime import datetime, timedelta +from typing import Any from google.cloud import bigquery @@ -9,6 +10,7 @@ BQ_AGGREG_EXT_VIEW, BQ_AGGREG_RAW, BQ_AGGREG_VIEW, + BQ_BATCHES_VIEW, BQ_BUDGET_VIEW, BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW, @@ -22,8 +24,8 @@ BillingRowRecord, BillingTimePeriods, BillingTotalCostQueryModel, - BillingTotalCostRecord, ) +from models.models.billing import BillingSource BQ_LABELS = {'source': 'metamist-api'} @@ -33,25 +35,28 @@ def abbrev_cost_category(cost_category: str) -> str: return 'S' if cost_category == 'Cloud Storage' else 'C' -def prepare_time_periods(query: BillingTotalCostQueryModel) -> Tuple[str, str, str]: +def prepare_time_periods( + query: BillingTotalCostQueryModel, +) -> tuple[str, str, str]: """Prepare Time periods grouping and parsing formulas""" + time_column = query.time_column.value or 'day' day_parse_formula = '' day_field = '' day_grp = 'day, ' # Based on specified time period, add the corresponding column if query.time_periods == BillingTimePeriods.DAY: - day_field = 'day, ' - day_parse_formula = 'day, ' + day_field = f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day, ' + day_parse_formula = f'PARSE_DATE("%Y-%m-%d", day) as day, ' elif query.time_periods == BillingTimePeriods.WEEK: - day_field = 'FORMAT_DATE("%Y%W", day) as day, ' - day_parse_formula = 'PARSE_DATE("%Y%W", day) as day, ' + day_field = f'FORMAT_DATE("%Y%W", {time_column}) as day, ' + day_parse_formula = f'PARSE_DATE("%Y%W", day) as day, ' elif query.time_periods == BillingTimePeriods.MONTH: - day_field = 'FORMAT_DATE("%Y%m", day) as day, ' - day_parse_formula = 'PARSE_DATE("%Y%m", day) as day, ' + day_field = f'FORMAT_DATE("%Y%m", {time_column}) as day, ' + day_parse_formula = f'PARSE_DATE("%Y%m", day) as day, ' elif query.time_periods == BillingTimePeriods.INVOICE_MONTH: day_field = 'invoice_month as day, ' - day_parse_formula = 'PARSE_DATE("%Y%m", day) as day, ' + 
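        # invoice_month is already stored as a YYYYMM string, so it is selected
        # as-is here and only parsed back to a date in the formula below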
day_parse_formula = f'PARSE_DATE("%Y%m", day) as day, ' return day_field, day_grp, day_parse_formula @@ -283,7 +288,7 @@ async def query( ) -> list[BillingRowRecord] | None: """Get Billing record from BQ""" - # TODO: THis function is not going to be used most likely + # TODO: This function is not going to be used most likely # get_total_cost will replace it # cost of this BQ is 30MB on DEV, @@ -347,10 +352,129 @@ async def query( raise ValueError('No record found') + def _prepare_filter_str(self, query: BillingTotalCostQueryModel, view_to_use: str): + """Prepare filter string""" + and_filters = [] + query_parameters = [] + time_column = query.time_column or 'day' + + and_filters.append(f'{time_column} >= TIMESTAMP(@start_date)') + and_filters.append(f'{time_column} <= TIMESTAMP(@end_date)') + query_parameters.extend( + [ + bigquery.ScalarQueryParameter('start_date', 'STRING', query.start_date), + bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date), + ] + ) + + if query.source == BillingSource.GCP_BILLING: + # BQ_GCP_BILLING_VIEW view is partitioned by different field + # BQ has limitation, materialized view can only by partition by base table + # partition or its subset, in our case _PARTITIONTIME + # (part_time field in the view) + # We are querying by day, + # which can be up to a week behind regarding _PARTITIONTIME + and_filters.append('part_time >= TIMESTAMP(@start_date)') + and_filters.append( + 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' + ) + + def construct_filter( + name: str, value: Any, is_label: bool = False + ) -> tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]: + compare = '=' + b1, b2 = '', '' + param_type = bigquery.ScalarQueryParameter + key = name.replace('-', '_') + + if isinstance(value, list): + compare = 'IN' + b1, b2 = 'UNNEST(', ')' + param_type = bigquery.ArrayQueryParameter + + if is_label: + name = f'getLabelValue(labels, "{name}")' + + return ( + f'{name} {compare} {b1}@{key}{b2}', + param_type(key, 'STRING', value), + ) + + # No additional filters + filters = [] + if not query.filters: + filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' + return filter_str, query_parameters, view_to_use + + # Add each of the filters in the query + for filter_key, filter_value in query.filters.items(): + col_name = str(filter_key.value) + if col_name in BillingColumn.extended_cols(): + # if one of the extended columns is needed, + # the view has to be extended + view_to_use = BQ_AGGREG_EXT_VIEW + + if not isinstance(filter_value, dict): + filter_, query_param = construct_filter(col_name, filter_value) + filters.append(filter_) + query_parameters.append(query_param) + else: + for label_key, label_value in filter_value.items(): + filter_, query_param = construct_filter( + label_key, label_value, True + ) + filters.append(filter_) + query_parameters.append(query_param) + + if query.filters_op == 'OR': + if filters: + and_filters.append('(' + ' OR '.join(filters) + ')') + else: + # if not specified, default to AND + and_filters.extend(filters) + + filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' + return filter_str, query_parameters, view_to_use + + def convert_output(self, query_job_result): + """Convert query result to json""" + if not query_job_result or query_job_result.result().total_rows == 0: + # return empty list if no record found + return [] + + records = query_job_result.result() + + results = [] + + def fix_labels(row): + return {r['key']: r['value'] for r in 
row} + + for record in records: + drec = dict(record) + if 'labels' in drec: + drec.update(fix_labels(drec['labels'])) + + results.append(drec) + + # df = query_job_result.to_dataframe() + + # for col in df.columns: + # # convert date to string + # if df.dtypes[col] == 'dbdate': + # df[col] = df[col].astype(str) + + # # modify labels format + # if col == 'labels': + # df[col] = df[col].apply(fix_labels) + + # data = json.loads(df.to_json(orient='records', date_format='iso')) + return results + + # pylint: disable=too-many-locals async def get_total_cost( self, query: BillingTotalCostQueryModel, - ) -> list[BillingTotalCostRecord] | None: + ) -> list[dict] | None: """ Get Total cost of selected fields for requested time interval from BQ view """ @@ -360,86 +484,48 @@ async def get_total_cost( extended_cols = BillingColumn.extended_cols() # by default look at the normal view - if query.source == 'gcp_billing': + if query.source == BillingSource.GCP_BILLING: view_to_use = BQ_GCP_BILLING_VIEW + elif query.source == BillingSource.RAW: + view_to_use = BQ_AGGREG_RAW else: view_to_use = BQ_AGGREG_VIEW - columns = [] + # Get columns to group by and check view to use + grp_columns = [] for field in query.fields: col_name = str(field.value) - if col_name == 'cost': - # skip the cost field as it will be always present + if not BillingColumn.can_group_by(field): + # if the field cannot be grouped by, skip it continue if col_name in extended_cols: # if one of the extended columns is needed, the view has to be extended view_to_use = BQ_AGGREG_EXT_VIEW - columns.append(col_name) + grp_columns.append(col_name) - fields_selected = ','.join(columns) + grp_selected = ','.join(grp_columns) + fields_selected = ','.join( + (field.value for field in query.fields if field != BillingColumn.COST) + ) # prepare grouping by time periods day_parse_formula = '' day_field = '' day_grp = '' - if query.time_periods: + if query.time_periods or query.time_column: # remove existing day column, if added to fields # this is to prevent duplicating various time periods in one query - if BillingColumn.DAY in query.fields: - columns.remove(BillingColumn.DAY) + # if BillingColumn.DAY in query.fields: + # columns.remove(BillingColumn.DAY) day_field, day_grp, day_parse_formula = prepare_time_periods(query) - # construct filters - and_filters = [] - query_parameters = [] - - and_filters.append('day >= TIMESTAMP(@start_date)') - query_parameters.append( - bigquery.ScalarQueryParameter('start_date', 'STRING', query.start_date) - ) - - and_filters.append('day <= TIMESTAMP(@end_date)') - query_parameters.append( - bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date) + filter_str, query_parameters, view_to_use = self._prepare_filter_str( + query, view_to_use ) - if query.source == 'gcp_billing': - # BQ_GCP_BILLING_VIEW view is partitioned by different field - # BQ has limitation, materialized view can only by partition by base table - # partition or its subset, in our case _PARTITIONTIME - # (part_time field in the view) - # We are querying by day, - # which can be up to a week behind regarding _PARTITIONTIME - and_filters.append('part_time >= TIMESTAMP(@start_date)') - and_filters.append( - 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' - ) - - filters = [] - if query.filters: - for filter_key, filter_value in query.filters.items(): - col_name = str(filter_key.value) - filters.append(f'{col_name} = @{col_name}') - query_parameters.append( - bigquery.ScalarQueryParameter(col_name, 'STRING', filter_value) - 
) - if col_name in extended_cols: - # if one of the extended columns is needed, - # the view has to be extended - view_to_use = BQ_AGGREG_EXT_VIEW - - if query.filters_op == 'OR': - if filters: - and_filters.append('(' + ' OR '.join(filters) + ')') - else: - # if not specified, default to AND - and_filters.extend(filters) - - filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' - # construct order by order_by_cols = [] if query.order_by: @@ -450,12 +536,21 @@ async def get_total_cost( order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' + group_by = f'GROUP BY {day_grp}{grp_selected}' if query.group_by else '' + cost = 'SUM(cost) as cost' if query.group_by else 'cost' + _query = f""" + CREATE TEMP FUNCTION getLabelValue( + labels ARRAY>, label STRING + ) AS ( + (SELECT value FROM UNNEST(labels) WHERE key = label LIMIT 1) + ); + WITH t AS ( - SELECT {day_field}{fields_selected}, SUM(cost) as cost + SELECT {day_field}{fields_selected}, {cost} FROM `{view_to_use}` {filter_str} - GROUP BY {day_grp}{fields_selected} + {group_by} {order_by_str} ) SELECT {day_parse_formula}{fields_selected}, cost FROM t @@ -476,17 +571,11 @@ async def get_total_cost( job_config = bigquery.QueryJobConfig( query_parameters=query_parameters, labels=BQ_LABELS ) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() + query_job_result = self._connection.connection.query( + _query, job_config=job_config ) - if query_job_result: - return [ - BillingTotalCostRecord.from_json(dict(row)) for row in query_job_result - ] - - # return empty list if no record found - return [] + return self.convert_output(query_job_result) async def get_budgets_by_gcp_project( self, field: BillingColumn, is_current_month: bool @@ -494,7 +583,7 @@ async def get_budgets_by_gcp_project( """ Get budget for gcp-projects """ - if field != BillingColumn.PROJECT or not is_current_month: + if field != BillingColumn.GCP_PROJECT or not is_current_month: # only projects have budget and only for current month return {} @@ -842,7 +931,7 @@ async def get_running_cost( # accept only Topic, Dataset or Project at this stage if field not in ( BillingColumn.TOPIC, - BillingColumn.PROJECT, + BillingColumn.GCP_PROJECT, BillingColumn.DATASET, BillingColumn.STAGE, BillingColumn.COMPUTE_CATEGORY, @@ -928,3 +1017,67 @@ async def get_running_cost( ) return results + + async def get_batches_by_ar_guid( + self, ar_guid: str + ) -> tuple[datetime, datetime, list[str]]: + """ + Get batches for given ar_guid + """ + _query = f""" + SELECT + batch_id, + MIN(min_day) as start_day, + MAX(max_day) as end_day + FROM `{BQ_BATCHES_VIEW}` + WHERE ar_guid = @ar_guid + AND batch_id IS NOT NULL + GROUP BY batch_id + ORDER BY 1; + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter('ar_guid', 'STRING', ar_guid), + ], + labels=BQ_LABELS, + ) + + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() + ) + if query_job_result: + start_day = min((row.start_day for row in query_job_result)) + end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1) + return start_day, end_day, [row.batch_id for row in query_job_result] + + # return empty list if no record found + return None, None, [] + + async def get_ar_guid_by_batch_id(self, batch_id: str) -> str: + """ + Get ar_guid for given batch_id + """ + _query = f""" + SELECT ar_guid + FROM `{BQ_BATCHES_VIEW}` + WHERE batch_id = @batch_id + AND 
ar_guid IS NOT NULL + LIMIT 1; + """ + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter('batch_id', 'STRING', batch_id), + ], + labels=BQ_LABELS, + ) + + query_job_result = list( + self._connection.connection.query(_query, job_config=job_config).result() + ) + if query_job_result: + return query_job_result[0]['ar_guid'] + + # return None if no ar_guid found + return None diff --git a/db/python/layers/billing_layer.py b/db/python/layers/billing_layer.py index 632b345ea..a050611e1 100644 --- a/db/python/layers/billing_layer.py +++ b/db/python/layers/billing_layer.py @@ -6,7 +6,12 @@ BillingCostBudgetRecord, BillingRowRecord, BillingTotalCostQueryModel, - BillingTotalCostRecord, +) +from models.models.billing import ( + BillingHailBatchCostRecord, + BillingSource, + BillingTimeColumn, + BillingTimePeriods, ) @@ -146,7 +151,7 @@ async def query( async def get_total_cost( self, query: BillingTotalCostQueryModel, - ) -> list[BillingTotalCostRecord] | None: + ) -> list[dict] | None: """ Get Total cost of selected fields for requested time interval """ @@ -164,3 +169,92 @@ async def get_running_cost( """ billing_db = BillingDb(self.connection) return await billing_db.get_running_cost(field, invoice_month, source) + + async def get_cost_by_ar_guid( + self, + ar_guid: str | None = None, + ) -> BillingHailBatchCostRecord: + """ + Get Costs by AR GUID + """ + billing_db = BillingDb(self.connection) + + # First get all batches and the min/max day to use for the query + start_day, end_day, batches = await billing_db.get_batches_by_ar_guid(ar_guid) + + if not batches: + return BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=[], + costs=[], + ) + + # Then get the costs for the given AR GUID/batches from the main table + all_cols = [BillingColumn.str_to_enum(v) for v in BillingColumn.raw_cols()] + + query = BillingTotalCostQueryModel( + fields=all_cols, + source=BillingSource.RAW, + start_date=start_day.strftime('%Y-%m-%d'), + end_date=end_day.strftime('%Y-%m-%d'), + filters={ + BillingColumn.LABELS: { + 'batch_id': batches, + 'ar-guid': ar_guid, + } + }, + filters_op='OR', + group_by=False, + time_column=BillingTimeColumn.USAGE_END_TIME, + time_periods=BillingTimePeriods.DAY, + ) + records = await billing_db.get_total_cost(query) + return BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=batches, + costs=records, + ) + + async def get_cost_by_batch_id( + self, + batch_id: str | None = None, + ) -> BillingHailBatchCostRecord: + """ + Get Costs by Batch ID + """ + billing_db = BillingDb(self.connection) + + # First get all batches and the min/max day to use for the query + ar_guid = await billing_db.get_ar_guid_by_batch_id(batch_id) + + # The get all batches for the ar_guid + start_day, end_day, batches = await billing_db.get_batches_by_ar_guid(ar_guid) + + if not batches: + return BillingHailBatchCostRecord(ar_guid=ar_guid, batch_ids=[], costs=[]) + + # Then get the costs for the given AR GUID/batches from the main table + all_cols = [BillingColumn.str_to_enum(v) for v in BillingColumn.raw_cols()] + + query = BillingTotalCostQueryModel( + fields=all_cols, + source=BillingSource.RAW, + start_date=start_day.strftime('%Y-%m-%d'), + end_date=end_day.strftime('%Y-%m-%d'), + filters={ + BillingColumn.LABELS: { + 'batch_id': batches, + 'ar-guid': ar_guid, + } + }, + filters_op='OR', + group_by=False, + time_column=BillingTimeColumn.USAGE_END_TIME, + time_periods=BillingTimePeriods.DAY, + ) + records = await billing_db.get_total_cost(query) + return 
BillingHailBatchCostRecord( + ar_guid=ar_guid, + batch_ids=batches, + costs=records, + ) diff --git a/models/models/billing.py b/models/models/billing.py index 861317529..e011f8117 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -117,18 +117,51 @@ def from_json(record): ) +class BillingSource(str, Enum): + """List of billing sources""" + + RAW = 'raw' + AGGREGATE = 'aggregate' + EXTENDED = 'extended' + BUDGET = 'budget' + GCP_BILLING = 'gcp_billing' + BATCHES = 'batches' + + class BillingColumn(str, Enum): """List of billing columns""" - # base view columns + # raw view columns + ID = 'id' TOPIC = 'topic' - PROJECT = 'gcp_project' + SERVICE = 'service' + SKU = 'sku' + USAGE_START_TIME = 'usage_start_time' + USAGE_END_TIME = 'usage_end_time' + PROJECT = 'project' + LABELS = 'labels' + SYSTEM_LABELS = 'system_labels' + LOCATION = 'location' + EXPORT_TIME = 'export_time' + COST = 'cost' + CURRENCY = 'currency' + CURRENCY_CONVERSION_RATE = 'currency_conversion_rate' + USAGE = 'usage' + CREDITS = 'credits' + INVOICE = 'invoice' + COST_TYPE = 'cost_type' + ADJUSTMENT_INFO = 'adjustment_info' + + # base view columns + # TOPIC = 'topic' + # SKU = 'sku' + # CURRENCY = 'currency' + # COST = 'cost' + # LABELS = 'labels' + GCP_PROJECT = 'gcp_project' DAY = 'day' COST_CATEGORY = 'cost_category' - SKU = 'sku' AR_GUID = 'ar_guid' - CURRENCY = 'currency' - COST = 'cost' INVOICE_MONTH = 'invoice_month' # extended, filtered view columns @@ -144,6 +177,74 @@ class BillingColumn(str, Enum): WDL_TASK_NAME = 'wdl_task_name' NAMESPACE = 'namespace' + @classmethod + def can_group_by(cls, value: 'BillingColumn') -> bool: + """ + Return True if column can be grouped by + TODO: If any new columns are added above and cannot be in a group by, add them here + This could be record, array or struct type + """ + return value not in ( + BillingColumn.COST, + BillingColumn.SERVICE, + # BillingColumn.SKU, + BillingColumn.PROJECT, + BillingColumn.LABELS, + BillingColumn.SYSTEM_LABELS, + BillingColumn.LOCATION, + BillingColumn.USAGE, + BillingColumn.CREDITS, + BillingColumn.INVOICE, + BillingColumn.ADJUSTMENT_INFO, + ) + + @classmethod + def str_to_enum(cls, value: str) -> 'BillingColumn': + """Convert string to enum""" + str_to_enum = {v.value: v for k, v in BillingColumn.__members__.items()} + return str_to_enum[value] + + @classmethod + def raw_cols(cls) -> list[str]: + """Return list of raw column names""" + return [ + 'id', + 'topic', + 'service', + 'sku', + 'usage_start_time', + 'usage_end_time', + 'project', + 'labels', + 'system_labels', + 'location', + 'export_time', + 'cost', + 'currency', + 'currency_conversion_rate', + 'usage', + 'credits', + 'invoice', + 'cost_type', + 'adjustment_info', + ] + + @classmethod + def standard_cols(cls) -> list[str]: + """Return list of standard column names""" + return [ + 'topic', + 'gcp_project', + 'sku', + 'currency', + 'cost', + 'labels', + 'day', + 'cost_category', + 'ar_guid', + 'invoice_month', + ] + @classmethod def extended_cols(cls) -> list[str]: """Return list of extended column names""" @@ -165,7 +266,7 @@ def extended_cols(cls) -> list[str]: @staticmethod def generate_all_title(record) -> str: """Generate Column as All Title""" - if record == BillingColumn.PROJECT: + if record == BillingColumn.GCP_PROJECT: return 'All GCP Projects' return f'All {record.title()}s' @@ -181,6 +282,15 @@ class BillingTimePeriods(str, Enum): INVOICE_MONTH = 'invoice_month' +class BillingTimeColumn(str, Enum): + """List of billing time columns""" + + DAY = 'day' + 
USAGE_START_TIME = 'usage_start_time' + USAGE_END_TIME = 'usage_end_time' + EXPORT_TIME = 'export_time' + + class BillingTotalCostQueryModel(SMBase): """ Used to query for billing total cost @@ -191,13 +301,14 @@ class BillingTotalCostQueryModel(SMBase): fields: list[BillingColumn] start_date: str end_date: str - # optional, can be aggregate or gcp_billing - source: str | None = None + # optional, can be raw, aggregate or gcp_billing + source: BillingSource | None = None # optional - filters: dict[BillingColumn, str] | None = None + filters: dict[BillingColumn, str | list | dict] | None = None # optional, AND or OR filters_op: str | None = None + group_by: bool = True # order by, reverse= TRUE for DESC, FALSE for ASC order_by: dict[BillingColumn, bool] | None = None @@ -205,6 +316,7 @@ class BillingTotalCostQueryModel(SMBase): offset: int | None = None # default to day, can be day, week, month, invoice_month + time_column: BillingTimeColumn | None = None time_periods: BillingTimePeriods | None = None def __hash__(self): @@ -219,9 +331,10 @@ class BillingTotalCostRecord(SMBase): topic: str | None gcp_project: str | None cost_category: str | None - sku: str | None + sku: str | dict | None invoice_month: str | None ar_guid: str | None + # extended columns dataset: str | None batch_id: str | None @@ -319,3 +432,11 @@ def from_json(record): budget=record.get('budget'), last_loaded_day=record.get('last_loaded_day'), ) + + +class BillingHailBatchCostRecord(SMBase): + """Return class for the Billing Cost by batch_id/ar_guid""" + + ar_guid: str | None + batch_ids: list[str] | None + costs: list[dict] | None diff --git a/requirements.txt b/requirements.txt index 1f9640ea2..131aa6942 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,5 @@ SQLAlchemy==1.4.41 cryptography>=41.0.0 python-dateutil==2.8.2 slack-sdk==3.20.2 +pandas==2.1.4 +db-dtypes==1.2.0 diff --git a/web/src/Routes.tsx b/web/src/Routes.tsx index 2e5bfffcb..3570c33ef 100644 --- a/web/src/Routes.tsx +++ b/web/src/Routes.tsx @@ -7,6 +7,7 @@ import { BillingSeqrProp, BillingCostByTime, BillingCostByBatch, + BillingCostByAnalysis, BillingInvoiceMonthCost, BillingCostByCategory, } from './pages/billing' @@ -39,7 +40,6 @@ const Routes: React.FunctionComponent = () => ( } /> - ( } /> + + + + } + /> { + const [searchParams] = useSearchParams() + + const now = new Date() + + // Data loading + const [isLoading, setIsLoading] = React.useState(true) + const [error, setError] = React.useState() + + const [start, setStart] = React.useState( + searchParams.get('start') ?? + `${now.getFullYear()}-${now.getMonth().toString().padStart(2, '0')}-01` + ) + + const [data, setData] = React.useState([]) + + const [searchTxt, setSearchTxt] = React.useState( + searchParams.get('searchTxt') ?? undefined + ) + + enum SearchType { + Ar_guid, + Batch_id, + } + const searchOptions: string[] = Object.keys(SearchType).filter((item) => isNaN(Number(item))) + const dropdownOptions = searchOptions.map((item) => ({ + text: item.replaceAll('_', ' '), + value: item, + })) + + const [searchByType, setSearchByType] = React.useState( + SearchType[searchParams.get('searchType')] ?? 
undefined + ) + + // use navigate and update url params + const location = useLocation() + const navigate = useNavigate() + + const updateNav = (sType: SearchType | undefined, sTxt: string | undefined) => { + let url = `${location.pathname}?` + const params: string[] = [] + if (sType !== undefined) params.push(`searchType=${SearchType[sType]}`) + if (sTxt !== undefined) params.push(`searchTxt=${sTxt}`) + + url += params.join('&') + navigate(url) + } + + const getData = (sType: SearchType, sTxt: string) => { + if ((sType === undefined || sTxt === undefined) && sTxt.length < 6) { + // Seaarch text is not large enough + setIsLoading(false) + return + } + setIsLoading(true) + setError(undefined) + + if (sType === SearchType.Ar_guid) { + new BillingApi() + .costByArGuid(sTxt) + .then((response) => { + setIsLoading(false) + setData(response.data) + }) + .catch((er) => setError(er.message)) + } else if (sType === SearchType.Batch_id) { + new BillingApi() + .costByBatchId(sTxt) + .then((response) => { + setIsLoading(false) + setData(response.data) + }) + .catch((er) => setError(er.message)) + } else { + setIsLoading(false) + } + } + + const handleSearch = () => { + if (searchByType === undefined || searchTxt === undefined || searchTxt.length < 6) { + // Seaarch text is not large enough + setIsLoading(false) + return + } + getData(searchByType, searchTxt) + } + + const handleSearchChange = (event: any, dt: any) => { + setSearchTxt(dt.value) + } + + const handleSearchTypeChange = (event: any, dt: any) => { + setSearchByType(SearchType[dt.value]) + } + + const handleSearchKeyPress = (event: any) => { + updateNav(searchByType, searchTxt) + handleSearch() + } + + React.useEffect(() => { + handleSearch() + }, []) + + const searchCard = () => ( + +

+ Billing Cost By Analysis +

+ + + + + + - - - - - - -
-

- {' '} - E.g. -
- ar_guid: 855a6153-033c-4398-8000-46ed74c02fe8 -
- batch_id: 429518 -
- sequencing_group: cpg246751 -
- cromwell_workflow_id: cromwell-e252f430-4143-47ec-a9c0-5f7face1b296 -
-

-
- ) - } - - if (error) { - return ( - setError(undefined)}> - {error} -
- -
- ) - } - - if (isLoading) { - return ( -
- {searchCard()} - -

- This query takes a while... -

-
- ) - } - - if (data.length === 0) { - return ( -
- {searchCard()} -

- No data found. -

-
- ) - } - - return ( - <> - {searchCard()} - -
{extractHeader(data[0])}
- - - - - Cost Category - - - SKU - - - Cost - - - - - {data.map((k) => ( - - {k.cost_category} - {k.sku} - - {currencyFormat(k.cost)} - - - ))} - -
-
- - ) -} - -export default BillingCostByBatch diff --git a/web/src/pages/billing/components/HailBatchGrid.tsx b/web/src/pages/billing/components/HailBatchGrid.tsx index 0f72af235..94933e7fe 100644 --- a/web/src/pages/billing/components/HailBatchGrid.tsx +++ b/web/src/pages/billing/components/HailBatchGrid.tsx @@ -418,7 +418,7 @@ const HailBatchGrid: React.FunctionComponent<{ {title} - {v} + {v}
) })} @@ -430,7 +430,7 @@ const HailBatchGrid: React.FunctionComponent<{ key={`${log.key}-lbl`} > - + COST BREAKDOWN
@@ -447,7 +447,7 @@ const HailBatchGrid: React.FunctionComponent<{ key={`${log.key}-${dk.batch_resource}`} > - + {dk.batch_resource} ${dk.cost.toFixed(4)} diff --git a/web/src/pages/billing/index.ts b/web/src/pages/billing/index.ts index 8924e7d73..037d0b871 100644 --- a/web/src/pages/billing/index.ts +++ b/web/src/pages/billing/index.ts @@ -1,7 +1,6 @@ export { default as BillingHome } from "./BillingHome"; export { default as BillingSeqrProp } from "./BillingSeqrProp"; export { default as BillingCostByTime } from "./BillingCostByTime"; -export { default as BillingCostByBatch } from "./BillingCostByBatch"; export { default as BillingCostByAnalysis } from "./BillingCostByAnalysis"; export { default as BillingCostByCategory } from "./BillingCostByCategory"; export { default as BillingInvoiceMonthCost } from "./BillingInvoiceMonthCost"; diff --git a/web/src/shared/components/Header/NavBar.tsx b/web/src/shared/components/Header/NavBar.tsx index 0ffd6ff21..8ee0f52b4 100644 --- a/web/src/shared/components/Header/NavBar.tsx +++ b/web/src/shared/components/Header/NavBar.tsx @@ -42,11 +42,6 @@ const billingPages = { url: '/billing/costByTime', icon: , }, - { - title: 'Cost By Batch', - url: '/billing/costByBatch', - icon: , - }, { title: 'Cost By Analysis', url: '/billing/costByAnalysis', From d9c69ff743cc313e1d1baeee6fa5865915b1fa36 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Tue, 2 Jan 2024 14:32:54 +1100 Subject: [PATCH 10/34] Remove unused API point & cleanup, changes as per code review. --- api/routes/billing.py | 90 ++++------- db/python/layers/billing_db.py | 130 ++++------------ db/python/layers/billing_layer.py | 13 -- db/python/tables/billing.py | 18 --- models/enums/billing.py | 31 ++++ models/models/__init__.py | 1 - models/models/billing.py | 144 +----------------- .../pages/billing/BillingCostByAnalysis.tsx | 39 +++-- .../Graphs/StackedAreaByDateChart.tsx | 2 +- 9 files changed, 114 insertions(+), 354 deletions(-) delete mode 100644 db/python/tables/billing.py create mode 100644 models/enums/billing.py diff --git a/api/routes/billing.py b/api/routes/billing.py index 3c409dcb8..7482ee77a 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -11,8 +11,6 @@ BillingColumn, BillingCostBudgetRecord, BillingHailBatchCostRecord, - BillingQueryModel, - BillingRowRecord, BillingTotalCostQueryModel, BillingTotalCostRecord, ) @@ -20,6 +18,18 @@ router = APIRouter(prefix='/billing', tags=['billing']) +def _get_billing_layer_from(author: str) -> BillingLayer: + """ + Initialise billing + """ + if not is_billing_enabled(): + raise ValueError('Billing is not enabled') + + connection = BqConnection(author) + billing_layer = BillingLayer(connection) + return billing_layer + + @router.get( '/is-billing-enabled', response_model=bool, @@ -32,18 +42,6 @@ def is_billing_enabled() -> bool: return BQ_AGGREG_VIEW is not None -def initialise_billing_layer(author: str) -> BillingLayer: - """ - Initialise billing - """ - if not is_billing_enabled(): - raise ValueError('Billing is not enabled') - - connection = BqConnection(author) - billing_layer = BillingLayer(connection) - return billing_layer - - @router.get( '/gcp-projects', response_model=list[str], @@ -54,7 +52,7 @@ async def get_gcp_projects( author: str = get_author, ) -> list[str]: """Get list of all GCP projects in database""" - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_gcp_projects() return records @@ -69,7 +67,7 @@ async def get_topics( author: 
str = get_author, ) -> list[str]: """Get list of all topics in database""" - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_topics() return records @@ -84,7 +82,7 @@ async def get_cost_categories( author: str = get_author, ) -> list[str]: """Get list of all service description / cost categories in database""" - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_cost_categories() return records @@ -105,7 +103,7 @@ async def get_skus( There is over 400 Skus so limit is required Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_skus(limit, offset) return records @@ -123,7 +121,7 @@ async def get_datasets( Get list of all datasets in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_datasets() return records @@ -141,7 +139,7 @@ async def get_sequencing_types( Get list of all sequencing_types in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_sequencing_types() return records @@ -159,7 +157,7 @@ async def get_stages( Get list of all stages in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_stages() return records @@ -177,7 +175,7 @@ async def get_sequencing_groups( Get list of all sequencing_groups in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_sequencing_groups() return records @@ -195,7 +193,7 @@ async def get_compute_categories( Get list of all compute categories in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_compute_categories() return records @@ -213,7 +211,7 @@ async def get_cromwell_sub_workflow_names( Get list of all cromwell_sub_workflow_names in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_cromwell_sub_workflow_names() return records @@ -231,7 +229,7 @@ async def get_wdl_task_names( Get list of all wdl_task_names in database Results are sorted ASC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_wdl_task_names() return records @@ -249,7 +247,7 @@ async def get_invoice_months( Get list of all invoice months in database Results are sorted DESC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_invoice_months() return records @@ -267,37 +265,11 @@ async def get_namespaces( Get list of all namespaces in database Results are sorted DESC """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_namespaces() return records -@router.post( - '/query', response_model=list[BillingRowRecord], operation_id='queryBilling' -) 
-@alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) -async def query_billing( - query: BillingQueryModel, - limit: int = 10, - author: str = get_author, -) -> list[BillingRowRecord]: - """ - Get Billing records by some criteria, date is required to minimize BQ cost - - E.g. - - { - "topic": ["hail"], - "date": "2023-03-02", - "cost_category": ["Hail compute Credit"] - } - - """ - billing_layer = initialise_billing_layer(author) - records = await billing_layer.query(query.to_filter(), limit) - return records - - @router.get( '/cost-by-ar-guid/{ar_guid}', response_model=BillingHailBatchCostRecord, @@ -305,11 +277,11 @@ async def query_billing( ) @alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) async def get_cost_by_ar_guid( + ar_guid: str, author: str = get_author, - ar_guid: str = None, ) -> BillingHailBatchCostRecord: """Get Hail Batch costs by AR GUID""" - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_cost_by_ar_guid(ar_guid) return records @@ -321,11 +293,11 @@ async def get_cost_by_ar_guid( ) @alru_cache(maxsize=10, ttl=BILLING_CACHE_RESPONSE_TTL) async def get_cost_by_batch_id( + batch_id: str, author: str = get_author, - batch_id: str = None, ) -> BillingHailBatchCostRecord: """Get Hail Batch costs by Batch ID""" - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_cost_by_batch_id(batch_id) return records @@ -534,7 +506,7 @@ async def get_total_cost( } """ - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_total_cost(query) return [BillingTotalCostRecord.from_json(record) for record in records] @@ -560,6 +532,6 @@ async def get_running_costs( # pip install async-cache # @AsyncTTL(time_to_live=BILLING_CACHE_RESPONSE_TTL, maxsize=1024, skip_args=2) - billing_layer = initialise_billing_layer(author) + billing_layer = _get_billing_layer_from(author) records = await billing_layer.get_running_cost(field, invoice_month, source) return records diff --git a/db/python/layers/billing_db.py b/db/python/layers/billing_db.py index f8cbbb18e..2417dfc94 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/layers/billing_db.py @@ -1,6 +1,6 @@ # pylint: disable=too-many-lines import re -from collections import Counter, defaultdict +from collections import Counter, defaultdict, namedtuple from datetime import datetime, timedelta from typing import Any @@ -17,18 +17,21 @@ ) from api.utils.dates import get_invoice_month_range, reformat_datetime from db.python.gcp_connect import BqDbBase -from db.python.tables.billing import BillingFilter from models.models import ( BillingColumn, BillingCostBudgetRecord, - BillingRowRecord, BillingTimePeriods, BillingTotalCostQueryModel, ) from models.models.billing import BillingSource +# Label added to each Billing Big Query request, +# so we can track the cost of metamist-api BQ usage BQ_LABELS = {'source': 'metamist-api'} +# Day Time details used in grouping and parsing formulas +TimeGroupingDetails = namedtuple('TimeGroupingDetails', ['field', 'formula']) + def abbrev_cost_category(cost_category: str) -> str: """abbreviate cost category""" @@ -37,28 +40,33 @@ def abbrev_cost_category(cost_category: str) -> str: def prepare_time_periods( query: BillingTotalCostQueryModel, -) -> tuple[str, str, str]: +) -> TimeGroupingDetails: """Prepare Time periods grouping and parsing formulas""" time_column = 
query.time_column or 'day' - day_parse_formula = '' - day_field = '' - day_grp = 'day, ' + result = TimeGroupingDetails('', '') # Based on specified time period, add the corresponding column if query.time_periods == BillingTimePeriods.DAY: - day_field = f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day, ' - day_parse_formula = f'PARSE_DATE("%Y-%m-%d", day) as day, ' + result = TimeGroupingDetails( + field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day, ', + formula='PARSE_DATE("%Y-%m-%d", day) as day, ', + ) elif query.time_periods == BillingTimePeriods.WEEK: - day_field = f'FORMAT_DATE("%Y%W", {time_column}) as day, ' - day_parse_formula = f'PARSE_DATE("%Y%W", day) as day, ' + result = TimeGroupingDetails( + field=f'FORMAT_DATE("%Y%W", {time_column}) as day, ', + formula='PARSE_DATE("%Y%W", day) as day, ', + ) elif query.time_periods == BillingTimePeriods.MONTH: - day_field = f'FORMAT_DATE("%Y%m", {time_column}) as day, ' - day_parse_formula = f'PARSE_DATE("%Y%m", day) as day, ' + result = TimeGroupingDetails( + field=f'FORMAT_DATE("%Y%m", {time_column}) as day, ', + formula='PARSE_DATE("%Y%m", day) as day, ', + ) elif query.time_periods == BillingTimePeriods.INVOICE_MONTH: - day_field = 'invoice_month as day, ' - day_parse_formula = f'PARSE_DATE("%Y%m", day) as day, ' + result = TimeGroupingDetails( + field='invoice_month as day, ', formula='PARSE_DATE("%Y%m", day) as day, ' + ) - return day_field, day_grp, day_parse_formula + return result class BillingDb(BqDbBase): @@ -113,8 +121,6 @@ async def get_topics(self): WHERE day > TIMESTAMP_ADD( CURRENT_TIMESTAMP(), INTERVAL @days DAY ) - -- TODO put this back when reloading is fixed - AND NOT topic IN ('seqr', 'hail') ORDER BY topic ASC; """ @@ -281,77 +287,6 @@ async def get_extended_values(self, field: str): # return empty list if no record found return [] - async def query( - self, - filter_: BillingFilter, - limit: int = 10, - ) -> list[BillingRowRecord] | None: - """Get Billing record from BQ""" - - # TODO: This function is not going to be used most likely - # get_total_cost will replace it - - # cost of this BQ is 30MB on DEV, - # DEV is partition by day and date is required filter params, - # cost is aprox per query: AU$ 0.000023 per query - - required_fields = [ - filter_.date, - ] - - if not any(required_fields): - raise ValueError('Must provide date to filter on') - - # construct filters - filters = [] - query_parameters = [] - - if filter_.topic: - filters.append('topic IN UNNEST(@topic)') - query_parameters.append( - bigquery.ArrayQueryParameter('topic', 'STRING', filter_.topic.in_), - ) - - if filter_.date: - filters.append('DATE_TRUNC(usage_end_time, DAY) = TIMESTAMP(@date)') - query_parameters.append( - bigquery.ScalarQueryParameter('date', 'STRING', filter_.date.eq), - ) - - if filter_.cost_category: - filters.append('service.description IN UNNEST(@cost_category)') - query_parameters.append( - bigquery.ArrayQueryParameter( - 'cost_category', 'STRING', filter_.cost_category.in_ - ), - ) - - filter_str = 'WHERE ' + ' AND '.join(filters) if filters else '' - - _query = f""" - SELECT id, topic, service, sku, usage_start_time, usage_end_time, project, - labels, export_time, cost, currency, currency_conversion_rate, invoice, cost_type - FROM `{BQ_AGGREG_RAW}` - {filter_str} - """ - if limit: - _query += ' LIMIT @limit_val' - query_parameters.append( - bigquery.ScalarQueryParameter('limit_val', 'INT64', limit) - ) - - job_config = bigquery.QueryJobConfig( - query_parameters=query_parameters, labels=BQ_LABELS - ) - query_job_result = 
list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - - if query_job_result: - return [BillingRowRecord.from_json(dict(row)) for row in query_job_result] - - raise ValueError('No record found') - def _prepare_filter_str(self, query: BillingTotalCostQueryModel, view_to_use: str): """Prepare filter string""" and_filters = [] @@ -446,13 +381,13 @@ def convert_output(self, query_job_result): results = [] - def fix_labels(row): + def transform_labels(row): return {r['key']: r['value'] for r in row} for record in records: drec = dict(record) if 'labels' in drec: - drec.update(fix_labels(drec['labels'])) + drec.update(transform_labels(drec['labels'])) results.append(drec) @@ -511,16 +446,13 @@ async def get_total_cost( ) # prepare grouping by time periods - day_parse_formula = '' - day_field = '' - day_grp = '' + time_group = TimeGroupingDetails('', '') if query.time_periods or query.time_column: # remove existing day column, if added to fields # this is to prevent duplicating various time periods in one query # if BillingColumn.DAY in query.fields: # columns.remove(BillingColumn.DAY) - - day_field, day_grp, day_parse_formula = prepare_time_periods(query) + time_group = prepare_time_periods(query) filter_str, query_parameters, view_to_use = self._prepare_filter_str( query, view_to_use @@ -536,7 +468,7 @@ async def get_total_cost( order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' - group_by = f'GROUP BY {day_grp}{grp_selected}' if query.group_by else '' + group_by = f'GROUP BY day, {grp_selected}' if query.group_by else '' cost = 'SUM(cost) as cost' if query.group_by else 'cost' _query = f""" @@ -547,13 +479,13 @@ async def get_total_cost( ); WITH t AS ( - SELECT {day_field}{fields_selected}, {cost} + SELECT {time_group.field}{fields_selected}, {cost} FROM `{view_to_use}` {filter_str} {group_by} {order_by_str} ) - SELECT {day_parse_formula}{fields_selected}, cost FROM t + SELECT {time_group.formula}{fields_selected}, cost FROM t """ # append LIMIT and OFFSET if present diff --git a/db/python/layers/billing_layer.py b/db/python/layers/billing_layer.py index a050611e1..506b98ef6 100644 --- a/db/python/layers/billing_layer.py +++ b/db/python/layers/billing_layer.py @@ -1,10 +1,8 @@ from db.python.layers.billing_db import BillingDb from db.python.layers.bq_base import BqBaseLayer -from db.python.tables.billing import BillingFilter from models.models import ( BillingColumn, BillingCostBudgetRecord, - BillingRowRecord, BillingTotalCostQueryModel, ) from models.models.billing import ( @@ -137,17 +135,6 @@ async def get_namespaces( billing_db = BillingDb(self.connection) return await billing_db.get_extended_values('namespace') - async def query( - self, - _filter: BillingFilter, - limit: int = 10, - ) -> list[BillingRowRecord] | None: - """ - Get Billing record for the given gilter - """ - billing_db = BillingDb(self.connection) - return await billing_db.query(_filter, limit) - async def get_total_cost( self, query: BillingTotalCostQueryModel, diff --git a/db/python/tables/billing.py b/db/python/tables/billing.py deleted file mode 100644 index 54402a85c..000000000 --- a/db/python/tables/billing.py +++ /dev/null @@ -1,18 +0,0 @@ -import dataclasses - -from db.python.utils import ( - GenericFilter, - GenericFilterModel, -) - - -@dataclasses.dataclass -class BillingFilter(GenericFilterModel): - """Filter for billing""" - - topic: GenericFilter[str] = None - date: GenericFilter[str] = None - cost_category: GenericFilter[str] = None - - def 
__hash__(self): # pylint: disable=useless-parent-delegation - return super().__hash__() diff --git a/models/enums/billing.py b/models/enums/billing.py new file mode 100644 index 000000000..efcff271c --- /dev/null +++ b/models/enums/billing.py @@ -0,0 +1,31 @@ +from enum import Enum + + +class BillingSource(str, Enum): + """List of billing sources""" + + RAW = 'raw' + AGGREGATE = 'aggregate' + EXTENDED = 'extended' + BUDGET = 'budget' + GCP_BILLING = 'gcp_billing' + BATCHES = 'batches' + + +class BillingTimePeriods(str, Enum): + """List of billing grouping time periods""" + + # grouping time periods + DAY = 'day' + WEEK = 'week' + MONTH = 'month' + INVOICE_MONTH = 'invoice_month' + + +class BillingTimeColumn(str, Enum): + """List of billing time columns""" + + DAY = 'day' + USAGE_START_TIME = 'usage_start_time' + USAGE_END_TIME = 'usage_end_time' + EXPORT_TIME = 'export_time' diff --git a/models/models/__init__.py b/models/models/__init__.py index 5754b4239..c30ea3199 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -13,7 +13,6 @@ BillingColumn, BillingCostBudgetRecord, BillingCostDetailsRecord, - BillingRowRecord, BillingTimePeriods, BillingTotalCostQueryModel, BillingTotalCostRecord, diff --git a/models/models/billing.py b/models/models/billing.py index e011f8117..a918eb605 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -1,131 +1,8 @@ import datetime from enum import Enum -from db.python.tables.billing import BillingFilter -from db.python.utils import GenericFilter from models.base import SMBase - - -class BillingQueryModel(SMBase): - """Used to query for billing""" - - # topic is cluster index, provide some values to make it more efficient - topic: list[str] | None = None - - # make date required, to avoid full table scan - date: str - - cost_category: list[str] | None = None - - def to_filter(self) -> BillingFilter: - """Convert to internal analysis filter""" - return BillingFilter( - topic=GenericFilter(in_=self.topic) if self.topic else None, - date=GenericFilter(eq=self.date), - cost_category=GenericFilter(in_=self.cost_category) - if self.cost_category - else None, - ) - - def __hash__(self): - """Create hash for this object to use in caching""" - return hash(self.json()) - - -class BillingRowRecord(SMBase): - """Return class for the Billing record""" - - id: str - topic: str | None - service_id: str | None - service_description: str | None - - sku_id: str | None - sku_description: str | None - - usage_start_time: datetime.datetime | None - usage_end_time: datetime.datetime | None - - gcp_project_id: str | None - gcp_project_number: str | None - gcp_project_name: str | None - - # labels - dataset: str | None - batch_id: str | None - job_id: str | None - batch_name: str | None - sequencing_type: str | None - stage: str | None - sequencing_group: str | None - - export_time: datetime.datetime | None - cost: str | None - currency: str | None - currency_conversion_rate: str | None - invoice_month: str | None - cost_type: str | None - - class Config: - """Config for BillingRowRecord Response""" - - orm_mode = True - - @staticmethod - def from_json(record): - """Create BillingRowRecord from json""" - - record['service'] = record['service'] if record['service'] else {} - record['project'] = record['project'] if record['project'] else {} - record['invoice'] = record['invoice'] if record['invoice'] else {} - record['sku'] = record['sku'] if record['sku'] else {} - - labels = {} - - if record['labels']: - for lbl in record['labels']: - 
labels[lbl['key']] = lbl['value'] - - record['labels'] = labels - - return BillingRowRecord( - id=record['id'], - topic=record['topic'], - service_id=record['service'].get('id'), - service_description=record['service'].get('description'), - sku_id=record['sku'].get('id'), - sku_description=record['sku'].get('description'), - usage_start_time=record['usage_start_time'], - usage_end_time=record['usage_end_time'], - gcp_project_id=record['project'].get('id'), - gcp_project_number=record['project'].get('number'), - gcp_project_name=record['project'].get('name'), - # labels - dataset=record['labels'].get('dataset'), - batch_id=record['labels'].get('batch_id'), - job_id=record['labels'].get('job_id'), - batch_name=record['labels'].get('batch_name'), - sequencing_type=record['labels'].get('sequencing_type'), - stage=record['labels'].get('stage'), - sequencing_group=record['labels'].get('sequencing_group'), - export_time=record['export_time'], - cost=record['cost'], - currency=record['currency'], - currency_conversion_rate=record['currency_conversion_rate'], - invoice_month=record['invoice'].get('month', ''), - cost_type=record['cost_type'], - ) - - -class BillingSource(str, Enum): - """List of billing sources""" - - RAW = 'raw' - AGGREGATE = 'aggregate' - EXTENDED = 'extended' - BUDGET = 'budget' - GCP_BILLING = 'gcp_billing' - BATCHES = 'batches' +from models.enums.billing import BillingSource, BillingTimeColumn, BillingTimePeriods class BillingColumn(str, Enum): @@ -272,25 +149,6 @@ def generate_all_title(record) -> str: return f'All {record.title()}s' -class BillingTimePeriods(str, Enum): - """List of billing grouping time periods""" - - # grouping time periods - DAY = 'day' - WEEK = 'week' - MONTH = 'month' - INVOICE_MONTH = 'invoice_month' - - -class BillingTimeColumn(str, Enum): - """List of billing time columns""" - - DAY = 'day' - USAGE_START_TIME = 'usage_start_time' - USAGE_END_TIME = 'usage_end_time' - EXPORT_TIME = 'export_time' - - class BillingTotalCostQueryModel(SMBase): """ Used to query for billing total cost diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index ec1f2a491..8a163b6e8 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -7,6 +7,11 @@ import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import { BillingApi, BillingTotalCostRecord } from '../../sm-api' import HailBatchGrid from './components/HailBatchGrid' +enum SearchType { + Ar_guid, + Batch_id, +} + const BillingCostByAnalysis: React.FunctionComponent = () => { const [searchParams] = useSearchParams() @@ -27,10 +32,6 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { searchParams.get('searchTxt') ?? undefined ) - enum SearchType { - Ar_guid, - Batch_id, - } const searchOptions: string[] = Object.keys(SearchType).filter((item) => isNaN(Number(item))) const dropdownOptions = searchOptions.map((item) => ({ text: item.replaceAll('_', ' '), @@ -130,22 +131,20 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { onChange={handleSearchChange} value={searchTxt} action={{ icon: 'search' }} - > - - +
diff --git a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx index aa6d731f8..f8256d454 100644 --- a/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx +++ b/web/src/shared/components/Graphs/StackedAreaByDateChart.tsx @@ -76,7 +76,7 @@ export const StackedAreaByDateChart: React.FC = ({ colors, }) => { if (!data || data.length === 0) { - return No Data + return <>No Data } const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow From 4e35df57a5a18cfb3e41487ee4065a508cad90fb Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Wed, 3 Jan 2024 15:12:47 +1100 Subject: [PATCH 11/34] Small Frontend refactoring, reflecting PR review. --- web/src/index.css | 14 +++++-- .../pages/billing/BillingCostByAnalysis.tsx | 19 ++++------ .../pages/billing/BillingCostByCategory.tsx | 33 ++++++----------- web/src/pages/billing/BillingCostByTime.tsx | 37 +++++++------------ .../pages/billing/BillingInvoiceMonthCost.tsx | 9 +++-- web/src/pages/billing/BillingSeqrProp.tsx | 23 ++++-------- web/src/shared/utilities/generateUrl.ts | 15 ++++++++ web/src/shared/utilities/monthStartEndDate.ts | 15 ++++++++ 8 files changed, 87 insertions(+), 78 deletions(-) create mode 100644 web/src/shared/utilities/generateUrl.ts create mode 100644 web/src/shared/utilities/monthStartEndDate.ts diff --git a/web/src/index.css b/web/src/index.css index c0228c110..4f3d52a35 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -26,6 +26,10 @@ --color-check-green: #7ac142; --color-table-header: #f9fafb; + + --color-bg-over-budget: rgba(128, 0, 0, 0.2); + --color-bg-half-budget: rgba(255, 165, 0, 0.2); + --color-bg-under-budget: rgba(0, 128, 0, 0.2); } html[data-theme='dark-mode'] { @@ -45,6 +49,10 @@ html[data-theme='dark-mode'] { --color-check-green: #659251; --color-table-header: rgba(0, 0, 0, 0.15); + + --color-bg-over-budget: rgba(128, 0, 0, 0.2); + --color-bg-half-budget: rgba(255, 165, 0, 0.2); + --color-bg-under-budget: rgba(0, 128, 0, 0.2); } /* poor man's dark mode: https://github.com/swagger-api/swagger-ui/issues/5327#issuecomment-742375520 */ @@ -262,13 +270,13 @@ html[data-theme='dark-mode'] .ui.table { } .billing-over-budget { - background-color: rgba(128, 0, 0, 0.2) !important; + background-color: var(--color-bg-over-budget) !important; } .billing-half-budget { - background-color: rgba(255, 165, 0, 0.2) !important; + background-color: var(--color-bg-half-budget) !important; } .billing-under-budget { - background-color: rgba(0, 128, 0, 0.2) !important; + background-color: var(--color-bg-under-budget) !important; } diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index 8a163b6e8..4f3655f71 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -6,6 +6,8 @@ import SearchIcon from '@mui/icons-material/Search' import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import { BillingApi, BillingTotalCostRecord } from '../../sm-api' import HailBatchGrid from './components/HailBatchGrid' +import { getMonthStartDate } from '../../shared/utilities/monthStartEndDate' +import generateUrl from '../../shared/utilities/generateUrl' enum SearchType { Ar_guid, @@ -15,15 +17,12 @@ enum SearchType { const BillingCostByAnalysis: React.FunctionComponent = () => { const [searchParams] = useSearchParams() - const now = new Date() - // Data loading const [isLoading, setIsLoading] = 
React.useState(true) const [error, setError] = React.useState() const [start, setStart] = React.useState( - searchParams.get('start') ?? - `${now.getFullYear()}-${now.getMonth().toString().padStart(2, '0')}-01` + searchParams.get('start') ?? getMonthStartDate() ) const [data, setData] = React.useState([]) @@ -39,7 +38,7 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { })) const [searchByType, setSearchByType] = React.useState( - SearchType[searchParams.get('searchType')] ?? undefined + SearchType[searchParams.get('searchType')] ?? SearchType.Ar_guid ) // use navigate and update url params @@ -47,12 +46,10 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { const navigate = useNavigate() const updateNav = (sType: SearchType | undefined, sTxt: string | undefined) => { - let url = `${location.pathname}?` - const params: string[] = [] - if (sType !== undefined) params.push(`searchType=${SearchType[sType]}`) - if (sTxt !== undefined) params.push(`searchTxt=${sTxt}`) - - url += params.join('&') + const url = generateUrl(location, { + searchType: sType, + searchTxt: sTxt, + }) navigate(url) } diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx index 1c2daed1c..8096514c0 100644 --- a/web/src/pages/billing/BillingCostByCategory.tsx +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -12,11 +12,11 @@ import { } from '../../sm-api' import { convertFieldName } from '../../shared/utilities/fieldName' +import { getMonthStartDate, getMonthEndDate } from '../../shared/utilities/monthStartEndDate' import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' +import generateUrl from '../../shared/utilities/generateUrl' const BillingCostByCategory: React.FunctionComponent = () => { - const now = new Date() - const [searchParams] = useSearchParams() const inputGroupBy: string | undefined = searchParams.get('groupBy') ?? undefined @@ -29,16 +29,9 @@ const BillingCostByCategory: React.FunctionComponent = () => { const inputPeriod: string | undefined = searchParams.get('period') ?? BillingTimePeriods.Month const [start, setStart] = React.useState( - searchParams.get('start') ?? - `${now.getFullYear() - 1}-${now.getMonth().toString().padStart(2, '0')}-01` - ) - const [end, setEnd] = React.useState( - searchParams.get('end') ?? - `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now - .getDate() - .toString() - .padStart(2, '0')}` + searchParams.get('start') ?? getMonthStartDate() ) + const [end, setEnd] = React.useState(searchParams.get('end') ?? getMonthEndDate()) const [selectedGroup, setSelectedGroup] = React.useState(inputSelectedGroup) const [selectedCostCategory, setCostCategory] = React.useState( @@ -70,16 +63,14 @@ const BillingCostByCategory: React.FunctionComponent = () => { st: string, ed: string ) => { - let url = `${location.pathname}` - url += '?' 
- let params: string[] = [] - if (grpBy) params.push(`groupBy=${grpBy}`) - if (grp) params.push(`group=${grp}`) - if (category) params.push(`costCategory=${category}`) - if (period) params.push(`period=${period}`) - if (st) params.push(`start=${st}`) - if (ed) params.push(`end=${ed}`) - url += params.join('&') + const url = generateUrl(location, { + groupBy: grpBy, + group: grp, + costCategory: category, + period: period, + start: st, + end: ed, + }) navigate(url) } diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 6f216675a..e8f9b1307 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -12,14 +12,14 @@ import { } from '../../sm-api' import { convertFieldName } from '../../shared/utilities/fieldName' +import { getMonthStartDate, getMonthEndDate } from '../../shared/utilities/monthStartEndDate' import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/StackedAreaByDateChart' import BillingCostByTimeTable from './components/BillingCostByTimeTable' import { BarChart, IData } from '../../shared/components/Graphs/BarChart' import { DonutChart } from '../../shared/components/Graphs/DonutChart' +import generateUrl from '../../shared/utilities/generateUrl' const BillingCostByTime: React.FunctionComponent = () => { - const now = new Date() - const [searchParams] = useSearchParams() const inputGroupBy: string | undefined = searchParams.get('groupBy') ?? undefined @@ -29,16 +29,9 @@ const BillingCostByTime: React.FunctionComponent = () => { const inputSelectedData: string | undefined = searchParams.get('selectedData') ?? undefined const [start, setStart] = React.useState( - searchParams.get('start') ?? - `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-01` - ) - const [end, setEnd] = React.useState( - searchParams.get('end') ?? - `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now - .getDate() - .toString() - .padStart(2, '0')}` + searchParams.get('start') ?? getMonthStartDate() ) + const [end, setEnd] = React.useState(searchParams.get('end') ?? getMonthEndDate()) const [groupBy, setGroupBy] = React.useState( fixedGroupBy ?? BillingColumn.GcpProject ) @@ -60,20 +53,16 @@ const BillingCostByTime: React.FunctionComponent = () => { const updateNav = ( grp: string | undefined, - data: string | undefined, - start: string, - end: string + selData: string | undefined, + st: string, + ed: string ) => { - let url = `${location.pathname}` - if (grp || data) url += '?' 
- - let params: string[] = [] - if (grp) params.push(`groupBy=${grp}`) - if (data) params.push(`selectedData=${data}`) - if (start) params.push(`start=${start}`) - if (end) params.push(`end=${end}`) - - url += params.join('&') + const url = generateUrl(location, { + groupBy: grp, + selectedData: selData, + start: st, + end: ed, + }) navigate(url) } diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index 6db3abf48..595f0b806 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -8,6 +8,7 @@ import { BillingApi, BillingColumn, BillingCostBudgetRecord } from '../../sm-api import FieldSelector from './components/FieldSelector' import { convertFieldName } from '../../shared/utilities/fieldName' import { HorizontalStackedBarChart } from '../../shared/components/Graphs/HorizontalStackedBarChart' +import generateUrl from '../../shared/utilities/generateUrl' const BillingCurrentCost = () => { const [isLoading, setIsLoading] = React.useState(true) @@ -35,10 +36,10 @@ const BillingCurrentCost = () => { const navigate = useNavigate() const updateNav = (grp: BillingColumn, invoiceMonth: string | undefined) => { - let url = `${location.pathname}?groupBy=${grp}` - if (invoiceMonth) { - url += `&invoiceMonth=${invoiceMonth}` - } + const url = generateUrl(location, { + groupBy: grp, + invoiceMonth: invoiceMonth, + }) navigate(url) } diff --git a/web/src/pages/billing/BillingSeqrProp.tsx b/web/src/pages/billing/BillingSeqrProp.tsx index 2fc9ba0e6..80493e2cc 100644 --- a/web/src/pages/billing/BillingSeqrProp.tsx +++ b/web/src/pages/billing/BillingSeqrProp.tsx @@ -2,30 +2,23 @@ import * as React from 'react' import { useLocation, useNavigate } from 'react-router-dom' import { Grid, Card, Input } from 'semantic-ui-react' import SeqrProportionalMapGraph from './components/SeqrProportionalMapGraph' +import { getMonthEndDate } from '../../shared/utilities/monthStartEndDate' +import generateUrl from '../../shared/utilities/generateUrl' const BillingSeqrProp: React.FunctionComponent = () => { const now = new Date() const [start, setStart] = React.useState(`${now.getFullYear()}-01-01`) - const [end, setEnd] = React.useState( - `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-${now - .getDate() - .toString() - .padStart(2, '0')}` - ) + const [end, setEnd] = React.useState(getMonthEndDate()) // use navigate and update url params const location = useLocation() const navigate = useNavigate() - const updateNav = (start: string, end: string) => { - let url = `${location.pathname}` - if (start || end) url += '?' 
- - let params: string[] = [] - if (start) params.push(`start=${start}`) - if (end) params.push(`end=${end}`) - - url += params.join('&') + const updateNav = (st: string, ed: string) => { + const url = generateUrl(location, { + start: st, + end: ed, + }) navigate(url) } diff --git a/web/src/shared/utilities/generateUrl.ts b/web/src/shared/utilities/generateUrl.ts new file mode 100644 index 000000000..6279c5d69 --- /dev/null +++ b/web/src/shared/utilities/generateUrl.ts @@ -0,0 +1,15 @@ +import { Dictionary } from 'lodash' + +const generateUrl = (location: Location, params: Dictionary): string => { + let paramsArray: string[] = [] + paramsArray = Object.entries(params) + .filter(([_, value]) => value !== null && value !== undefined) + .map(([key, value]) => `${key}=${value}`) + + if (paramsArray.length === 0) + return `${location.pathname}` + + return `${location.pathname}?${paramsArray.join('&')}` +} + +export default generateUrl diff --git a/web/src/shared/utilities/monthStartEndDate.ts b/web/src/shared/utilities/monthStartEndDate.ts new file mode 100644 index 000000000..a7d6022d1 --- /dev/null +++ b/web/src/shared/utilities/monthStartEndDate.ts @@ -0,0 +1,15 @@ +const getMonthStartDate = (): string => { + const now = new Date() + return `${now.getFullYear()}-${(now.getMonth() + 1).toString().padStart(2, '0')}-01` +} + +const getMonthEndDate = (): string => { + const now = new Date() + return [ + now.getFullYear(), + (now.getMonth() + 1).toString().padStart(2, '0'), + now.getDate().toString().padStart(2, '0') + ].join('-') +} + +export {getMonthStartDate, getMonthEndDate} \ No newline at end of file From 04a00b2aa4a4c8834e1cc93ff5224aa6aa9ac03d Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Wed, 3 Jan 2024 15:49:25 +1100 Subject: [PATCH 12/34] Updating billing style for dark mode. 
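
This commit tightens the dark-mode palette: the budget-row background variables added in the
previous commit get stronger alpha values under html[data-theme='dark-mode'], and a new
--color-text-href variable is consumed by a .billing-href helper class so links inside the
billing tables keep a readable colour in both themes. The one-line change in
BillingInvoiceMonthCost.tsx appears to just tag an existing element with that class; as a rough
sketch of the pattern (the component and prop names below are invented for illustration, not
code from this patch):

    import * as React from 'react'

    // Any element carrying the `billing-href` class has its nested anchors coloured
    // via `--color-text-href`, which index.css defines separately per theme.
    // `BudgetLink`, `url` and `label` are example names only.
    const BudgetLink: React.FC<{ url: string; label: string }> = ({ url, label }) => (
        <b className="billing-href">
            <a href={url}>{label}</a>
        </b>
    )

    export default BudgetLink

Keeping both palettes as CSS custom properties in index.css means later colour tweaks only touch
the variable definitions, not the components.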
--- web/src/index.css | 14 +++++++++++--- web/src/pages/billing/BillingInvoiceMonthCost.tsx | 2 +- web/src/shared/utilities/monthStartEndDate.ts | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/web/src/index.css b/web/src/index.css index 4f3d52a35..8c1553665 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -30,6 +30,8 @@ --color-bg-over-budget: rgba(128, 0, 0, 0.2); --color-bg-half-budget: rgba(255, 165, 0, 0.2); --color-bg-under-budget: rgba(0, 128, 0, 0.2); + + --color-text-href: rgb(65, 131, 196); } html[data-theme='dark-mode'] { @@ -50,9 +52,11 @@ html[data-theme='dark-mode'] { --color-check-green: #659251; --color-table-header: rgba(0, 0, 0, 0.15); - --color-bg-over-budget: rgba(128, 0, 0, 0.2); - --color-bg-half-budget: rgba(255, 165, 0, 0.2); - --color-bg-under-budget: rgba(0, 128, 0, 0.2); + --color-bg-over-budget: rgba(128, 0, 0, 0.5); + --color-bg-half-budget: rgba(255, 165, 0, 0.5); + --color-bg-under-budget: rgba(0, 128, 0, 0.5); + + --color-text-href: rgb(188, 188, 251); } /* poor man's dark mode: https://github.com/swagger-api/swagger-ui/issues/5327#issuecomment-742375520 */ @@ -280,3 +284,7 @@ html[data-theme='dark-mode'] .ui.table { .billing-under-budget { background-color: var(--color-bg-under-budget) !important; } + +.billing-href a { + color: var(--color-text-href) !important; +} diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index 595f0b806..442d5011a 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -353,7 +353,7 @@ const BillingCurrentCost = () => { switch (k.category) { case 'field': return ( - + {p[k.category]} diff --git a/web/src/shared/utilities/monthStartEndDate.ts b/web/src/shared/utilities/monthStartEndDate.ts index a7d6022d1..089409332 100644 --- a/web/src/shared/utilities/monthStartEndDate.ts +++ b/web/src/shared/utilities/monthStartEndDate.ts @@ -12,4 +12,4 @@ const getMonthEndDate = (): string => { ].join('-') } -export {getMonthStartDate, getMonthEndDate} \ No newline at end of file +export {getMonthStartDate, getMonthEndDate} From f0577866b4f0f3374c19f4096f36e93fc8d9f514 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Wed, 3 Jan 2024 16:11:56 +1100 Subject: [PATCH 13/34] Optimised Frontend, replacing reduce with forEach where possible. 
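
The billing pages below previously built their per-category and per-day totals with
Array.prototype.reduce, seeding an object literal and returning the accumulator from every
iteration; HorizontalStackedBarChart built its budget lookup the same way. The refactor declares
the accumulator once and fills it in a plain forEach pass, which yields the same result with
less boilerplate. A minimal before/after sketch (cost_category and cost mirror the billing
record fields used by these pages; the helper names are illustrative only, not code from this
patch):

    type CostRow = { day?: string; cost_category: string; cost: number }

    // Before: reduce with an object accumulator that has to be returned on every iteration.
    const totalsWithReduce = (rows: CostRow[]): { [key: string]: number } =>
        rows.reduce((acc, { cost_category, cost }) => {
            acc[cost_category] = (acc[cost_category] ?? 0) + cost
            return acc
        }, {} as { [key: string]: number })

    // After: declare the accumulator once and mutate it in a forEach pass.
    const totalsWithForEach = (rows: CostRow[]): { [key: string]: number } => {
        const totals: { [key: string]: number } = {}
        rows.forEach(({ cost_category, cost }) => {
            totals[cost_category] = (totals[cost_category] ?? 0) + cost
        })
        return totals
    }

Either form computes the same totals; forEach reads more directly here because the nested
day-by-category records are pre-initialised and then filled in place rather than threaded
through nested reduce calls.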
--- .../pages/billing/BillingCostByCategory.tsx | 55 ++++++++---------- web/src/pages/billing/BillingCostByTime.tsx | 56 ++++++++----------- .../Graphs/HorizontalStackedBarChart.tsx | 8 +-- 3 files changed, 48 insertions(+), 71 deletions(-) diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx index 8096514c0..2082ba1f8 100644 --- a/web/src/pages/billing/BillingCostByCategory.tsx +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -121,44 +121,33 @@ const BillingCostByCategory: React.FunctionComponent = () => { setIsLoading(false) // calc totals per sku - const recTotals = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { sku, cost } = item - if (!acc[sku]) { - acc[sku] = 0 - } - acc[sku] += cost - return acc - }, - {} - ) + const recTotals: { [key: string]: number } = {} + response.data.forEach((item: BillingTotalCostRecord) => { + const { sku, cost } = item + if (!recTotals[sku]) { + recTotals[sku] = 0 + } + recTotals[sku] += cost + }) const sortedRecTotals: { [key: string]: number } = Object.fromEntries( Object.entries(recTotals).sort(([, a], [, b]) => b - a) ) const rec_grps = Object.keys(sortedRecTotals) - const records = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { day, sku, cost } = item - if (day !== undefined) { - if (!acc[day]) { - // initialise day structure - acc[day] = {} - rec_grps.forEach((k) => { - acc[day][k] = 0 - }) - } - acc[day][sku] = cost + const records: { [key: string]: { [key: string]: number } } = {} + + response.data.forEach((item: any) => { + const { day, sku, cost } = item + if (day !== undefined) { + if (!records[day]) { + // initialise day structure + records[day] = {} + rec_grps.forEach((k: string) => { + records[day][k] = 0 + }) } - return acc - }, - {} - ) + records[day][sku] = cost + } + }) setData( Object.keys(records).map((key) => ({ date: new Date(key), diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index e8f9b1307..8ddab9ca3 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -96,44 +96,32 @@ const BillingCostByTime: React.FunctionComponent = () => { setIsLoading(false) // calc totals per cost_category - const recTotals = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { cost_category, cost } = item - if (!acc[cost_category]) { - acc[cost_category] = 0 - } - acc[cost_category] += cost - return acc - }, - {} - ) + const recTotals: { [key: string]: number } = {} + response.data.forEach((item: BillingTotalCostRecord) => { + const { cost_category, cost } = item + if (!recTotals[cost_category]) { + recTotals[cost_category] = 0 + } + recTotals[cost_category] += cost + }) const sortedRecTotals: { [key: string]: number } = Object.fromEntries( Object.entries(recTotals).sort(([, a], [, b]) => b - a) ) const rec_grps = Object.keys(sortedRecTotals) - const records = response.data.reduce( - ( - acc: { [key: string]: { [key: string]: number } }, - item: BillingTotalCostRecord - ) => { - const { day, cost_category, cost } = item - if (day !== undefined) { - if (!acc[day]) { - // initialise day structure - acc[day] = {} - rec_grps.forEach((k) => { - acc[day][k] = 0 - }) - } - acc[day][cost_category] = cost + const records: { [key: string]: { [key: string]: 
number } } = {} + response.data.forEach((item: BillingTotalCostRecord) => { + const { day, cost_category, cost } = item + if (day !== undefined) { + if (!records[day]) { + // initial day structure + records[day] = {} + rec_grps.forEach((k) => { + records[day][k] = 0 + }) } - return acc - }, - {} - ) + records[day][cost_category] = cost + } + }) const no_undefined: string[] = rec_grps.filter( (item): item is string => item !== undefined ) @@ -154,7 +142,7 @@ const BillingCostByTime: React.FunctionComponent = () => { .slice(index) .reduce((sum, { value }) => sum + value, 0) - if (acc.length == maxDataPoints) { + if (acc.length === maxDataPoints) { acc.push({ label: 'Rest*', value: restValue }) } else { acc[maxDataPoints].value += restValue diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx index c0905b79d..d80c3021e 100644 --- a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -93,10 +93,10 @@ const HorizontalStackedBarChart: React.FC = ({ const indexedData = stackedData.map((innerArray, outerIdx) => innerArray.map((d, innerIdx) => ({ outerIdx, innerIdx, data: d })) ) - const budgetData = data.reduce((acc, d) => { - acc[d.field] = d.budget - return acc - }, {}) + const budgetData = {} + data.forEach((d) => { + budgetData[d.field] = d.budget + }) const maxBudget = Math.max(...data.map((item) => item.budget)) From 61474563da1c49ac9788566b90c5c3f3c1a7a9cb Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 4 Jan 2024 16:14:34 +1100 Subject: [PATCH 14/34] Refactoring Billing DB structures. --- api/routes/billing.py | 5 +- db/python/layers/__init__.py | 3 +- .../layers/{billing_layer.py => billing.py} | 121 ++- db/python/tables/billing_ar_batch.py | 69 ++ .../billing_db.py => tables/billing_base.py} | 755 +++++------------- db/python/tables/billing_daily.py | 127 +++ db/python/tables/billing_daily_extended.py | 50 ++ db/python/tables/billing_gcp_daily.py | 150 ++++ db/python/tables/billing_raw.py | 12 + models/models/billing.py | 17 + 10 files changed, 712 insertions(+), 597 deletions(-) rename db/python/layers/{billing_layer.py => billing.py} (56%) create mode 100644 db/python/tables/billing_ar_batch.py rename db/python/{layers/billing_db.py => tables/billing_base.py} (52%) create mode 100644 db/python/tables/billing_daily.py create mode 100644 db/python/tables/billing_daily_extended.py create mode 100644 db/python/tables/billing_gcp_daily.py create mode 100644 db/python/tables/billing_raw.py diff --git a/api/routes/billing.py b/api/routes/billing.py index 7482ee77a..f903717d6 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -6,11 +6,12 @@ from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW from api.utils.db import BqConnection, get_author -from db.python.layers.billing_layer import BillingLayer +from db.python.layers.billing import BillingLayer from models.models.billing import ( BillingColumn, BillingCostBudgetRecord, BillingHailBatchCostRecord, + BillingSource, BillingTotalCostQueryModel, BillingTotalCostRecord, ) @@ -520,7 +521,7 @@ async def get_total_cost( async def get_running_costs( field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, + source: BillingSource | None = None, author: str = get_author, ) -> list[BillingCostBudgetRecord]: """ diff --git a/db/python/layers/__init__.py b/db/python/layers/__init__.py index 43d474fcc..1c4442896 100644 --- 
a/db/python/layers/__init__.py +++ b/db/python/layers/__init__.py @@ -1,8 +1,7 @@ from db.python.layers.analysis import AnalysisLayer from db.python.layers.assay import AssayLayer from db.python.layers.base import BaseLayer -from db.python.layers.billing_db import BillingDb -from db.python.layers.billing_layer import BillingLayer +from db.python.layers.billing import BillingLayer from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer diff --git a/db/python/layers/billing_layer.py b/db/python/layers/billing.py similarity index 56% rename from db/python/layers/billing_layer.py rename to db/python/layers/billing.py index 506b98ef6..212aacd21 100644 --- a/db/python/layers/billing_layer.py +++ b/db/python/layers/billing.py @@ -1,5 +1,9 @@ -from db.python.layers.billing_db import BillingDb from db.python.layers.bq_base import BqBaseLayer +from db.python.tables.billing_ar_batch import BillingArBatchTable +from db.python.tables.billing_daily import BillingDailyTable +from db.python.tables.billing_daily_extended import BillingDailyExtendedTable +from db.python.tables.billing_gcp_daily import BillingGcpDailyTable +from db.python.tables.billing_raw import BillingRawTable from models.models import ( BillingColumn, BillingCostBudgetRecord, @@ -16,14 +20,42 @@ class BillingLayer(BqBaseLayer): """Billing layer""" + def table_factory( + self, source: BillingSource, fields: list[BillingColumn] | None = None + ) -> ( + BillingDailyTable + | BillingDailyExtendedTable + | BillingGcpDailyTable + | BillingRawTable + ): + """Get billing table object based on source and fields""" + if source == BillingSource.GCP_BILLING: + return BillingGcpDailyTable(self.connection) + elif source == BillingSource.RAW: + return BillingRawTable(self.connection) + + # check if any of the fields is in the extended columns + if fields: + used_extended_cols = [ + f + for f in fields + if f in BillingColumn.extended_cols() and BillingColumn.can_group_by(f) + ] + if used_extended_cols: + # there is a field from extended daily table + return BillingDailyExtendedTable(self.connection) + + # by default look at the daily table + return BillingDailyTable(self.connection) + async def get_gcp_projects( self, ) -> list[str] | None: """ Get All GCP projects in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_gcp_projects() + billing_table = BillingGcpDailyTable(self.connection) + return await billing_table.get_gcp_projects() async def get_topics( self, @@ -31,8 +63,8 @@ async def get_topics( """ Get All topics in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_topics() + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_topics() async def get_cost_categories( self, @@ -40,8 +72,8 @@ async def get_cost_categories( """ Get All service description / cost categories in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_cost_categories() + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_cost_categories() async def get_skus( self, @@ -51,8 +83,8 @@ async def get_skus( """ Get All SKUs in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_skus(limit, offset) + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_skus(limit, offset) async def get_datasets( self, @@ -60,8 +92,8 @@ async def get_datasets( """ Get All 
datasets in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('dataset') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('dataset') async def get_stages( self, @@ -69,8 +101,8 @@ async def get_stages( """ Get All stages in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('stage') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('stage') async def get_sequencing_types( self, @@ -78,8 +110,8 @@ async def get_sequencing_types( """ Get All sequencing_types in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_type') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('sequencing_type') async def get_sequencing_groups( self, @@ -87,8 +119,8 @@ async def get_sequencing_groups( """ Get All sequencing_groups in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('sequencing_group') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('sequencing_group') async def get_compute_categories( self, @@ -96,8 +128,8 @@ async def get_compute_categories( """ Get All compute_category values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('compute_category') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('compute_category') async def get_cromwell_sub_workflow_names( self, @@ -105,8 +137,8 @@ async def get_cromwell_sub_workflow_names( """ Get All cromwell_sub_workflow_name values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('cromwell_sub_workflow_name') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('cromwell_sub_workflow_name') async def get_wdl_task_names( self, @@ -114,8 +146,8 @@ async def get_wdl_task_names( """ Get All wdl_task_name values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('wdl_task_name') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('wdl_task_name') async def get_invoice_months( self, @@ -123,8 +155,8 @@ async def get_invoice_months( """ Get All invoice months in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_invoice_months() + billing_table = BillingDailyTable(self.connection) + return await billing_table.get_invoice_months() async def get_namespaces( self, @@ -132,8 +164,8 @@ async def get_namespaces( """ Get All namespaces values in database """ - billing_db = BillingDb(self.connection) - return await billing_db.get_extended_values('namespace') + billing_table = BillingDailyExtendedTable(self.connection) + return await billing_table.get_extended_values('namespace') async def get_total_cost( self, @@ -142,20 +174,20 @@ async def get_total_cost( """ Get Total cost of selected fields for requested time interval """ - billing_db = BillingDb(self.connection) - return await billing_db.get_total_cost(query) + billing_table = self.table_factory(query.source, query.fields) + return await billing_table.get_total_cost(query) async def get_running_cost( self, 
field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, + source: BillingSource | None = None, ) -> list[BillingCostBudgetRecord]: """ Get Running costs including monthly budget """ - billing_db = BillingDb(self.connection) - return await billing_db.get_running_cost(field, invoice_month, source) + billing_table = self.table_factory(source, [field]) + return await billing_table.get_running_cost(field, invoice_month) async def get_cost_by_ar_guid( self, @@ -164,10 +196,14 @@ async def get_cost_by_ar_guid( """ Get Costs by AR GUID """ - billing_db = BillingDb(self.connection) + ar_batch_lookup_table = BillingArBatchTable(self.connection) # First get all batches and the min/max day to use for the query - start_day, end_day, batches = await billing_db.get_batches_by_ar_guid(ar_guid) + ( + start_day, + end_day, + batches, + ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid) if not batches: return BillingHailBatchCostRecord( @@ -195,7 +231,9 @@ async def get_cost_by_ar_guid( time_column=BillingTimeColumn.USAGE_END_TIME, time_periods=BillingTimePeriods.DAY, ) - records = await billing_db.get_total_cost(query) + + billing_table = self.table_factory(query.source, query.fields) + records = await billing_table.get_total_cost(query) return BillingHailBatchCostRecord( ar_guid=ar_guid, batch_ids=batches, @@ -209,13 +247,17 @@ async def get_cost_by_batch_id( """ Get Costs by Batch ID """ - billing_db = BillingDb(self.connection) + ar_batch_lookup_table = BillingArBatchTable(self.connection) # First get all batches and the min/max day to use for the query - ar_guid = await billing_db.get_ar_guid_by_batch_id(batch_id) + ar_guid = await ar_batch_lookup_table.get_ar_guid_by_batch_id(batch_id) # The get all batches for the ar_guid - start_day, end_day, batches = await billing_db.get_batches_by_ar_guid(ar_guid) + ( + start_day, + end_day, + batches, + ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid) if not batches: return BillingHailBatchCostRecord(ar_guid=ar_guid, batch_ids=[], costs=[]) @@ -239,7 +281,8 @@ async def get_cost_by_batch_id( time_column=BillingTimeColumn.USAGE_END_TIME, time_periods=BillingTimePeriods.DAY, ) - records = await billing_db.get_total_cost(query) + billing_table = self.table_factory(query.source, query.fields) + records = await billing_table.get_total_cost(query) return BillingHailBatchCostRecord( ar_guid=ar_guid, batch_ids=batches, diff --git a/db/python/tables/billing_ar_batch.py b/db/python/tables/billing_ar_batch.py new file mode 100644 index 000000000..b1f3c7f59 --- /dev/null +++ b/db/python/tables/billing_ar_batch.py @@ -0,0 +1,69 @@ +from datetime import datetime, timedelta + +from google.cloud import bigquery + +from api.settings import BQ_BATCHES_VIEW +from db.python.tables.billing_base import BillingBaseTable + + +class BillingArBatchTable(BillingBaseTable): + """Billing AR - BatchID lookup Big Query table""" + + table_name = BQ_BATCHES_VIEW + + def get_table_name(self): + """Get table name""" + return self.table_name + + async def get_batches_by_ar_guid( + self, ar_guid: str + ) -> tuple[datetime, datetime, list[str]]: + """ + Get batches for given ar_guid + """ + _query = f""" + SELECT + batch_id, + MIN(min_day) as start_day, + MAX(max_day) as end_day + FROM `{self.table_name}` + WHERE ar_guid = @ar_guid + AND batch_id IS NOT NULL + GROUP BY batch_id + ORDER BY 1; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('ar_guid', 'STRING', ar_guid), + ] + query_job_result = self._execute_query(_query, 
query_parameters) + + if query_job_result: + start_day = min((row.start_day for row in query_job_result)) + end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1) + return start_day, end_day, [row.batch_id for row in query_job_result] + + # return empty list if no record found + return None, None, [] + + async def get_ar_guid_by_batch_id(self, batch_id: str) -> str: + """ + Get ar_guid for given batch_id + """ + _query = f""" + SELECT ar_guid + FROM `{self.table_name}` + WHERE batch_id = @batch_id + AND ar_guid IS NOT NULL + LIMIT 1; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('batch_id', 'STRING', batch_id), + ] + query_job_result = self._execute_query(_query, query_parameters) + if query_job_result: + return query_job_result[0]['ar_guid'] + + # return None if no ar_guid found + return None diff --git a/db/python/layers/billing_db.py b/db/python/tables/billing_base.py similarity index 52% rename from db/python/layers/billing_db.py rename to db/python/tables/billing_base.py index 2417dfc94..000b953aa 100644 --- a/db/python/layers/billing_db.py +++ b/db/python/tables/billing_base.py @@ -1,20 +1,12 @@ -# pylint: disable=too-many-lines import re +from abc import ABCMeta, abstractmethod from collections import Counter, defaultdict, namedtuple -from datetime import datetime, timedelta +from datetime import datetime from typing import Any from google.cloud import bigquery -from api.settings import ( - BQ_AGGREG_EXT_VIEW, - BQ_AGGREG_RAW, - BQ_AGGREG_VIEW, - BQ_BATCHES_VIEW, - BQ_BUDGET_VIEW, - BQ_DAYS_BACK_OPTIMAL, - BQ_GCP_BILLING_VIEW, -) +from api.settings import BQ_BUDGET_VIEW, BQ_DAYS_BACK_OPTIMAL from api.utils.dates import get_invoice_month_range, reformat_datetime from db.python.gcp_connect import BqDbBase from models.models import ( @@ -23,12 +15,12 @@ BillingTimePeriods, BillingTotalCostQueryModel, ) -from models.models.billing import BillingSource # Label added to each Billing Big Query request, # so we can track the cost of metamist-api BQ usage BQ_LABELS = {'source': 'metamist-api'} + # Day Time details used in grouping and parsing formulas TimeGroupingDetails = namedtuple('TimeGroupingDetails', ['field', 'formula']) @@ -69,285 +61,109 @@ def prepare_time_periods( return result -class BillingDb(BqDbBase): - """Db layer for billing related routes""" - - async def get_gcp_projects(self): - """Get all GCP projects in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this part_time > filter is to limit the amount of data scanned, - # saving cost for running BQ - _query = f""" - SELECT DISTINCT gcp_project - FROM `{BQ_GCP_BILLING_VIEW}` - WHERE part_time > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - AND gcp_project IS NOT NULL - ORDER BY gcp_project ASC; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ], - labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['gcp_project']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_topics(self): - """Get all topics in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data 
scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT topic - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY topic ASC; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ], - labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['topic']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_invoice_months(self): - """Get all invoice months in database""" - - _query = f""" - SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month - FROM `{BQ_AGGREG_VIEW}` - WHERE EXTRACT(day from day) = 1 - ORDER BY invoice_month DESC; - """ - - job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['invoice_month']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_cost_categories(self): - """Get all service description in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT cost_category - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY cost_category ASC; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ], - labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['cost_category']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_skus( - self, - limit: int | None = None, - offset: int | None = None, - ): - """Get all SKUs in database""" - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT sku - FROM `{BQ_AGGREG_VIEW}` - WHERE day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY sku ASC - """ - - # append LIMIT and OFFSET if present - if limit: - _query += ' LIMIT @limit_val' - if offset: - _query += ' OFFSET @offset_val' - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - bigquery.ScalarQueryParameter('limit_val', 'INT64', limit), - bigquery.ScalarQueryParameter('offset_val', 'INT64', offset), - ], - labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)['sku']) for row in query_job_result] - - # return empty list if no record found - return [] - - async def get_extended_values(self, field: str): - """ - Get all extended values 
in database, for specified field. - Field is one of extended coumns. - """ - - if field not in BillingColumn.extended_cols(): - raise ValueError('Invalid field value') - - # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query - # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL - # this day > filter is to limit the amount of data scanned, - # saving cost for running BQ - # aggregated views are partitioned by day - _query = f""" - SELECT DISTINCT {field} - FROM `{BQ_AGGREG_EXT_VIEW}` - WHERE {field} IS NOT NULL - AND day > TIMESTAMP_ADD( - CURRENT_TIMESTAMP(), INTERVAL @days DAY - ) - ORDER BY 1 ASC; - """ +def construct_filter( + name: str, value: Any, is_label: bool = False +) -> tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]: + """Based on Filter value, construct filter string and query parameter + + Args: + name (str): Filter name + value (Any): Filter value + is_label (bool, optional): Is filter a label?. Defaults to False. + + Returns: + tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter] + """ + compare = '=' + b1, b2 = '', '' + param_type = bigquery.ScalarQueryParameter + key = name.replace('-', '_') + + if isinstance(value, list): + compare = 'IN' + b1, b2 = 'UNNEST(', ')' + param_type = bigquery.ArrayQueryParameter + + if is_label: + name = f'getLabelValue(labels, "{name}")' + + return ( + f'{name} {compare} {b1}@{key}{b2}', + param_type(key, 'STRING', value), + ) + + +class BillingBaseTable(BqDbBase): + """Billing Base Table + This is abstract class, it should not be instantiated + """ + + __metaclass__ = ABCMeta + + @abstractmethod + def get_table_name(self): + """Get table name""" + pass + + def _execute_query( + self, query: str, params: list[Any] = [], results_as_list: bool = True + ) -> list[Any]: + """Execute query, add BQ labels""" + job_config = bigquery.QueryJobConfig(query_parameters=params, labels=BQ_LABELS) + if results_as_list: + return list( + self._connection.connection.query(query, job_config=job_config).result() + ) - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ], - labels=BQ_LABELS, - ) + # otherwise return as BQ iterator + return self._connection.connection.query(query, job_config=job_config) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return [str(dict(row)[field]) for row in query_job_result] + def _filter_to_optimise_query(self) -> str: + """Filter string to optimise BQ query""" + return 'day >= TIMESTAMP(@start_day) AND day <= TIMESTAMP(@last_day)' - # return empty list if no record found - return [] + def _last_loaded_day_filter(self) -> str: + """Last Loaded day filter string""" + return 'day = TIMESTAMP(@last_loaded_day)' - def _prepare_filter_str(self, query: BillingTotalCostQueryModel, view_to_use: str): - """Prepare filter string""" - and_filters = [] - query_parameters = [] + def _prepare_time_filters(self, query: BillingTotalCostQueryModel): + """Prepare time filters""" time_column = query.time_column or 'day' + time_filters = [] + query_parameters = [] - and_filters.append(f'{time_column} >= TIMESTAMP(@start_date)') - and_filters.append(f'{time_column} <= TIMESTAMP(@end_date)') - query_parameters.extend( - [ - bigquery.ScalarQueryParameter('start_date', 'STRING', query.start_date), - bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date), - ] - ) - - if query.source == 
BillingSource.GCP_BILLING: - # BQ_GCP_BILLING_VIEW view is partitioned by different field - # BQ has limitation, materialized view can only by partition by base table - # partition or its subset, in our case _PARTITIONTIME - # (part_time field in the view) - # We are querying by day, - # which can be up to a week behind regarding _PARTITIONTIME - and_filters.append('part_time >= TIMESTAMP(@start_date)') - and_filters.append( - 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' + if query.start_date: + time_filters.append(f'{time_column} >= TIMESTAMP(@start_date)') + query_parameters.extend( + [ + bigquery.ScalarQueryParameter( + 'start_date', 'STRING', query.start_date + ), + ] ) - - def construct_filter( - name: str, value: Any, is_label: bool = False - ) -> tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]: - compare = '=' - b1, b2 = '', '' - param_type = bigquery.ScalarQueryParameter - key = name.replace('-', '_') - - if isinstance(value, list): - compare = 'IN' - b1, b2 = 'UNNEST(', ')' - param_type = bigquery.ArrayQueryParameter - - if is_label: - name = f'getLabelValue(labels, "{name}")' - - return ( - f'{name} {compare} {b1}@{key}{b2}', - param_type(key, 'STRING', value), + if query.end_date: + time_filters.append(f'{time_column} <= TIMESTAMP(@end_date)') + query_parameters.extend( + [ + bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date), + ] ) + return time_filters, query_parameters + + def _prepare_filter_str(self, query: BillingTotalCostQueryModel): + """Prepare filter string""" + and_filters, query_parameters = self._prepare_time_filters(query) + # No additional filters filters = [] if not query.filters: filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' - return filter_str, query_parameters, view_to_use + return filter_str, query_parameters # Add each of the filters in the query for filter_key, filter_value in query.filters.items(): col_name = str(filter_key.value) - if col_name in BillingColumn.extended_cols(): - # if one of the extended columns is needed, - # the view has to be extended - view_to_use = BQ_AGGREG_EXT_VIEW if not isinstance(filter_value, dict): filter_, query_param = construct_filter(col_name, filter_value) @@ -369,16 +185,15 @@ def construct_filter( and_filters.extend(filters) filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' - return filter_str, query_parameters, view_to_use + return filter_str, query_parameters - def convert_output(self, query_job_result): + def _convert_output(self, query_job_result): """Convert query result to json""" if not query_job_result or query_job_result.result().total_rows == 0: # return empty list if no record found return [] records = query_job_result.result() - results = [] def transform_labels(row): @@ -391,125 +206,9 @@ def transform_labels(row): results.append(drec) - # df = query_job_result.to_dataframe() - - # for col in df.columns: - # # convert date to string - # if df.dtypes[col] == 'dbdate': - # df[col] = df[col].astype(str) - - # # modify labels format - # if col == 'labels': - # df[col] = df[col].apply(fix_labels) - - # data = json.loads(df.to_json(orient='records', date_format='iso')) return results - # pylint: disable=too-many-locals - async def get_total_cost( - self, - query: BillingTotalCostQueryModel, - ) -> list[dict] | None: - """ - Get Total cost of selected fields for requested time interval from BQ view - """ - if not query.start_date or not query.end_date or not query.fields: - raise ValueError('Date and 
Fields are required') - - extended_cols = BillingColumn.extended_cols() - - # by default look at the normal view - if query.source == BillingSource.GCP_BILLING: - view_to_use = BQ_GCP_BILLING_VIEW - elif query.source == BillingSource.RAW: - view_to_use = BQ_AGGREG_RAW - else: - view_to_use = BQ_AGGREG_VIEW - - # Get columns to group by and check view to use - grp_columns = [] - for field in query.fields: - col_name = str(field.value) - if not BillingColumn.can_group_by(field): - # if the field cannot be grouped by, skip it - continue - - if col_name in extended_cols: - # if one of the extended columns is needed, the view has to be extended - view_to_use = BQ_AGGREG_EXT_VIEW - - grp_columns.append(col_name) - - grp_selected = ','.join(grp_columns) - fields_selected = ','.join( - (field.value for field in query.fields if field != BillingColumn.COST) - ) - - # prepare grouping by time periods - time_group = TimeGroupingDetails('', '') - if query.time_periods or query.time_column: - # remove existing day column, if added to fields - # this is to prevent duplicating various time periods in one query - # if BillingColumn.DAY in query.fields: - # columns.remove(BillingColumn.DAY) - time_group = prepare_time_periods(query) - - filter_str, query_parameters, view_to_use = self._prepare_filter_str( - query, view_to_use - ) - - # construct order by - order_by_cols = [] - if query.order_by: - for order_field, reverse in query.order_by.items(): - col_name = str(order_field.value) - col_order = 'DESC' if reverse else 'ASC' - order_by_cols.append(f'{col_name} {col_order}') - - order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' - - group_by = f'GROUP BY day, {grp_selected}' if query.group_by else '' - cost = 'SUM(cost) as cost' if query.group_by else 'cost' - - _query = f""" - CREATE TEMP FUNCTION getLabelValue( - labels ARRAY>, label STRING - ) AS ( - (SELECT value FROM UNNEST(labels) WHERE key = label LIMIT 1) - ); - - WITH t AS ( - SELECT {time_group.field}{fields_selected}, {cost} - FROM `{view_to_use}` - {filter_str} - {group_by} - {order_by_str} - ) - SELECT {time_group.formula}{fields_selected}, cost FROM t - """ - - # append LIMIT and OFFSET if present - if query.limit: - _query += ' LIMIT @limit_val' - query_parameters.append( - bigquery.ScalarQueryParameter('limit_val', 'INT64', query.limit) - ) - if query.offset: - _query += ' OFFSET @offset_val' - query_parameters.append( - bigquery.ScalarQueryParameter('offset_val', 'INT64', query.offset) - ) - - job_config = bigquery.QueryJobConfig( - query_parameters=query_parameters, labels=BQ_LABELS - ) - query_job_result = self._connection.connection.query( - _query, job_config=job_config - ) - - return self.convert_output(query_job_result) - - async def get_budgets_by_gcp_project( + async def _budgets_by_gcp_project( self, field: BillingColumn, is_current_month: bool ) -> dict[str, float]: """ @@ -530,17 +229,13 @@ async def get_budgets_by_gcp_project( ON d.gcp_project = t.gcp_project AND d.created_at = t.last_created_at """ - job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - + query_job_result = self._execute_query(_query) if query_job_result: return {row.gcp_project: row.budget for row in query_job_result} return {} - async def get_last_loaded_day(self): + async def _last_loaded_day(self): """Get the most recent fully loaded day in db Go 2 days back as the data is not always available for the current day 1 day 
back is not enough @@ -548,52 +243,25 @@ async def get_last_loaded_day(self): _query = f""" SELECT TIMESTAMP_ADD(MAX(day), INTERVAL -2 DAY) as last_loaded_day - FROM `{BQ_AGGREG_VIEW}` + FROM `{self.get_table_name()}` WHERE day > TIMESTAMP_ADD( CURRENT_TIMESTAMP(), INTERVAL @days DAY ) """ - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter( - 'days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL) - ), - ], - labels=BQ_LABELS, - ) + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) if query_job_result: return str(query_job_result[0].last_loaded_day) return None - async def prepare_daily_cost_subquery( - self, field, view_to_use, source, query_params - ): + async def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): """prepare daily cost subquery""" - if source == 'gcp_billing': - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - gcp_billing_optimise_filter = """ - AND part_time >= TIMESTAMP(@last_loaded_day) - AND part_time <= TIMESTAMP_ADD( - TIMESTAMP(@last_loaded_day), INTERVAL 7 DAY - ) - """ - else: - gcp_billing_optimise_filter = '' - - # Find the last fully loaded day in the view - last_loaded_day = await self.get_last_loaded_day() - daily_cost_field = ', day.cost as daily_cost' daily_cost_join = f"""LEFT JOIN ( SELECT @@ -601,9 +269,8 @@ async def prepare_daily_cost_subquery( cost_category, SUM(cost) as cost FROM - `{view_to_use}` - WHERE day = TIMESTAMP(@last_loaded_day) - {gcp_billing_optimise_filter} + `{self.get_table_name()}` + WHERE {self._last_loaded_day_filter()} GROUP BY field, cost_category @@ -615,13 +282,12 @@ async def prepare_daily_cost_subquery( query_params.append( bigquery.ScalarQueryParameter('last_loaded_day', 'STRING', last_loaded_day), ) - return (last_loaded_day, query_params, daily_cost_field, daily_cost_join) + return (query_params, daily_cost_field, daily_cost_join) - async def execute_running_cost_query( + async def _execute_running_cost_query( self, field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, ): """ Run query to get running cost of selected field @@ -641,37 +307,6 @@ async def execute_running_cost_query( start_day = start_day_date.strftime('%Y-%m-%d') last_day = last_day_date.strftime('%Y-%m-%d') - # by default look at the normal view - if field in BillingColumn.extended_cols(): - # if any of the extendeid fields are needed use the extended view - view_to_use = BQ_AGGREG_EXT_VIEW - elif source == 'gcp_billing': - # if source is gcp_billing, - # use the view on top of the raw billing table - view_to_use = BQ_GCP_BILLING_VIEW - else: - # otherwise use the normal view - view_to_use = BQ_AGGREG_VIEW - - if source == 'gcp_billing': - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - filter_to_optimise_query = """ - part_time >= TIMESTAMP(@start_day) - AND part_time <= TIMESTAMP_ADD( - 
TIMESTAMP(@last_day), INTERVAL 7 DAY - ) - """ - else: - # add extra filter to limit materialized view partition - filter_to_optimise_query = """ - day >= TIMESTAMP(@start_day) - AND day <= TIMESTAMP(@last_day) - """ - # start_day and last_day are in to optimise the query query_params = [ bigquery.ScalarQueryParameter('start_day', 'STRING', start_day), @@ -685,13 +320,14 @@ async def execute_running_cost_query( if is_current_month: # Only current month can have last 24 hours cost # Last 24H in UTC time + # Find the last fully loaded day in the view + last_loaded_day = await self._last_loaded_day() ( - last_loaded_day, query_params, daily_cost_field, daily_cost_join, - ) = await self.prepare_daily_cost_subquery( - field, view_to_use, source, query_params + ) = await self._prepare_daily_cost_subquery( + field, query_params, last_loaded_day ) else: # Do not calculate last 24H cost @@ -711,8 +347,8 @@ async def execute_running_cost_query( cost_category, SUM(cost) as cost FROM - `{view_to_use}` - WHERE {filter_to_optimise_query} + `{self.get_table_name()}` + WHERE {self._filter_to_optimise_query()} AND invoice_month = @invoice_month GROUP BY field, @@ -730,17 +366,10 @@ async def execute_running_cost_query( return ( is_current_month, last_loaded_day, - list( - self._connection.connection.query( - _query, - job_config=bigquery.QueryJobConfig( - query_parameters=query_params, labels=BQ_LABELS - ), - ).result() - ), + self._execute_query(_query, query_params), ) - async def append_total_running_cost( + async def _append_total_running_cost( self, field: BillingColumn, is_current_month: bool, @@ -795,7 +424,7 @@ async def append_total_running_cost( return results - async def append_running_cost_records( + async def _append_running_cost_records( self, field: BillingColumn, is_current_month: bool, @@ -809,7 +438,7 @@ async def append_running_cost_records( Add all the selected field rows: compute + storage to the results """ # get budget map per gcp project - budgets_per_gcp_project = await self.get_budgets_by_gcp_project( + budgets_per_gcp_project = await self._budgets_by_gcp_project( field, is_current_month ) @@ -850,11 +479,93 @@ async def append_running_cost_records( return results + async def get_total_cost( + self, + query: BillingTotalCostQueryModel, + ) -> list[dict] | None: + """ + Get Total cost of selected fields for requested time interval from BQ view + """ + if not query.start_date or not query.end_date or not query.fields: + raise ValueError('Date and Fields are required') + + # Get columns to group by and check view to use + grp_columns = [] + for field in query.fields: + col_name = str(field.value) + if not BillingColumn.can_group_by(field): + # if the field cannot be grouped by, skip it + continue + + grp_columns.append(col_name) + + grp_selected = ','.join(grp_columns) + fields_selected = ','.join( + (field.value for field in query.fields if field != BillingColumn.COST) + ) + + # prepare grouping by time periods + time_group = TimeGroupingDetails('', '') + if query.time_periods or query.time_column: + # remove existing day column, if added to fields + # this is to prevent duplicating various time periods in one query + # if BillingColumn.DAY in query.fields: + # columns.remove(BillingColumn.DAY) + time_group = prepare_time_periods(query) + + filter_str, query_parameters = self._prepare_filter_str(query) + + # construct order by + order_by_cols = [] + if query.order_by: + for order_field, reverse in query.order_by.items(): + col_name = str(order_field.value) + col_order = 'DESC' if 
reverse else 'ASC' + order_by_cols.append(f'{col_name} {col_order}') + + order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' + + group_by = f'GROUP BY day, {grp_selected}' if query.group_by else '' + cost = 'SUM(cost) as cost' if query.group_by else 'cost' + + _query = f""" + CREATE TEMP FUNCTION getLabelValue( + labels ARRAY>, label STRING + ) AS ( + (SELECT value FROM UNNEST(labels) WHERE key = label LIMIT 1) + ); + + WITH t AS ( + SELECT {time_group.field}{fields_selected}, {cost} + FROM `{self.get_table_name()}` + {filter_str} + {group_by} + {order_by_str} + ) + SELECT {time_group.formula}{fields_selected}, cost FROM t + """ + + # append LIMIT and OFFSET if present + if query.limit: + _query += ' LIMIT @limit_val' + query_parameters.append( + bigquery.ScalarQueryParameter('limit_val', 'INT64', query.limit) + ) + if query.offset: + _query += ' OFFSET @offset_val' + query_parameters.append( + bigquery.ScalarQueryParameter('offset_val', 'INT64', query.offset) + ) + + query_job_result = self._execute_query( + _query, query_parameters, results_as_list=False + ) + return self._convert_output(query_job_result) + async def get_running_cost( self, field: BillingColumn, invoice_month: str | None = None, - source: str | None = None, ) -> list[BillingCostBudgetRecord]: """ Get currently running cost of selected field @@ -880,7 +591,7 @@ async def get_running_cost( is_current_month, last_loaded_day, query_job_result, - ) = await self.execute_running_cost_query(field, invoice_month, source) + ) = await self._execute_running_cost_query(field, invoice_month) if not query_job_result: # return empty list return [] @@ -926,7 +637,7 @@ async def get_running_cost( total_daily[cost_group][row.field] += row.daily_cost # add total row: compute + storage - results = await self.append_total_running_cost( + results = await self._append_total_running_cost( field, is_current_month, last_loaded_day, @@ -938,7 +649,7 @@ async def get_running_cost( ) # add rest of the records: compute + storage - results = await self.append_running_cost_records( + results = await self._append_running_cost_records( field, is_current_month, last_loaded_day, @@ -949,67 +660,3 @@ async def get_running_cost( ) return results - - async def get_batches_by_ar_guid( - self, ar_guid: str - ) -> tuple[datetime, datetime, list[str]]: - """ - Get batches for given ar_guid - """ - _query = f""" - SELECT - batch_id, - MIN(min_day) as start_day, - MAX(max_day) as end_day - FROM `{BQ_BATCHES_VIEW}` - WHERE ar_guid = @ar_guid - AND batch_id IS NOT NULL - GROUP BY batch_id - ORDER BY 1; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter('ar_guid', 'STRING', ar_guid), - ], - labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - start_day = min((row.start_day for row in query_job_result)) - end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1) - return start_day, end_day, [row.batch_id for row in query_job_result] - - # return empty list if no record found - return None, None, [] - - async def get_ar_guid_by_batch_id(self, batch_id: str) -> str: - """ - Get ar_guid for given batch_id - """ - _query = f""" - SELECT ar_guid - FROM `{BQ_BATCHES_VIEW}` - WHERE batch_id = @batch_id - AND ar_guid IS NOT NULL - LIMIT 1; - """ - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter('batch_id', 'STRING', batch_id), - ], - 
labels=BQ_LABELS, - ) - - query_job_result = list( - self._connection.connection.query(_query, job_config=job_config).result() - ) - if query_job_result: - return query_job_result[0]['ar_guid'] - - # return None if no ar_guid found - return None diff --git a/db/python/tables/billing_daily.py b/db/python/tables/billing_daily.py new file mode 100644 index 000000000..959acf95c --- /dev/null +++ b/db/python/tables/billing_daily.py @@ -0,0 +1,127 @@ +from google.cloud import bigquery + +from api.settings import BQ_AGGREG_VIEW, BQ_DAYS_BACK_OPTIMAL +from db.python.tables.billing_base import BillingBaseTable + + +class BillingDailyTable(BillingBaseTable): + """Billing Aggregated Daily Biq Query table""" + + table_name = BQ_AGGREG_VIEW + + def get_table_name(self): + """Get table name""" + return self.table_name + + async def get_topics(self): + """Get all topics in database""" + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day + _query = f""" + SELECT DISTINCT topic + FROM `{self.table_name}` + WHERE day > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + ORDER BY topic ASC; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return [str(dict(row)['topic']) for row in query_job_result] + + # return empty list if no record found + return [] + + async def get_invoice_months(self): + """Get all invoice months in database""" + + _query = f""" + SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month + FROM `{self.table_name}` + WHERE EXTRACT(day from day) = 1 + ORDER BY invoice_month DESC; + """ + + query_job_result = self._execute_query(_query) + if query_job_result: + return [str(dict(row)['invoice_month']) for row in query_job_result] + + # return empty list if no record found + return [] + + async def get_cost_categories(self): + """Get all service description in database""" + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day + _query = f""" + SELECT DISTINCT cost_category + FROM `{BQ_AGGREG_VIEW}` + WHERE day > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + ORDER BY cost_category ASC; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return [str(dict(row)['cost_category']) for row in query_job_result] + + # return empty list if no record found + return [] + + async def get_skus( + self, + limit: int | None = None, + offset: int | None = None, + ): + """Get all SKUs in database""" + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day + _query = f""" + SELECT DISTINCT sku + FROM `{self.table_name}` + WHERE day > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + ORDER BY sku ASC + """ + + # append LIMIT and OFFSET if 
present + if limit: + _query += ' LIMIT @limit_val' + if offset: + _query += ' OFFSET @offset_val' + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + bigquery.ScalarQueryParameter('limit_val', 'INT64', limit), + bigquery.ScalarQueryParameter('offset_val', 'INT64', offset), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return [str(dict(row)['sku']) for row in query_job_result] + + # return empty list if no record found + return [] diff --git a/db/python/tables/billing_daily_extended.py b/db/python/tables/billing_daily_extended.py new file mode 100644 index 000000000..5e7fe62e8 --- /dev/null +++ b/db/python/tables/billing_daily_extended.py @@ -0,0 +1,50 @@ +from google.cloud import bigquery + +from api.settings import BQ_AGGREG_EXT_VIEW, BQ_DAYS_BACK_OPTIMAL +from db.python.tables.billing_base import BillingBaseTable +from models.models import BillingColumn + + +class BillingDailyExtendedTable(BillingBaseTable): + """Billing Aggregated Daily Extended Biq Query table""" + + table_name = BQ_AGGREG_EXT_VIEW + + def get_table_name(self): + """Get table name""" + return self.table_name + + async def get_extended_values(self, field: str): + """ + Get all extended values in database, for specified field. + Field is one of extended columns. + """ + + if field not in BillingColumn.extended_cols(): + raise ValueError('Invalid field value') + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this day > filter is to limit the amount of data scanned, + # saving cost for running BQ + # aggregated views are partitioned by day + _query = f""" + SELECT DISTINCT {field} + FROM `{self.table_name}` + WHERE {field} IS NOT NULL + AND day > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + ORDER BY 1 ASC; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return [str(dict(row)[field]) for row in query_job_result] + + # return empty list if no record found + return [] diff --git a/db/python/tables/billing_gcp_daily.py b/db/python/tables/billing_gcp_daily.py new file mode 100644 index 000000000..eac74f470 --- /dev/null +++ b/db/python/tables/billing_gcp_daily.py @@ -0,0 +1,150 @@ +from google.cloud import bigquery + +from api.settings import BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW +from db.python.tables.billing_base import BillingBaseTable +from models.models import BillingTotalCostQueryModel + + +class BillingGcpDailyTable(BillingBaseTable): + """Billing GCP Daily Big Query table""" + + table_name = BQ_GCP_BILLING_VIEW + + def get_table_name(self): + """Get table name""" + return self.table_name + + def _filter_to_optimise_query(self) -> str: + """Filter string to optimise BQ query + override base class method as gcp table has different partition field + """ + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + return ( + 'part_time >= TIMESTAMP(@start_day)' + 'AND part_time <= TIMESTAMP_ADD(TIMESTAMP(@last_day), INTERVAL 7 DAY)' + ) + + def _last_loaded_day_filter(self) -> str: + """Filter string to optimise BQ query + override base 
class method as gcp table has different partition field + """ + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + return ( + 'day = TIMESTAMP(@last_loaded_day)' + 'AND part_time >= TIMESTAMP(@last_loaded_day)' + 'AND part_time <= TIMESTAMP_ADD(TIMESTAMP(@last_loaded_day),INTERVAL 7 DAY)' + ) + + def _prepare_time_filters(self, query: BillingTotalCostQueryModel): + """Prepare time filters, append to time_filters list""" + time_filters, query_parameters = super()._prepare_time_filters(query) + + # BQ_GCP_BILLING_VIEW view is partitioned by different field + # BQ has limitation, materialized view can only by partition by base table + # partition or its subset, in our case _PARTITIONTIME + # (part_time field in the view) + # We are querying by day, + # which can be up to a week behind regarding _PARTITIONTIME + time_filters.append('part_time >= TIMESTAMP(@start_date)') + time_filters.append( + 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' + ) + return time_filters, query_parameters + + async def _last_loaded_day(self): + """Get the most recent fully loaded day in db + Go 2 days back as the data is not always available for the current day + 1 day back is not enough + """ + + _query = f""" + SELECT TIMESTAMP_ADD(MAX(part_time), INTERVAL -2 DAY) as last_loaded_day + FROM `{self.table_name}` + WHERE part_time > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return str(query_job_result[0].last_loaded_day) + + return None + + async def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): + """prepare daily cost subquery""" + + # add extra filter to limit materialized view partition + # Raw BQ billing table is partitioned by part_time (when data are loaded) + # and not by end of usage time (day) + # There is a delay up to 4-5 days between part_time and day + # 7 days is added to be sure to get all data + gcp_billing_optimise_filter = """ + AND part_time >= TIMESTAMP(@last_loaded_day) + AND part_time <= TIMESTAMP_ADD( + TIMESTAMP(@last_loaded_day), INTERVAL 7 DAY + ) + """ + + daily_cost_field = ', day.cost as daily_cost' + daily_cost_join = f"""LEFT JOIN ( + SELECT + {field.value} as field, + cost_category, + SUM(cost) as cost + FROM + `{self.get_table_name()}` + WHERE day = TIMESTAMP(@last_loaded_day) + {gcp_billing_optimise_filter} + GROUP BY + field, + cost_category + ) day + ON month.field = day.field + AND month.cost_category = day.cost_category + """ + + query_params.append( + bigquery.ScalarQueryParameter('last_loaded_day', 'STRING', last_loaded_day), + ) + return (query_params, daily_cost_field, daily_cost_join) + + async def get_gcp_projects(self): + """Get all GCP projects in database""" + + # cost of this BQ is 10MB on DEV is minimal, AU$ 0.000008 per query + # @days is defined by env variable BQ_DAYS_BACK_OPTIMAL + # this part_time > filter is to limit the amount of data scanned, + # saving cost for running BQ + _query = f""" + SELECT DISTINCT gcp_project + FROM `{self.table_name}` + WHERE part_time > TIMESTAMP_ADD( + CURRENT_TIMESTAMP(), INTERVAL @days DAY + ) + AND gcp_project IS NOT NULL + ORDER BY 
gcp_project ASC; + """ + + query_parameters = [ + bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)), + ] + query_job_result = self._execute_query(_query, query_parameters) + + if query_job_result: + return [str(dict(row)['gcp_project']) for row in query_job_result] + + # return empty list if no record found + return [] diff --git a/db/python/tables/billing_raw.py b/db/python/tables/billing_raw.py new file mode 100644 index 000000000..24351096d --- /dev/null +++ b/db/python/tables/billing_raw.py @@ -0,0 +1,12 @@ +from api.settings import BQ_AGGREG_RAW +from db.python.tables.billing_base import BillingBaseTable + + +class BillingRawTable(BillingBaseTable): + """Billing Raw (Consolidated) Biq Query table""" + + table_name = BQ_AGGREG_RAW + + def get_table_name(self): + """Get table name""" + return self.table_name diff --git a/models/models/billing.py b/models/models/billing.py index a918eb605..0ea24cde5 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -75,6 +75,23 @@ def can_group_by(cls, value: 'BillingColumn') -> bool: BillingColumn.ADJUSTMENT_INFO, ) + @classmethod + def is_extended_column(cls, value: 'BillingColumn') -> bool: + """Return True if column is extended""" + return value in ( + BillingColumn.DATASET, + BillingColumn.BATCH_ID, + BillingColumn.SEQUENCING_TYPE, + BillingColumn.STAGE, + BillingColumn.SEQUENCING_GROUP, + BillingColumn.COMPUTE_CATEGORY, + BillingColumn.CROMWELL_SUB_WORKFLOW_NAME, + BillingColumn.CROMWELL_WORKFLOW_ID, + BillingColumn.GOOG_PIPELINES_WORKER, + BillingColumn.WDL_TASK_NAME, + BillingColumn.NAMESPACE, + ) + @classmethod def str_to_enum(cls, value: str) -> 'BillingColumn': """Convert string to enum""" From 1a276290356e8c488c359ee47aa9c174e16f159b Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 4 Jan 2024 16:33:12 +1100 Subject: [PATCH 15/34] Cleaning up unused dependencies. 
--- db/python/layers/billing.py | 2 +- db/python/tables/billing_base.py | 14 ++++++++++---- db/python/tables/billing_gcp_daily.py | 2 +- requirements.txt | 2 -- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index 212aacd21..4ee340f06 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -31,7 +31,7 @@ def table_factory( """Get billing table object based on source and fields""" if source == BillingSource.GCP_BILLING: return BillingGcpDailyTable(self.connection) - elif source == BillingSource.RAW: + if source == BillingSource.RAW: return BillingRawTable(self.connection) # check if any of the fields is in the extended columns diff --git a/db/python/tables/billing_base.py b/db/python/tables/billing_base.py index 000b953aa..426351ed1 100644 --- a/db/python/tables/billing_base.py +++ b/db/python/tables/billing_base.py @@ -103,13 +103,19 @@ class BillingBaseTable(BqDbBase): @abstractmethod def get_table_name(self): """Get table name""" - pass + raise NotImplementedError('Calling Abstract method directly') def _execute_query( - self, query: str, params: list[Any] = [], results_as_list: bool = True + self, query: str, params: list[Any] = None, results_as_list: bool = True ) -> list[Any]: """Execute query, add BQ labels""" - job_config = bigquery.QueryJobConfig(query_parameters=params, labels=BQ_LABELS) + if params: + job_config = bigquery.QueryJobConfig( + query_parameters=params, labels=BQ_LABELS + ) + else: + job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) + if results_as_list: return list( self._connection.connection.query(query, job_config=job_config).result() @@ -259,7 +265,7 @@ async def _last_loaded_day(self): return None - async def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): + def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): """prepare daily cost subquery""" daily_cost_field = ', day.cost as daily_cost' diff --git a/db/python/tables/billing_gcp_daily.py b/db/python/tables/billing_gcp_daily.py index eac74f470..ed7346b13 100644 --- a/db/python/tables/billing_gcp_daily.py +++ b/db/python/tables/billing_gcp_daily.py @@ -83,7 +83,7 @@ async def _last_loaded_day(self): return None - async def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): + def _prepare_daily_cost_subquery(self, field, query_params, last_loaded_day): """prepare daily cost subquery""" # add extra filter to limit materialized view partition diff --git a/requirements.txt b/requirements.txt index 131aa6942..1f9640ea2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,5 +22,3 @@ SQLAlchemy==1.4.41 cryptography>=41.0.0 python-dateutil==2.8.2 slack-sdk==3.20.2 -pandas==2.1.4 -db-dtypes==1.2.0 From cf5384ccbdaa54d0047a68080a522f3e52839786 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 5 Jan 2024 11:21:18 +1100 Subject: [PATCH 16/34] FIX: replaced button 'color=red' with 'negative' property. 
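A minimal sketch of the prop swap described above, assuming semantic-ui-react's Button component; the component name, handler name, label and surrounding props are illustrative only and are not taken from the diffs below:

    import * as React from 'react'
    import { Button } from 'semantic-ui-react'

    // Before: hard-coded colour
    //   <Button color="red" onClick={onRetry}>Retry</Button>
    // After: the semantic `negative` prop, which follows the theme
    const RetryButton: React.FC<{ onRetry: () => void }> = ({ onRetry }) => (
        <Button negative onClick={onRetry}>
            Retry
        </Button>
    )

    export default RetryButton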
--- web/src/pages/admin/ProjectsAdmin.tsx | 2 +- web/src/pages/billing/BillingCostByAnalysis.tsx | 2 +- web/src/pages/billing/BillingCostByCategory.tsx | 2 +- web/src/pages/billing/BillingCostByTime.tsx | 2 +- web/src/pages/billing/BillingInvoiceMonthCost.tsx | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/web/src/pages/admin/ProjectsAdmin.tsx b/web/src/pages/admin/ProjectsAdmin.tsx index d1f3ae543..5f1d7702a 100644 --- a/web/src/pages/admin/ProjectsAdmin.tsx +++ b/web/src/pages/admin/ProjectsAdmin.tsx @@ -46,7 +46,7 @@ const ProjectsAdmin = () => { {error}
-
diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index 4f3655f71..03747fb06 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -168,7 +168,7 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { setError(undefined)}> {error}
-
diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx index 2082ba1f8..31e60bd4e 100644 --- a/web/src/pages/billing/BillingCostByCategory.tsx +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -186,7 +186,7 @@ const BillingCostByCategory: React.FunctionComponent = () => { setError(undefined)}> {error}
-
diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 8ddab9ca3..454988a33 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -188,7 +188,7 @@ const BillingCostByTime: React.FunctionComponent = () => { setError(undefined)}> {error}
-
diff --git a/web/src/pages/billing/BillingInvoiceMonthCost.tsx b/web/src/pages/billing/BillingInvoiceMonthCost.tsx index 442d5011a..a01e19e49 100644 --- a/web/src/pages/billing/BillingInvoiceMonthCost.tsx +++ b/web/src/pages/billing/BillingInvoiceMonthCost.tsx @@ -126,7 +126,7 @@ const BillingCurrentCost = () => { {error}
-
From e3d655aabf9291728f9918b574cc2036a14a349f Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 5 Jan 2024 11:29:22 +1100 Subject: [PATCH 17/34] FIX: replace HEX color for pattern with CSS var. --- .../shared/components/Graphs/HorizontalStackedBarChart.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx index d80c3021e..f0f349da6 100644 --- a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -124,7 +124,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('width', 4) .attr('height', 4) .append('path') - .attr('stroke', '#000000') + .attr('stroke', 'var(--color-text-primary') .attr('stroke-width', 1) svg.append('defs') @@ -135,7 +135,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('height', 4) .append('path') .attr('d', 'M-1,1 l2,-2 M0,4 l4,-4 M3,5 l2,-2') - .attr('stroke', '#000000') + .attr('stroke', 'var(--color-text-primary') .attr('stroke-width', 1) svg.append('defs') @@ -146,7 +146,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('height', 4) .append('path') .attr('d', 'M 2 0 L 2 4') - .attr('stroke', '#000000') + .attr('stroke', 'var(--color-text-primary') .attr('stroke-width', 1) // X scale and Axis From 9fbe19ecbb1b2a9c24dae2856b574fa0fedf1b6f Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 5 Jan 2024 11:30:51 +1100 Subject: [PATCH 18/34] FIX: replace async call with sync for a simple function. --- db/python/tables/billing_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/python/tables/billing_base.py b/db/python/tables/billing_base.py index 426351ed1..384ac2ce4 100644 --- a/db/python/tables/billing_base.py +++ b/db/python/tables/billing_base.py @@ -332,7 +332,7 @@ async def _execute_running_cost_query( query_params, daily_cost_field, daily_cost_join, - ) = await self._prepare_daily_cost_subquery( + ) = self._prepare_daily_cost_subquery( field, query_params, last_loaded_day ) else: From 3a97611628065d4d064e451b6e357dec2262f3ad Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 5 Jan 2024 11:48:54 +1100 Subject: [PATCH 19/34] FIX: dark mode for Horizontal Stacked Bar. 
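SVG `<text>` elements are painted with their own `fill` attribute rather than the CSS `color` property, so d3-drawn labels default to black and vanish on a dark background. Setting the fill to `currentColor` makes them follow the surrounding themed text colour. A minimal sketch of the technique (the selection wiring is a placeholder, not code from this component):

    import * as d3 from 'd3'

    const drawAxisLabel = (svg: d3.Selection<SVGSVGElement, unknown, null, undefined>) => {
        svg.append('text')
            .attr('x', 20)
            .attr('y', 20)
            .attr('text-anchor', 'start')
            // 'currentColor' resolves to the element's computed CSS colour,
            // so the label stays readable in both light and dark mode
            .attr('fill', 'currentColor')
            .text('AUD')
    }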
--- .../shared/components/Graphs/HorizontalStackedBarChart.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx index f0f349da6..e317311d8 100644 --- a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -285,6 +285,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('x', 0) .attr('y', -margin.top / 1.7) .attr('text-anchor', 'start') + .attr('fill', 'currentColor') .text(title) // set Y axis label @@ -294,6 +295,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('x', width / 2) .attr('y', height + margin.bottom) .attr('text-anchor', 'middle') + .attr('fill', 'currentColor') .text('AUD') if (showLegend) { @@ -320,6 +322,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('class', 'legend') .attr('x', 20 + i * 150) .attr('y', -(margin.top / 3.8)) + .attr('fill', 'currentColor') .text(labels[i]) } @@ -336,6 +339,7 @@ const HorizontalStackedBarChart: React.FC = ({ .attr('class', 'legend') .attr('x', 20 + labels.length * 150) .attr('y', -(margin.top / 3.8)) + .attr('fill', 'currentColor') .text('Budget') } } From 1a3932c68043a027e41202c592e12ca7fd5f8a15 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 5 Jan 2024 17:10:16 +1100 Subject: [PATCH 20/34] FIX: billing cost by analysis page, esp. search control resizing and functionality. --- .../pages/billing/BillingCostByAnalysis.tsx | 69 +++++++++++-------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index 03747fb06..e47790643 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -1,6 +1,6 @@ import * as React from 'react' import { useLocation, useNavigate, useSearchParams } from 'react-router-dom' -import { Button, Card, Grid, Input, Message, Select } from 'semantic-ui-react' +import { Button, Card, Grid, Input, Message, Select, Dropdown } from 'semantic-ui-react' import SearchIcon from '@mui/icons-material/Search' import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' @@ -37,23 +37,23 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { value: item, })) - const [searchByType, setSearchByType] = React.useState( - SearchType[searchParams.get('searchType')] ?? SearchType.Ar_guid + const [searchByType, setSearchByType] = React.useState( + SearchType[searchParams.get('searchType')] ?? 
SearchType[0] ) // use navigate and update url params const location = useLocation() const navigate = useNavigate() - const updateNav = (sType: SearchType | undefined, sTxt: string | undefined) => { + const updateNav = (sType: SearchType, sTxt: string | undefined) => { const url = generateUrl(location, { - searchType: sType, + searchType: SearchType[sType], searchTxt: sTxt, }) navigate(url) } - const getData = (sType: SearchType, sTxt: string) => { + const getData = (sType: SearchType | undefined | string, sTxt: string) => { if ((sType === undefined || sTxt === undefined) && sTxt.length < 6) { // Seaarch text is not large enough setIsLoading(false) @@ -62,7 +62,10 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { setIsLoading(true) setError(undefined) - if (sType === SearchType.Ar_guid) { + // convert sType to enum + const convertedType: SearchType = SearchType[sType as keyof typeof SearchType] + + if (convertedType === SearchType.Ar_guid) { new BillingApi() .costByArGuid(sTxt) .then((response) => { @@ -70,7 +73,7 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { setData(response.data) }) .catch((er) => setError(er.message)) - } else if (sType === SearchType.Batch_id) { + } else if (convertedType === SearchType.Batch_id) { new BillingApi() .costByBatchId(sTxt) .then((response) => { @@ -97,14 +100,21 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { } const handleSearchTypeChange = (event: any, dt: any) => { - setSearchByType(SearchType[dt.value]) + setSearchByType(dt.value) } - const handleSearchKeyPress = (event: any) => { + const handleSearchClick = () => { updateNav(searchByType, searchTxt) handleSearch() } + const getDefaultSearchType = () => { + if (searchByType !== undefined) { + return searchByType + } + return dropdownOptions[0].value + } + React.useEffect(() => { handleSearch() }, []) @@ -121,38 +131,39 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { + {/* There is a Dropdown inside the search Input control to select searchType */} + } + labelPosition="right" + placeholder="Search..." onChange={handleSearchChange} value={searchTxt} - action={{ icon: 'search' }} /> - { fitted toggle checked={showAsChart} - slider onChange={() => setShowAsChart(!showAsChart)} /> @@ -341,7 +340,11 @@ const BillingCurrentCost = () => { sort.direction === 'ascending' ? 
['asc'] : ['desc'] ).map((p) => ( - + { backgroundColor: 'var(--color-bg)', }} key={`${dk.cost_category} - ${p.field}`} + id={`${dk.cost_category} - ${p.field}`} > {dk.cost_category} diff --git a/web/src/pages/billing/components/HailBatchGrid.tsx b/web/src/pages/billing/components/HailBatchGrid.tsx index b05258819..4596a382f 100644 --- a/web/src/pages/billing/components/HailBatchGrid.tsx +++ b/web/src/pages/billing/components/HailBatchGrid.tsx @@ -31,8 +31,8 @@ const HailBatchGrid: React.FunctionComponent<{ type: 'ar_guid', key: ar_guid, ar_guid, - batch_id: ' TOTAL', - job_id: ' ALL JOBS', + batch_id: undefined, + job_id: undefined, topic, cost, start_time: usageStartDate, @@ -106,7 +106,7 @@ const HailBatchGrid: React.FunctionComponent<{ topic, namespace, batch_name, - job_id: ' ALL JOBS', + job_id: undefined, cost, start_time: usageStartDate, end_time: usageEndDate, @@ -198,7 +198,7 @@ const HailBatchGrid: React.FunctionComponent<{ const aggBatchJobResource: any[] = [] data.forEach((curr) => { - const { batch_id, batch_resource, topic, namespace, cost, job_id } = curr + const { batch_id, batch_resource, topic, namespace, cost, job_id, job_name } = curr const ar_guid = curr['ar-guid'] const idx = aggBatchJobResource.findIndex( (d) => @@ -220,6 +220,7 @@ const HailBatchGrid: React.FunctionComponent<{ topic, namespace, cost, + job_name, }) } else { aggBatchJobResource[idx].cost += cost @@ -249,45 +250,67 @@ const HailBatchGrid: React.FunctionComponent<{ } } + const prepareBatchUrl = (url: string, txt: string) => ( + + {txt} + + ) + + const prepareBgColor = (log: any) => { + if (log.batch_id === undefined) { + return 'var(--color-border-color)' + } + if (log.job_id === undefined) { + return 'var(--color-border-default)' + } + return 'var(--color-bg)' + } + const MAIN_FIELDS: Field[] = [ - { - category: 'ar_guid', - title: 'AR GUID', - }, - { - category: 'url', - title: 'HAIL BATCH', - dataMap: (data: any, value: string) => ( - - {data.batch_id} - - ), - }, { category: 'job_id', - title: 'JOB ID', + title: 'ID', + dataMap: (dataItem: any, value: string) => { + if (dataItem.batch_id === undefined) { + return `AR GUID: ${dataItem.ar_guid}` + } + if (dataItem.job_id === undefined) { + return prepareBatchUrl(dataItem.url, `BATCH ID: ${dataItem.batch_id}`) + } + return prepareBatchUrl(dataItem.url, `JOB: ${value}`) + }, }, { category: 'start_time', title: 'TIME STARTED', - dataMap: (data: any, value: string) => { + dataMap: (dataItem: any, value: string) => { const dateValue = new Date(value) - return {isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + return ( + + {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + + ) }, }, { category: 'end_time', title: 'TIME COMPLETED', - dataMap: (data: any, value: string) => { + dataMap: (dataItem: any, value: string) => { const dateValue = new Date(value) - return {isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + return ( + + {Number.isNaN(dateValue.getTime()) ? 
'' : dateValue.toLocaleString()} + + ) }, }, { category: 'duration', title: 'DURATION', - dataMap: (data: any, value: string) => { - const duration = new Date(data.end_time.getTime() - data.start_time.getTime()) + dataMap: (dataItem: any, _value: string) => { + const duration = new Date( + dataItem.end_time.getTime() - dataItem.start_time.getTime() + ) const seconds = Math.floor((duration / 1000) % 60) const minutes = Math.floor((duration / (1000 * 60)) % 60) const hours = Math.floor((duration / (1000 * 60 * 60)) % 24) @@ -298,10 +321,10 @@ const HailBatchGrid: React.FunctionComponent<{ { category: 'cost', title: 'COST', - dataMap: (data: any, value: string) => ( + dataMap: (dataItem: any, _value: string) => ( ${data.cost.toFixed(4)}} + content={dataItem.cost} + trigger={${dataItem.cost.toFixed(4)}} position="top center" /> ), @@ -321,15 +344,15 @@ const HailBatchGrid: React.FunctionComponent<{ category: 'batch_name', title: 'NAME/SCRIPT', }, + { + category: 'job_name', + title: 'NAME', + }, ] const expandedRow = (log: any, idx: any) => - MAIN_FIELDS.map(({ category, title, width, dataMap, className }) => ( - + MAIN_FIELDS.map(({ category, dataMap, className }) => ( + {dataMap ? dataMap(log, log[category]) : sanitiseValue(log[category])} )) @@ -346,6 +369,7 @@ const HailBatchGrid: React.FunctionComponent<{ borderBottom: 'none', position: 'sticky', resize: 'horizontal', + textAlign: 'center', }} > {title} @@ -390,11 +414,17 @@ const HailBatchGrid: React.FunctionComponent<{ }) .map((log, idx) => ( - + handleToggle(log.key)} /> @@ -423,7 +453,7 @@ const HailBatchGrid: React.FunctionComponent<{ {title} - {v} + {v} ) })} @@ -435,7 +465,7 @@ const HailBatchGrid: React.FunctionComponent<{ key={`${log.key}-lbl`} > - + COST BREAKDOWN @@ -452,7 +482,7 @@ const HailBatchGrid: React.FunctionComponent<{ key={`${log.key}-${dk.batch_resource}`} > - + {dk.batch_resource} ${dk.cost.toFixed(4)} diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx index 901e26fa9..d9793d056 100644 --- a/web/src/shared/components/Graphs/StackedBarChart.tsx +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -40,10 +40,12 @@ export const StackedBarChart: React.FC = ({ data, accumul } const contDiv = containerDivRef.current - if (contDiv) { - // reset svg - contDiv.innerHTML = '' - } + console.log('contDiv', contDiv) + // if (contDiv) { + // // reset svg + // // contDiv.innerHTML = '' + // return
+ // } const series = Object.keys(data[0].values) const seriesLength = series.length From 8fe57e9046b32a9363fdd5683746f89f91f51d95 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Tue, 9 Jan 2024 14:55:41 +1100 Subject: [PATCH 23/34] FIX: BillingCostPageAnalysis, keeping the old record until loading of data finishes. --- .../pages/billing/BillingCostByAnalysis.tsx | 102 ++++++++++++------ 1 file changed, 68 insertions(+), 34 deletions(-) diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index e0438d3cf..87e1ef153 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -25,11 +25,9 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { searchParams.get('start') ?? getMonthStartDate() ) - const [data, setData] = React.useState([]) + const [data, setData] = React.useState(undefined) - const [searchTxt, setSearchTxt] = React.useState( - searchParams.get('searchTxt') ?? undefined - ) + const [searchTxt, setSearchTxt] = React.useState(searchParams.get('searchTxt') ?? '') const searchOptions: string[] = Object.keys(SearchType).filter((item) => isNaN(Number(item))) const dropdownOptions = searchOptions.map((item) => ({ @@ -119,6 +117,39 @@ const BillingCostByAnalysis: React.FunctionComponent = () => { handleSearch() }, []) + const errorComponent = () => { + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + + // if no error return null + return null + } + + const loadingComponent = () => { + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + + // otherwise return null + return null + } + const searchCard = () => (

{ ) - const gridCard = (data: BillingTotalCostRecord[]) => ( + const gridCard = (gridData: BillingTotalCostRecord[]) => ( - + ) - if (error) { - return ( - setError(undefined)}> - {error} -
- -
- ) - } + const dataComponent = () => { + if (data !== undefined && data.costs.length > 0) { + // only render grid if there are available cost data + return gridCard(data.costs) + } - if (isLoading) { - return ( -
- {searchCard()} - + // if valid search text and no data return return No data message + if ( + data !== undefined && + searchByType !== undefined && + searchTxt !== undefined && + searchTxt.length > 5 + ) { + return (

- This query takes a while... + No data found.

-
- ) - } + ) + } - if (data.length === 0) { - return ( -
- {searchCard()} + // otherwise prompt user to search, if not loading already + if (!isLoading) { + return (

- No data found. + + Enter a search term above, select type and press search button to get + started. +

-
- ) + ) + } + + // otherwise do not render anything + return null } return ( <> {searchCard()} - {gridCard(data.costs)} + {errorComponent()} + {loadingComponent()} + {dataComponent()} ) } From 7292f4e4f700b0421754b83a688a10a7cd66742e Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 11 Jan 2024 15:34:01 +1100 Subject: [PATCH 24/34] FIX: Billing StackedChart various issues. --- .../components/Graphs/StackedBarChart.tsx | 575 ++++++++++-------- 1 file changed, 329 insertions(+), 246 deletions(-) diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx index d9793d056..b1c669474 100644 --- a/web/src/shared/components/Graphs/StackedBarChart.tsx +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -1,5 +1,4 @@ import * as d3 from 'd3' -import _ from 'lodash' import React from 'react' export interface IStackedBarChartData { @@ -12,278 +11,362 @@ interface IStackedBarChartProps { accumulate: boolean } -export const StackedBarChart: React.FC = ({ data, accumulate }) => { - const colorFunc: (t: number) => string | undefined = d3.interpolateRainbow - const margin = { top: 50, right: 50, bottom: 100, left: 100 } - const height = 800 - margin.top - margin.bottom - const marginLegend = 10 +function getSeries(data: IStackedBarChartData[] | undefined) { + if (!data || data.length === 0) { + return []; + } - const containerDivRef = React.useRef() - const [width, setWidth] = React.useState(768) + return Object.keys(data[0].values) +} - React.useEffect(() => { - function updateWindowWidth() { - setWidth(containerDivRef.current?.clientWidth || 768) - } - if (containerDivRef.current) { - updateWindowWidth() - } - window.addEventListener('resize', updateWindowWidth) +function alignToStartOfMonth(date: Date): Date { + const year = date.getFullYear(); + const month = date.getMonth(); + return new Date(Date.UTC(year, month, 1)) +} - return () => { - window.removeEventListener('resize', updateWindowWidth) +function createNewDates(lastDate: Date, differenceInDays: number): Date[] { + const newDates: Date[] = []; + for (let i = 1; i <= 3; i++) { + const newDate = new Date(lastDate.getTime() + i * differenceInDays * 24 * 60 * 60 * 1000); + if(differenceInDays > 28) { + const alignedDate = alignToStartOfMonth(newDate); + newDates.push(alignedDate) } - }, []) + else{ + newDates.push(newDate) + } + } + return newDates; +} - if (!data || data.length === 0) { - return No Data +function getNewDates(data: IStackedBarChartData[]) { + // need at least 2 days to extrapolate + if (!data || data.length < 2) { + return []; } - const contDiv = containerDivRef.current - console.log('contDiv', contDiv) - // if (contDiv) { - // // reset svg - // // contDiv.innerHTML = '' - // return
- // } + // Get the last date in the data array + const lastDate = data[data.length - 1].date; + const prevDate = data[data.length - 2].date; - const series = Object.keys(data[0].values) - const seriesLength = series.length + const timeDifference = Math.abs(lastDate.getTime() - prevDate.getTime()); + const differenceInDays = Math.ceil(timeDifference / (1000 * 3600 * 24)); - // Get the last date in the data array - const lastDate = data[data.length - 1].date + // for monthly add 3 extra days so we get the next month + return createNewDates(lastDate, differenceInDays > 28 ? differenceInDays + 3: differenceInDays) +} - // Create 3 new dates - // TODO make it as custom props - const newDates = d3 - .range(1, 4) - .map((day) => new Date(lastDate.getTime() + day * 24 * 60 * 60 * 1000)) +function prepareData(series: string[], data: IStackedBarChartData[], accumulate: boolean, newDates: Date[]) { + if (!data || data.length === 0) { + return []; + } + + const predictedRatio = newDates.length / data.length + const firstDateData = data[0] + const lastDateData = data[data.length - 1] // Interpolate the values for the new dates - const newValues = newDates.map((date, i) => { - if (i < data.length) { - const prevData = data[data.length - 1 - i] - const nextData = data[data.length - 1 - i] - return { - date, - values: series.reduce((values, key) => { - // TODO revisit how we extrapolate new data - const interpolator = d3.interpolate(prevData.values[key], nextData.values[key]) - values[key] = interpolator((i + 1) / 6) - return values - }, {}), - } - } - }) + const newValues = newDates.map((date: Date, i: number) => { + return { + date, + values: series.reduce((acc: Record, key: string) => { + const values = { ...acc }; + const interpolator = d3.interpolate(firstDateData.values[key], lastDateData.values[key]); + const predX = 1 + (i + 1) * predictedRatio; + const predictedValue = interpolator(predX); + values[key] = predictedValue < 0 ? lastDateData.values[key] : predictedValue; + return values; + }, {}), + }; + }); // Add the new values to the data array let extData = data.concat(newValues) extData = extData.filter((item) => item !== undefined) - // X - values - const x_vals = extData.map((d) => d.date.toISOString().substring(0, 10)) - - // prepare stacked data - let stackedData - if (accumulate) { - const accumulatedData = extData.reduce((acc: any[], curr) => { - const last = acc[acc.length - 1] - const accumulated = { - date: curr.date, - values: Object.keys(curr.values).reduce( - (accValues: Record, key) => { - return { - ...accValues, - [key]: (last ? 
last.values[key] : 0) + curr.values[key], - } - }, - {} - ), - } - return [...acc, accumulated] - }, []) - - stackedData = d3 - .stack() - .offset(d3.stackOffsetNone) - .keys(series)(accumulatedData.map((d) => ({ date: d.date, ...d.values }))) - .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) - } else { - stackedData = d3 - .stack() - .offset(d3.stackOffsetNone) - .keys(series)(extData.map((d) => ({ date: d.date, ...d.values }))) - .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) - } + return extData; +} - // find max values for the X axes - const y1Max = d3.max(stackedData, (y) => d3.max(y, (d) => d[1])) +export const StackedBarChart: React.FC = ({ data, accumulate }) => { - // tooltip events - const tooltip = d3.select('body').append('div').attr('id', 'chart').attr('class', 'tooltip') + const svgRef = React.useRef(null); + const legendRef = React.useRef(null); - const mouseover = (d) => { - tooltip.style('opacity', 0.8) - d3.select(this).style('opacity', 0.5) - } - const mousemove = (event, d) => { - const formater = d3.format(',.2f') - tooltip - .html(d.key + ' ' + formater(d[1] - d[0]) + ' AUD') - .style('top', event.pageY - 10 + 'px') - .style('left', event.pageX + 10 + 'px') - } - const mouseleave = (d) => { - tooltip.style('opacity', 0) - d3.select(this).style('opacity', 1) - } + const containerDivRef = React.useRef() + const tooltipDivRef = React.useRef() + + const colorFunc: (t: number) => string | undefined = d3.interpolateRainbow + const margin = { top: 0, right: 10, bottom: 200, left: 100 } + const height = 800 - margin.top - margin.bottom + const marginLegend = 10 + const minWidth = 1900 + + const [width, setWidth] = React.useState(minWidth) + const series = getSeries(data) + const seriesCount = series.length - const x = d3 - .scaleBand() - .domain(d3.range(x_vals.length)) - .rangeRound([margin.left, width - margin.right]) - .padding(0.08) - - // create root svg element - const svg = d3 - .select(contDiv) - .append('svg') - .attr('viewBox', [0, 0, width, height]) - .attr('height', height) - .attr('style', 'max-width: 100%; height: auto;') - - // calculate opacity (for new dates) - const opacity = 0.3 - const calcOpacity = (d) => { - const idx = series.indexOf(d.key) - const color = d3.color(colorFunc(idx / seriesLength)) - if (newDates.includes(d.data.date)) { - return d3.rgb(color.r, color.g, color.b, opacity) + React.useEffect(() => { + if (!data || data.length === 0) { + return; } - return color - } + // Prepare all data structures and predicted data + const newDates = getNewDates(data) + const combinedData = prepareData(series, data, accumulate, newDates) + + // X - values + const x_vals = combinedData.map((d) => d.date.toISOString().substring(0, 10)) + + // prepare stacked data + let stackedData + if (accumulate) { + const accumulatedData = combinedData.reduce((acc: any[], curr) => { + const last = acc[acc.length - 1] + const accumulated = { + date: curr.date, + values: Object.keys(curr.values).reduce( + (accValues: Record, key) => { + return { + ...accValues, + [key]: (last ? 
last.values[key] : 0) + curr.values[key], + } + }, + {} + ), + } + return [...acc, accumulated] + }, []) + + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(accumulatedData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } else { + stackedData = d3 + .stack() + .offset(d3.stackOffsetNone) + .keys(series)(combinedData.map((d) => ({ date: d.date, ...d.values }))) + .map((ser, i) => ser.map((d) => ({ ...d, key: series[i] }))) + } + + // find max values for the X axes + const y1Max = d3.max(stackedData, (y) => d3.max(y, (d) => d[1])) + + // tooltip events + const tooltip = d3.select(tooltipDivRef.current) + + const mouseover = (d) => { + tooltip.style('opacity', 0.8) + d3.select(this).style('opacity', 0.5) + } + const mousemove = (event, d) => { + const formater = d3.format(',.2f') + tooltip + .html(d.key + ' ' + formater(d[1] - d[0]) + ' AUD') + .style('top', event.layerY - 30 + 'px') + .style('left', event.layerX - 30 + 'px') + } + const mouseleave = (d) => { + tooltip.style('opacity', 0) + d3.select(this).style('opacity', 1) + } + + const x = d3 + .scaleBand() + .domain(d3.range(x_vals.length)) + .rangeRound([margin.left, minWidth - margin.right]) + .padding(0.08) + + // calculate opacity (for new dates) + const opacity = 0.3 + const calcOpacity = (d) => { + const idx = series.indexOf(d.key) + const color = d3.color(colorFunc(idx / seriesCount)) + if (newDates.includes(d.data.date)) { + return d3.rgb(color.r, color.g, color.b, opacity) + } + + return color + } + + // get SVG reference + const svg = d3.select(svgRef.current); + + // remove prevously rendered data + svg.selectAll('g').remove(); + svg.selectAll('rect').remove(); + + // generate bars + const g = svg + .selectAll('g') + .data(stackedData) + .enter() + .append('g') + .attr('fill', (d, i) => colorFunc(i / seriesCount)) + .attr('id', (d, i) => `path${i}`); + + const rect = g + .selectAll('rect') + .data((d) => d) + .enter() + .append('rect') + .attr('x', (d, i) => x(i)) + .attr('y', height - margin.bottom) + .attr('width', x.bandwidth()) + .attr('height', 0) + .attr('fill', (d) => calcOpacity(d)) + .on('mouseover', mouseover) + .on('mousemove', mousemove) + .on('mouseleave', mouseleave) + + // x-axis & labels + const formatX = (val: number): string => x_vals[val] + + let x_labels: d3.Selection = svg.select('.x-axis'); + + if (x_labels.empty()) { + x_labels = svg + .append('g') + .attr('class', 'x-axis') + .attr('transform', `translate(0,${height - margin.bottom})`) + .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)); + } else { + x_labels.call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)); + } - // bars - const rect = svg - .selectAll('g') - .data(stackedData) - .join('g') - .attr('fill', (d, i) => colorFunc(i / seriesLength)) - .attr('id', (d, i) => `path${i}`) - .selectAll('rect') - .data((d) => d) - .join('rect') - .attr('x', (d, i) => x(i)) - .attr('y', height - margin.bottom) - .attr('width', x.bandwidth()) - .attr('height', 0) - .attr('fill', (d) => calcOpacity(d)) - .on('mouseover', mouseover) - .on('mousemove', mousemove) - .on('mouseleave', mouseleave) - - // x-axis & labels - const formatX = (val: number): string => x_vals[val] - - const x_labels = svg - .append('g') - .attr('transform', `translate(0,${height - margin.bottom})`) - .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) - - if (x_vals.length > 10) { // rotate x labels, if too many - x_labels - .selectAll('text') - .attr('transform', 
'rotate(-90)') - .attr('text-anchor', 'end') - .attr('dy', '-0.55em') - .attr('dx', '-1em') - } + if (x_vals.length > 10) { + x_labels + .selectAll('text') + .attr('transform', 'rotate(-90)') + .attr('text-anchor', 'end') + .attr('dy', '-0.55em') + .attr('dx', '-1em'); + } + else{ + x_labels + .selectAll('text') + .attr('transform', 'rotate(0)') + .attr('text-anchor', 'middle') + .attr('dy', '0.55em') + .attr('dx', '0em'); + } - // y-axis & labels - const y = d3 - .scaleLinear() - .domain([0, y1Max]) - .range([height - margin.bottom, margin.top]) - - const y_axis = d3.axisLeft().scale(y).ticks(10, '$.0f') - svg.append('g').attr('transform', `translate(${margin.left}, 0)`).call(y_axis) - - // animate bars - rect.transition() - .duration(200) - .delay((d, i) => i * 5) - .attr('y', (d) => y(d[1])) - .attr('height', (d) => y(d[0]) - y(d[1])) - .transition() - .attr('x', (d, i) => x(i)) - .attr('width', x.bandwidth()) - - // on Hover - const onHoverOver = (tg: HTMLElement, v) => { - d3.selectAll(`#path${v}`).style('fill-opacity', 0.5) - d3.select(tg).selectAll('circle').style('fill-opacity', 0.5) - d3.select(tg).selectAll('text').attr('font-weight', 'bold') - } + // y-axis & labels + const y = d3 + .scaleLinear() + .domain([0, y1Max]) + .range([height - margin.bottom, margin.top]); + + let y_labels: d3.Selection = svg.select('.y-axis'); + + if (y_labels.empty()) { + y_labels = svg + .append('g') + .attr('class', 'y-axis') + .attr('transform', `translate(${margin.left},0)`) + .call(d3.axisLeft(y)); + } else { + y_labels.call(d3.axisLeft(y)); + } - const onHoverOut = (tg: HTMLElement, v) => { - d3.selectAll(`#path${v}`).style('fill-opacity', 1) - d3.select(tg).selectAll('circle').style('fill-opacity', 1) - d3.select(tg).selectAll('text').attr('font-weight', 'normal') - } + // animate bars + rect.transition() + .duration(200) + .delay((d, i) => i * 5) + .attr('y', (d) => y(d[1]) || 0) + .attr('height', (d) => y(d[0]) - y(d[1])) + .transition() + .attr('x', (d, i) => x(i) || 0) + .attr('width', x.bandwidth()) + + // on Hover + const onHoverOver = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 0.5) + d3.select(tg).selectAll('circle').style('fill-opacity', 0.5) + d3.select(tg).selectAll('text').attr('font-weight', 'bold') + } - // add legend - const svgLegend = d3 - .select(contDiv) - .append('svg') - .attr('height', height) - .attr('viewBox', `0 0 450 ${height}`) - - svgLegend - .selectAll('g.legend') - .attr('transform', `translate(0, ${margin.top})`) - .data(series) - .enter() - .append('g') - .attr('id', (d, i) => `legend${i}`) - .attr('transform', (d, i) => `translate(${marginLegend},${marginLegend + i * 20})`) - .each(function (d, i) { - d3.select(this) - .append('circle') - .attr('r', 8) - .attr('fill', (d) => colorFunc(i / seriesLength)) - d3.select(this) - .append('text') - .attr('text-anchor', 'start') - .attr('x', 10) - .attr('y', 0) - .attr('dy', '0.5em') - .text(d) - .attr('font-size', '0.8em') - d3.select(this) - .on('mouseover', (event, v) => { - const element = d3.select(`#legend${i}`) - onHoverOver(element.node(), i) - }) - .on('mouseout', (event, v) => { - const element = d3.select(`#legend${i}`) - onHoverOut(element.node(), i) - }) - }) - - // set all text to 15px - svg.selectAll('text').style('font-size', '20px') - - // Simple responsive, move legend to bottom if mobile - if (width < 1000) { - // if mobile / tablet size - svgLegend.attr('width', '100%') - svg.attr('width', '100%') - } else { - svgLegend.attr('width', '30%') - svg.attr('width', '70%') - 
} + const onHoverOut = (tg: HTMLElement, v) => { + d3.selectAll(`#path${v}`).style('fill-opacity', 1) + d3.select(tg).selectAll('circle').style('fill-opacity', 1) + d3.select(tg).selectAll('text').attr('font-weight', 'normal') + } + + const svgLegend = d3.select(legendRef.current); + + svgLegend + .selectAll('g.legend') + .data(series) + .join('g') + .attr('class', 'legend') + .attr('transform', `translate(0, ${margin.top})`) + .attr('id', (d, i) => `legend${i}`) + .attr('transform', (d, i) => `translate(${marginLegend},${marginLegend + i * 20})`) + .each(function (d, i) { + d3.select(this) + .selectAll('circle') // Replace append with selectAll + .data([d]) // Use data to bind a single data element + .join('circle') // Use join to handle enter/update/exit selections + .attr('r', 8) + .attr('fill', (d) => colorFunc(i / seriesCount)) + d3.select(this) + .selectAll('text') // Replace append with selectAll + .data([d]) // Use data to bind a single data element + .join('text') // Use join to handle enter/update/exit selections + .attr('text-anchor', 'start') + .attr('x', 10) + .attr('y', 0) + .attr('dy', '0.5em') + .text(d) + .attr('font-size', '0.8em') + d3.select(this) + .on('mouseover', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOver(element.node(), i) + }) + .on('mouseout', (event, v) => { + const element = d3.select(`#legend${i}`) + onHoverOut(element.node(), i) + }) + }) + + // set all text to 2.5em + svg.selectAll('text') + .style('font-size', '2.5em') + + function updateWindowWidth() { + setWidth(containerDivRef.current?.clientWidth || 768) + } + if (containerDivRef.current) { + updateWindowWidth() + } + window.addEventListener('resize', updateWindowWidth) - return
+ }, [data, accumulate]) + + return ( + <>
+ + + + +
+
+ + ); } From f2cb0ab6158ae71612d49f77e278f799cc924171 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 11 Jan 2024 15:35:28 +1100 Subject: [PATCH 25/34] Linting --- .../components/Graphs/StackedBarChart.tsx | 158 +++++++++--------- 1 file changed, 83 insertions(+), 75 deletions(-) diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx index b1c669474..754d6dae5 100644 --- a/web/src/shared/components/Graphs/StackedBarChart.tsx +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -13,53 +13,57 @@ interface IStackedBarChartProps { function getSeries(data: IStackedBarChartData[] | undefined) { if (!data || data.length === 0) { - return []; + return [] } return Object.keys(data[0].values) } function alignToStartOfMonth(date: Date): Date { - const year = date.getFullYear(); - const month = date.getMonth(); + const year = date.getFullYear() + const month = date.getMonth() return new Date(Date.UTC(year, month, 1)) } function createNewDates(lastDate: Date, differenceInDays: number): Date[] { - const newDates: Date[] = []; + const newDates: Date[] = [] for (let i = 1; i <= 3; i++) { - const newDate = new Date(lastDate.getTime() + i * differenceInDays * 24 * 60 * 60 * 1000); - if(differenceInDays > 28) { - const alignedDate = alignToStartOfMonth(newDate); + const newDate = new Date(lastDate.getTime() + i * differenceInDays * 24 * 60 * 60 * 1000) + if (differenceInDays > 28) { + const alignedDate = alignToStartOfMonth(newDate) newDates.push(alignedDate) - } - else{ + } else { newDates.push(newDate) } } - return newDates; + return newDates } function getNewDates(data: IStackedBarChartData[]) { // need at least 2 days to extrapolate if (!data || data.length < 2) { - return []; + return [] } // Get the last date in the data array - const lastDate = data[data.length - 1].date; - const prevDate = data[data.length - 2].date; + const lastDate = data[data.length - 1].date + const prevDate = data[data.length - 2].date - const timeDifference = Math.abs(lastDate.getTime() - prevDate.getTime()); - const differenceInDays = Math.ceil(timeDifference / (1000 * 3600 * 24)); + const timeDifference = Math.abs(lastDate.getTime() - prevDate.getTime()) + const differenceInDays = Math.ceil(timeDifference / (1000 * 3600 * 24)) // for monthly add 3 extra days so we get the next month - return createNewDates(lastDate, differenceInDays > 28 ? differenceInDays + 3: differenceInDays) + return createNewDates(lastDate, differenceInDays > 28 ? differenceInDays + 3 : differenceInDays) } -function prepareData(series: string[], data: IStackedBarChartData[], accumulate: boolean, newDates: Date[]) { +function prepareData( + series: string[], + data: IStackedBarChartData[], + accumulate: boolean, + newDates: Date[] +) { if (!data || data.length === 0) { - return []; + return [] } const predictedRatio = newDates.length / data.length @@ -71,44 +75,46 @@ function prepareData(series: string[], data: IStackedBarChartData[], accumulate: return { date, values: series.reduce((acc: Record, key: string) => { - const values = { ...acc }; - const interpolator = d3.interpolate(firstDateData.values[key], lastDateData.values[key]); - const predX = 1 + (i + 1) * predictedRatio; - const predictedValue = interpolator(predX); - values[key] = predictedValue < 0 ? 
lastDateData.values[key] : predictedValue; - return values; + const values = { ...acc } + const interpolator = d3.interpolate( + firstDateData.values[key], + lastDateData.values[key] + ) + const predX = 1 + (i + 1) * predictedRatio + const predictedValue = interpolator(predX) + values[key] = predictedValue < 0 ? lastDateData.values[key] : predictedValue + return values }, {}), - }; - }); + } + }) // Add the new values to the data array let extData = data.concat(newValues) extData = extData.filter((item) => item !== undefined) - return extData; + return extData } export const StackedBarChart: React.FC = ({ data, accumulate }) => { - - const svgRef = React.useRef(null); - const legendRef = React.useRef(null); + const svgRef = React.useRef(null) + const legendRef = React.useRef(null) const containerDivRef = React.useRef() const tooltipDivRef = React.useRef() - + const colorFunc: (t: number) => string | undefined = d3.interpolateRainbow const margin = { top: 0, right: 10, bottom: 200, left: 100 } const height = 800 - margin.top - margin.bottom const marginLegend = 10 const minWidth = 1900 - + const [width, setWidth] = React.useState(minWidth) const series = getSeries(data) const seriesCount = series.length React.useEffect(() => { if (!data || data.length === 0) { - return; + return } // Prepare all data structures and predicted data @@ -117,7 +123,7 @@ export const StackedBarChart: React.FC = ({ data, accumul // X - values const x_vals = combinedData.map((d) => d.date.toISOString().substring(0, 10)) - + // prepare stacked data let stackedData if (accumulate) { @@ -192,11 +198,11 @@ export const StackedBarChart: React.FC = ({ data, accumul } // get SVG reference - const svg = d3.select(svgRef.current); + const svg = d3.select(svgRef.current) // remove prevously rendered data - svg.selectAll('g').remove(); - svg.selectAll('rect').remove(); + svg.selectAll('g').remove() + svg.selectAll('rect').remove() // generate bars const g = svg @@ -205,7 +211,7 @@ export const StackedBarChart: React.FC = ({ data, accumul .enter() .append('g') .attr('fill', (d, i) => colorFunc(i / seriesCount)) - .attr('id', (d, i) => `path${i}`); + .attr('id', (d, i) => `path${i}`) const rect = g .selectAll('rect') @@ -224,16 +230,17 @@ export const StackedBarChart: React.FC = ({ data, accumul // x-axis & labels const formatX = (val: number): string => x_vals[val] - let x_labels: d3.Selection = svg.select('.x-axis'); + let x_labels: d3.Selection = + svg.select('.x-axis') if (x_labels.empty()) { x_labels = svg .append('g') .attr('class', 'x-axis') .attr('transform', `translate(0,${height - margin.bottom})`) - .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)); + .call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) } else { - x_labels.call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)); + x_labels.call(d3.axisBottom(x).tickSizeOuter(0).tickFormat(formatX)) } // rotate x labels, if too many @@ -243,33 +250,33 @@ export const StackedBarChart: React.FC = ({ data, accumul .attr('transform', 'rotate(-90)') .attr('text-anchor', 'end') .attr('dy', '-0.55em') - .attr('dx', '-1em'); - } - else{ + .attr('dx', '-1em') + } else { x_labels .selectAll('text') .attr('transform', 'rotate(0)') .attr('text-anchor', 'middle') .attr('dy', '0.55em') - .attr('dx', '0em'); + .attr('dx', '0em') } // y-axis & labels const y = d3 .scaleLinear() .domain([0, y1Max]) - .range([height - margin.bottom, margin.top]); + .range([height - margin.bottom, margin.top]) - let y_labels: d3.Selection = svg.select('.y-axis'); + let y_labels: 
d3.Selection = + svg.select('.y-axis') if (y_labels.empty()) { y_labels = svg .append('g') .attr('class', 'y-axis') .attr('transform', `translate(${margin.left},0)`) - .call(d3.axisLeft(y)); + .call(d3.axisLeft(y)) } else { - y_labels.call(d3.axisLeft(y)); + y_labels.call(d3.axisLeft(y)) } // animate bars @@ -295,7 +302,7 @@ export const StackedBarChart: React.FC = ({ data, accumul d3.select(tg).selectAll('text').attr('font-weight', 'normal') } - const svgLegend = d3.select(legendRef.current); + const svgLegend = d3.select(legendRef.current) svgLegend .selectAll('g.legend') @@ -334,8 +341,7 @@ export const StackedBarChart: React.FC = ({ data, accumul }) // set all text to 2.5em - svg.selectAll('text') - .style('font-size', '2.5em') + svg.selectAll('text').style('font-size', '2.5em') function updateWindowWidth() { setWidth(containerDivRef.current?.clientWidth || 768) @@ -344,29 +350,31 @@ export const StackedBarChart: React.FC = ({ data, accumul updateWindowWidth() } window.addEventListener('resize', updateWindowWidth) - }, [data, accumulate]) - return ( - <>
- - - - -
-
- - ); + return ( + <> +
+ + +
+
+ + ) } From f6465e2ade62700f28479be1403dac19101e4aeb Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 12 Jan 2024 14:24:56 +1100 Subject: [PATCH 26/34] FIX: missing filters checks, updating charts when loading. --- db/python/layers/billing.py | 18 +- db/python/tables/bq/billing_base.py | 7 + models/models/billing.py | 3 + .../pages/billing/BillingCostByCategory.tsx | 2 + web/src/pages/billing/BillingCostByTime.tsx | 193 +++++++++++++----- .../components/BillingCostByTimeTable.tsx | 12 +- .../billing/components/CostByTimeChart.tsx | 3 - web/src/shared/components/Graphs/BarChart.tsx | 21 +- .../shared/components/Graphs/DonutChart.tsx | 22 +- .../components/Graphs/StackedBarChart.tsx | 4 + 10 files changed, 193 insertions(+), 92 deletions(-) diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index ecebae92d..89f71e155 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -21,7 +21,10 @@ class BillingLayer(BqBaseLayer): """Billing layer""" def table_factory( - self, source: BillingSource, fields: list[BillingColumn] | None = None + self, + source: BillingSource, + fields: list[BillingColumn] | None = None, + filters: dict[BillingColumn, str | list | dict] | None = None, ) -> ( BillingDailyTable | BillingDailyExtendedTable @@ -45,6 +48,17 @@ def table_factory( # there is a field from extended daily table return BillingDailyExtendedTable(self.connection) + # check if any of the filters is in the extended columns + if filters: + used_extended_cols = [ + f + for f in filters + if f in BillingColumn.extended_cols() and BillingColumn.can_group_by(f) + ] + if used_extended_cols: + # there is a field from extended daily table + return BillingDailyExtendedTable(self.connection) + # by default look at the daily table return BillingDailyTable(self.connection) @@ -174,7 +188,7 @@ async def get_total_cost( """ Get Total cost of selected fields for requested time interval """ - billing_table = self.table_factory(query.source, query.fields) + billing_table = self.table_factory(query.source, query.fields, query.filters) return await billing_table.get_total_cost(query) async def get_running_cost( diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py index 9210504de..8bb1158b4 100644 --- a/db/python/tables/bq/billing_base.py +++ b/db/python/tables/bq/billing_base.py @@ -549,6 +549,13 @@ async def get_total_cost( SELECT {time_group.formula}{fields_selected}, cost FROM t """ + # append min cost condition + if query.min_cost: + _query += ' WHERE cost > @min_cost' + query_parameters.append( + bigquery.ScalarQueryParameter('min_cost', 'FLOAT64', query.min_cost) + ) + # append LIMIT and OFFSET if present if query.limit: _query += ' LIMIT @limit_val' diff --git a/models/models/billing.py b/models/models/billing.py index 0ea24cde5..830d4554d 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -194,6 +194,9 @@ class BillingTotalCostQueryModel(SMBase): time_column: BillingTimeColumn | None = None time_periods: BillingTimePeriods | None = None + # optional, show the min cost, e.g. 
0.01, if not set, will show all + min_cost: float | None = None + def __hash__(self): """Create hash for this object to use in caching""" return hash(self.json()) diff --git a/web/src/pages/billing/BillingCostByCategory.tsx b/web/src/pages/billing/BillingCostByCategory.tsx index 31e60bd4e..41244d37f 100644 --- a/web/src/pages/billing/BillingCostByCategory.tsx +++ b/web/src/pages/billing/BillingCostByCategory.tsx @@ -177,6 +177,8 @@ const BillingCostByCategory: React.FunctionComponent = () => { filters: selFilters, order_by: { day: false }, time_periods: selectedPeriod, + // show only records with cost > 0.01 + min_cost: 0.01, }) } }, [groupBy, selectedGroup, selectedCostCategory, selectedPeriod, start, end]) diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 454988a33..3b6d99092 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -17,6 +17,7 @@ import { IStackedAreaByDateChartData } from '../../shared/components/Graphs/Stac import BillingCostByTimeTable from './components/BillingCostByTimeTable' import { BarChart, IData } from '../../shared/components/Graphs/BarChart' import { DonutChart } from '../../shared/components/Graphs/DonutChart' +import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import generateUrl from '../../shared/utilities/generateUrl' const BillingCostByTime: React.FunctionComponent = () => { @@ -43,6 +44,7 @@ const BillingCostByTime: React.FunctionComponent = () => { // Data loading const [isLoading, setIsLoading] = React.useState(true) const [error, setError] = React.useState() + const [message, setMessage] = React.useState() const [groups, setGroups] = React.useState([]) const [data, setData] = React.useState([]) const [aggregatedData, setAggregatedData] = React.useState([]) @@ -90,6 +92,7 @@ const BillingCostByTime: React.FunctionComponent = () => { const getData = (query: BillingTotalCostQueryModel) => { setIsLoading(true) setError(undefined) + setMessage(undefined) new BillingApi() .getTotalCost(query) .then((response) => { @@ -156,8 +159,125 @@ const BillingCostByTime: React.FunctionComponent = () => { .catch((er) => setError(er.message)) } + const messageComponent = () => { + if (message) { + return ( + setError(undefined)}> + {message} + + ) + } + if (error) { + return ( + setError(undefined)}> + {error} +
+ +
+ ) + } + if (isLoading) { + return ( +
+ +

+ This query takes a while... +

+
+ ) + } + return null + } + + const dataComponent = () => { + if (message || error || isLoading) { + return null + } + + if (!message && !error && !isLoading && (!data || data.length === 0)) { + return ( + + No Data + + ) + } + + return ( + <> + + + + + + + + + + + + + + + + + + + + + + ) + } + React.useEffect(() => { - if (selectedData !== undefined && selectedData !== '' && selectedData !== null) { + if ( + selectedData !== undefined && + selectedData !== '' && + selectedData !== null && + start !== undefined && + start !== '' && + start !== null && + end !== undefined && + end !== '' && + end !== null && + groupBy !== undefined && + groupBy !== null + ) { + // valid selection, retrieve data let source = BillingSource.Aggregate if (groupBy === BillingColumn.GcpProject) { source = BillingSource.GcpBilling @@ -180,21 +300,22 @@ const BillingCostByTime: React.FunctionComponent = () => { source: source, }) } + } else { + // invalid selection, + setIsLoading(false) + setError(undefined) + + if (start !== undefined || start !== null || start !== '') { + setMessage('Please select Start date') + } else if (end !== undefined || end !== null || end !== '') { + setMessage('Please select End date') + } else { + // generic message + setMessage('Please make selection') + } } }, [start, end, groupBy, selectedData]) - if (error) { - return ( - setError(undefined)}> - {error} -
- -
- ) - } - return ( <> @@ -250,51 +371,11 @@ const BillingCostByTime: React.FunctionComponent = () => { /> + - - - - - - - - - + {messageComponent()} - - - - - - - - - + {dataComponent()} ) } diff --git a/web/src/pages/billing/components/BillingCostByTimeTable.tsx b/web/src/pages/billing/components/BillingCostByTimeTable.tsx index 992ccaeca..14ccd267d 100644 --- a/web/src/pages/billing/components/BillingCostByTimeTable.tsx +++ b/web/src/pages/billing/components/BillingCostByTimeTable.tsx @@ -120,9 +120,6 @@ const BillingCostByTimeTable: React.FC = ({ return (
-

- This query takes a while... -

) } @@ -161,11 +158,13 @@ const BillingCostByTimeTable: React.FC = ({ ).map((p) => ( - + {p.date.toLocaleDateString()} {headerFields().map((k) => ( - {currencyFormat(p.values[k.category])} + + {currencyFormat(p.values[k.category])} + ))} @@ -187,7 +186,6 @@ const BillingCostByTimeTable: React.FC = ({ fitted toggle checked={expandCompute} - slider onChange={() => setExpandCompute(!expandCompute)} /> @@ -227,7 +225,7 @@ const BillingCostByTimeTable: React.FC = ({ All Time Total {headerFields().map((k) => ( - + {currencyFormat( internalData.reduce( diff --git a/web/src/pages/billing/components/CostByTimeChart.tsx b/web/src/pages/billing/components/CostByTimeChart.tsx index 4eeff8e90..a68a28699 100644 --- a/web/src/pages/billing/components/CostByTimeChart.tsx +++ b/web/src/pages/billing/components/CostByTimeChart.tsx @@ -26,9 +26,6 @@ const CostByTimeChart: React.FunctionComponent = ({ return (
-

- This query takes a while... -

) } diff --git a/web/src/shared/components/Graphs/BarChart.tsx b/web/src/shared/components/Graphs/BarChart.tsx index c859029ea..ad66af92b 100644 --- a/web/src/shared/components/Graphs/BarChart.tsx +++ b/web/src/shared/components/Graphs/BarChart.tsx @@ -16,8 +16,16 @@ interface BarChartProps { } export const BarChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + if (isLoading) { + return ( +
+ +
+ ) + } + if (!data || data.length === 0) { - return
No data available
+ return <>No Data } const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow @@ -55,17 +63,6 @@ export const BarChart: React.FC = ({ data, maxSlices, colors, isL // reset svg contDiv.innerHTML = '' - if (isLoading) { - return ( -
- -

- This query takes a while... -

-
- ) - } - // construct svg const svg = select(contDiv) .append('svg') diff --git a/web/src/shared/components/Graphs/DonutChart.tsx b/web/src/shared/components/Graphs/DonutChart.tsx index 4b5418d87..9f179098c 100644 --- a/web/src/shared/components/Graphs/DonutChart.tsx +++ b/web/src/shared/components/Graphs/DonutChart.tsx @@ -30,9 +30,18 @@ function calcTranslate(data: IDonutChartPreparadData, move = 4) { } export const DonutChart: React.FC = ({ data, maxSlices, colors, isLoading }) => { + // if (isLoading) { + // return ( + //
+ // + //
+ // ) + // } + if (!data || data.length === 0) { - return
No data available
+ return <>No Data } + const colorFunc: (t: number) => string | undefined = colors ?? interpolateRainbow const duration = 250 const containerDivRef = React.useRef() @@ -105,17 +114,6 @@ export const DonutChart: React.FC = ({ data, maxSlices, colors // reset svg contDiv.innerHTML = '' - if (isLoading) { - return ( -
- -

- This query takes a while... -

-
- ) - } - // construct svg const svg = select(contDiv) .append('svg') diff --git a/web/src/shared/components/Graphs/StackedBarChart.tsx b/web/src/shared/components/Graphs/StackedBarChart.tsx index 754d6dae5..f91955e18 100644 --- a/web/src/shared/components/Graphs/StackedBarChart.tsx +++ b/web/src/shared/components/Graphs/StackedBarChart.tsx @@ -352,6 +352,10 @@ export const StackedBarChart: React.FC = ({ data, accumul window.addEventListener('resize', updateWindowWidth) }, [data, accumulate]) + if (!data || data.length === 0) { + return <>No Data + } + return ( <>
From 011a54803a28f1bef7e0cde54a28d786acfb65cd Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 12 Jan 2024 15:24:52 +1100 Subject: [PATCH 27/34] FIX: silenece linting no attribute msg for Middleware. --- api/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/server.py b/api/server.py index 574b341f5..1e7a482dd 100644 --- a/api/server.py +++ b/api/server.py @@ -140,11 +140,11 @@ async def exception_handler(request: Request, e: Exception): cors_middleware = middlewares[0] request_origin = request.headers.get('origin', '') - if cors_middleware and '*' in cors_middleware.options['allow_origins']: + if cors_middleware and '*' in cors_middleware.options['allow_origins']: # type: ignore[attr-defined] response.headers['Access-Control-Allow-Origin'] = '*' elif ( cors_middleware - and request_origin in cors_middleware.options['allow_origins'] + and request_origin in cors_middleware.options['allow_origins'] # type: ignore[attr-defined] ): response.headers['Access-Control-Allow-Origin'] = request_origin From 14f29ffa9b6d4021ac507c0a29fba735a3a28736 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 18 Jan 2024 16:47:49 +1100 Subject: [PATCH 28/34] Refactoring filters, implemented first Billing GraphQL integration. --- api/graphql/schema.py | 108 ++++++- api/routes/billing.py | 2 +- db/python/layers/billing.py | 4 +- db/python/tables/bq/billing_ar_batch.py | 2 +- db/python/tables/bq/billing_base.py | 281 +++++++++--------- db/python/tables/bq/billing_filter.py | 49 +++ db/python/tables/bq/billing_gcp_daily.py | 64 ++-- db/python/tables/bq/billing_raw.py | 24 ++ db/python/tables/bq/function_bq_filter.py | 109 +++++++ db/python/tables/bq/generic_bq_filter.py | 101 +++++++ .../tables/bq/generic_bq_filter_model.py | 109 +++++++ models/models/__init__.py | 4 + models/models/billing.py | 48 ++- web/src/pages/billing/BillingCostByTime.tsx | 10 +- .../Graphs/HorizontalStackedBarChart.tsx | 2 +- 15 files changed, 728 insertions(+), 189 deletions(-) create mode 100644 db/python/tables/bq/billing_filter.py create mode 100644 db/python/tables/bq/function_bq_filter.py create mode 100644 db/python/tables/bq/generic_bq_filter.py create mode 100644 db/python/tables/bq/generic_bq_filter_model.py diff --git a/api/graphql/schema.py b/api/graphql/schema.py index d0ac1619f..1b7213642 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -18,8 +18,10 @@ from api.graphql.filters import GraphQLFilter, GraphQLMetaFilter from api.graphql.loaders import LoaderKeys, get_context from db.python import enum_tables +from db.python.gcp_connect import BqConnection from db.python.layers import AnalysisLayer, SampleLayer, SequencingGroupLayer from db.python.layers.assay import AssayLayer +from db.python.layers.billing import BillingLayer from db.python.layers.family import FamilyLayer from db.python.tables.analysis import AnalysisFilter from db.python.tables.assay import AssayFilter @@ -32,6 +34,9 @@ AnalysisInternal, AssayInternal, AuditLogInternal, + BillingColumn, + BillingInternal, + BillingTotalCostQueryModel, FamilyInternal, ParticipantInternal, Project, @@ -497,7 +502,6 @@ class GraphQLSequencingGroup: @staticmethod def from_internal(internal: SequencingGroupInternal) -> 'GraphQLSequencingGroup': - # print(internal) return GraphQLSequencingGroup( id=sequencing_group_id_format(internal.id), type=internal.type, @@ -593,6 +597,33 @@ async def sample(self, info: Info, root: 'GraphQLAssay') -> GraphQLSample: return GraphQLSample.from_internal(sample) +@strawberry.type +class 
GraphQLBilling: + """GraphQL Billing""" + + id: str | None + ar_guid: str | None + gcp_project: str | None + topic: str | None + batch_id: str | None + cost_category: str | None + day: datetime.date | None + cost: float | None + + @staticmethod + def from_internal(internal: BillingInternal) -> 'GraphQLBilling': + return GraphQLBilling( + id=internal.id, + ar_guid=internal.ar_guid, + gcp_project=internal.gcp_project, + topic=internal.topic, + batch_id=internal.batch_id, + cost_category=internal.cost_category, + day=internal.day, + cost=internal.cost, + ) + + @strawberry.type class Query: """GraphQL Queries""" @@ -731,6 +762,81 @@ async def my_projects(self, info: Info) -> list[GraphQLProject]: ) return [GraphQLProject.from_internal(p) for p in projects] + @strawberry.field + async def billing( + self, + info: Info, + batch_id: str | None = None, + ar_guid: str | None = None, + topic: str | None = None, + gcp_project: str | None = None, + day: GraphQLFilter[datetime.datetime] | None = None, + cost: GraphQLFilter[float] | None = None, + ) -> list[GraphQLBilling]: + """ + This is the first raw implementation of Billing inside GraphQL + """ + # TODO check billing is enabled e.g.: + # if not is_billing_enabled(): + # raise ValueError('Billing is not enabled') + + # TODO is there a better way to get the BQ connection? + connection = info.context['connection'] + bg_connection = BqConnection(connection.author) + slayer = BillingLayer(bg_connection) + + if ar_guid: + res = await slayer.get_cost_by_ar_guid(ar_guid) + if res: + # only show the costs + res = res.costs + + elif batch_id: + res = await slayer.get_cost_by_batch_id(batch_id) + if res: + # only show the costs + res = res.costs + + else: + # TODO construct fields from request.body (selected attributes) + # For time being, just use these fields + fields = [ + BillingColumn.DAY, + BillingColumn.COST, + BillingColumn.COST_CATEGORY, + ] + + filters = {} + if topic: + filters['topic'] = topic + fields.append(BillingColumn.TOPIC) + if gcp_project: + filters['gcp_project'] = gcp_project + fields.append(BillingColumn.GCP_PROJECT) + + if day: + all_days_vals = day.all_values() + start_date = min(all_days_vals).strftime('%Y-%m-%d') + end_date = max(all_days_vals).strftime('%Y-%m-%d') + else: + # TODO we need to limit to small time periods to avoid huge charges + # If day is not selected use only current day records + start_date = datetime.datetime.now().strftime('%Y-%m-%d') + end_date = start_date + + query = BillingTotalCostQueryModel( + fields=fields, + start_date=start_date, + end_date=end_date, + filters=filters, + ) + res = await slayer.get_total_cost(query) + + return [ + GraphQLBilling.from_internal(BillingInternal.from_db(**dict(p))) + for p in res + ] + schema = strawberry.Schema( query=Query, mutation=None, extensions=[QueryDepthLimiter(max_depth=10)] diff --git a/api/routes/billing.py b/api/routes/billing.py index f903717d6..be1fb46ff 100644 --- a/api/routes/billing.py +++ b/api/routes/billing.py @@ -7,7 +7,7 @@ from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW from api.utils.db import BqConnection, get_author from db.python.layers.billing import BillingLayer -from models.models.billing import ( +from models.models import ( BillingColumn, BillingCostBudgetRecord, BillingHailBatchCostRecord, diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index 89f71e155..8f991c728 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -7,13 +7,11 @@ from models.models import ( BillingColumn, 
BillingCostBudgetRecord, - BillingTotalCostQueryModel, -) -from models.models.billing import ( BillingHailBatchCostRecord, BillingSource, BillingTimeColumn, BillingTimePeriods, + BillingTotalCostQueryModel, ) diff --git a/db/python/tables/bq/billing_ar_batch.py b/db/python/tables/bq/billing_ar_batch.py index be2d3093e..d9326b6b3 100644 --- a/db/python/tables/bq/billing_ar_batch.py +++ b/db/python/tables/bq/billing_ar_batch.py @@ -30,7 +30,7 @@ async def get_batches_by_ar_guid( WHERE ar_guid = @ar_guid AND batch_id IS NOT NULL GROUP BY batch_id - ORDER BY 1; + ORDER BY batch_id; """ query_parameters = [ diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py index 8bb1158b4..12e56f02b 100644 --- a/db/python/tables/bq/billing_base.py +++ b/db/python/tables/bq/billing_base.py @@ -9,6 +9,9 @@ from api.settings import BQ_BUDGET_VIEW, BQ_DAYS_BACK_OPTIMAL from api.utils.dates import get_invoice_month_range, reformat_datetime from db.python.gcp_connect import BqDbBase +from db.python.tables.bq.billing_filter import BillingFilter +from db.python.tables.bq.function_bq_filter import FunctionBQFilter +from db.python.tables.bq.generic_bq_filter import GenericBQFilter from models.models import ( BillingColumn, BillingCostBudgetRecord, @@ -22,7 +25,9 @@ # Day Time details used in grouping and parsing formulas -TimeGroupingDetails = namedtuple('TimeGroupingDetails', ['field', 'formula']) +TimeGroupingDetails = namedtuple( + 'TimeGroupingDetails', ['field', 'formula', 'separator'] +) def abbrev_cost_category(cost_category: str) -> str: @@ -35,64 +40,37 @@ def prepare_time_periods( ) -> TimeGroupingDetails: """Prepare Time periods grouping and parsing formulas""" time_column = query.time_column or 'day' - result = TimeGroupingDetails('', '') + result = TimeGroupingDetails('', '', '') # Based on specified time period, add the corresponding column if query.time_periods == BillingTimePeriods.DAY: result = TimeGroupingDetails( - field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day, ', - formula='PARSE_DATE("%Y-%m-%d", day) as day, ', + field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day', + formula='PARSE_DATE("%Y-%m-%d", day) as day', + separator=',', ) elif query.time_periods == BillingTimePeriods.WEEK: result = TimeGroupingDetails( - field=f'FORMAT_DATE("%Y%W", {time_column}) as day, ', - formula='PARSE_DATE("%Y%W", day) as day, ', + field=f'FORMAT_DATE("%Y%W", {time_column}) as day', + formula='PARSE_DATE("%Y%W", day) as day', + separator=',', ) elif query.time_periods == BillingTimePeriods.MONTH: result = TimeGroupingDetails( - field=f'FORMAT_DATE("%Y%m", {time_column}) as day, ', - formula='PARSE_DATE("%Y%m", day) as day, ', + field=f'FORMAT_DATE("%Y%m", {time_column}) as day', + formula='PARSE_DATE("%Y%m", day) as day', + separator=',', ) elif query.time_periods == BillingTimePeriods.INVOICE_MONTH: result = TimeGroupingDetails( - field='invoice_month as day, ', formula='PARSE_DATE("%Y%m", day) as day, ' + field='invoice_month as day', + formula='PARSE_DATE("%Y%m", day) as day', + separator=',', ) return result -def construct_filter( - name: str, value: Any, is_label: bool = False -) -> tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]: - """Based on Filter value, construct filter string and query parameter - - Args: - name (str): Filter name - value (Any): Filter value - is_label (bool, optional): Is filter a label?. Defaults to False. 
- - Returns: - tuple[str, bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter] - """ - compare = '=' - b1, b2 = '', '' - param_type = bigquery.ScalarQueryParameter - key = name.replace('-', '_') - - if isinstance(value, list): - compare = 'IN' - b1, b2 = 'UNNEST(', ')' - param_type = bigquery.ArrayQueryParameter - - if is_label: - name = f'getLabelValue(labels, "{name}")' - - return ( - f'{name} {compare} {b1}@{key}{b2}', - param_type(key, 'STRING', value), - ) - - class BillingBaseTable(BqDbBase): """Billing Base Table This is abstract class, it should not be instantiated @@ -124,6 +102,26 @@ def _execute_query( # otherwise return as BQ iterator return self._connection.connection.query(query, job_config=job_config) + def _query_to_partitioned_filter( + self, query: BillingTotalCostQueryModel + ) -> BillingFilter: + """ + By default views are partitioned by 'day', + if different then overwrite in the subclass + """ + billing_filter = query.to_filter() + + # initial partition filter + billing_filter.day = GenericBQFilter[datetime]( + gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=datetime.strptime(query.end_date, '%Y-%m-%d') + if query.end_date + else None, + ) + return billing_filter + def _filter_to_optimise_query(self) -> str: """Filter string to optimise BQ query""" return 'day >= TIMESTAMP(@start_day) AND day <= TIMESTAMP(@last_day)' @@ -132,67 +130,6 @@ def _last_loaded_day_filter(self) -> str: """Last Loaded day filter string""" return 'day = TIMESTAMP(@last_loaded_day)' - def _prepare_time_filters(self, query: BillingTotalCostQueryModel): - """Prepare time filters""" - time_column = query.time_column or 'day' - time_filters = [] - query_parameters = [] - - if query.start_date: - time_filters.append(f'{time_column} >= TIMESTAMP(@start_date)') - query_parameters.extend( - [ - bigquery.ScalarQueryParameter( - 'start_date', 'STRING', query.start_date - ), - ] - ) - if query.end_date: - time_filters.append(f'{time_column} <= TIMESTAMP(@end_date)') - query_parameters.extend( - [ - bigquery.ScalarQueryParameter('end_date', 'STRING', query.end_date), - ] - ) - - return time_filters, query_parameters - - def _prepare_filter_str(self, query: BillingTotalCostQueryModel): - """Prepare filter string""" - and_filters, query_parameters = self._prepare_time_filters(query) - - # No additional filters - filters = [] - if not query.filters: - filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' - return filter_str, query_parameters - - # Add each of the filters in the query - for filter_key, filter_value in query.filters.items(): - col_name = str(filter_key.value) - - if not isinstance(filter_value, dict): - filter_, query_param = construct_filter(col_name, filter_value) - filters.append(filter_) - query_parameters.append(query_param) - else: - for label_key, label_value in filter_value.items(): - filter_, query_param = construct_filter( - label_key, label_value, True - ) - filters.append(filter_) - query_parameters.append(query_param) - - if query.filters_op == 'OR': - if filters: - and_filters.append('(' + ' OR '.join(filters) + ')') - else: - # if not specified, default to AND - and_filters.extend(filters) - - filter_str = 'WHERE ' + ' AND '.join(and_filters) if and_filters else '' - return filter_str, query_parameters - def _convert_output(self, query_job_result): """Convert query result to json""" if not query_job_result or query_job_result.result().total_rows == 0: @@ -483,70 +420,138 @@ async def 
_append_running_cost_records( return results - async def get_total_cost( - self, - query: BillingTotalCostQueryModel, - ) -> list[dict] | None: - """ - Get Total cost of selected fields for requested time interval from BQ view - """ - if not query.start_date or not query.end_date or not query.fields: - raise ValueError('Date and Fields are required') + def _prepare_order_by_string( + self, order_by: dict[BillingColumn, bool] | None + ) -> str: + """Prepare order by string""" + if not order_by: + return '' + + order_by_cols = [] + for order_field, reverse in order_by.items(): + col_name = str(order_field.value) + col_order = 'DESC' if reverse else 'ASC' + order_by_cols.append(f'{col_name} {col_order}') + + return f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' + + def _prepare_aggregation( + self, query: BillingTotalCostQueryModel + ) -> tuple[str, str]: + """Prepare both fields for aggregation and group by string""" + # Get columns to group by + + # if group by is populated, then we need to group by day as well + grp_columns = ['day'] if query.group_by else [] - # Get columns to group by and check view to use - grp_columns = [] for field in query.fields: col_name = str(field.value) if not BillingColumn.can_group_by(field): # if the field cannot be grouped by, skip it continue + # append to potential columns to group by grp_columns.append(col_name) - grp_selected = ','.join(grp_columns) fields_selected = ','.join( (field.value for field in query.fields if field != BillingColumn.COST) ) + grp_selected = ','.join(grp_columns) + group_by = f'GROUP BY {grp_selected}' if query.group_by else '' + + return fields_selected, group_by + + def _prepare_labels_function(self, query: BillingTotalCostQueryModel): + if not query.filters: + return None + + if BillingColumn.LABELS in query.filters and isinstance( + query.filters[BillingColumn.LABELS], dict + ): + # prepare labels as function filters, parameterized both sides + func_filter = FunctionBQFilter( + name='getLabelValue', + implementation=""" + CREATE TEMP FUNCTION getLabelValue( + labels ARRAY>, label STRING + ) AS ( + (SELECT value FROM UNNEST(labels) WHERE key = label LIMIT 1) + ); + """, + ) + func_filter.to_sql( + BillingColumn.LABELS, + query.filters[BillingColumn.LABELS], + query.filters_op, + ) + return func_filter + + # otherwise + return None + + async def get_total_cost( + self, + query: BillingTotalCostQueryModel, + ) -> list[dict] | None: + """ + Get Total cost of selected fields for requested time interval from BQ views + """ + if not query.start_date or not query.end_date or not query.fields: + raise ValueError('Date and Fields are required') + + # Get columns to select and to group by + fields_selected, group_by = self._prepare_aggregation(query) + + # construct order by + order_by_str = self._prepare_order_by_string(query.order_by) + # prepare grouping by time periods - time_group = TimeGroupingDetails('', '') + time_group = TimeGroupingDetails('', '', '') if query.time_periods or query.time_column: - # remove existing day column, if added to fields - # this is to prevent duplicating various time periods in one query - # if BillingColumn.DAY in query.fields: - # columns.remove(BillingColumn.DAY) time_group = prepare_time_periods(query) - filter_str, query_parameters = self._prepare_filter_str(query) + # overrides time specific fields with relevant time column name + query_filter = self._query_to_partitioned_filter(query) - # construct order by - order_by_cols = [] - if query.order_by: - for order_field, reverse in 
query.order_by.items(): - col_name = str(order_field.value) - col_order = 'DESC' if reverse else 'ASC' - order_by_cols.append(f'{col_name} {col_order}') + # prepare where string and SQL parameters + where_str, sql_parameters = query_filter.to_sql() + + # extract only BQ Query parameter, keys are not used in BQ SQL + # have to declare empty list first as linting is not happy + query_parameters: list[ + bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter + ] = [] + query_parameters.extend(sql_parameters.values()) + + # prepare labels as function filters if present + func_filter = self._prepare_labels_function(query) + if func_filter: + # extend where_str and query_parameters + query_parameters.extend(func_filter.func_sql_parameters) + + # now join Prepared Where with Labels Function Where + where_str = ' AND '.join([where_str, func_filter.func_where]) - order_by_str = f'ORDER BY {",".join(order_by_cols)}' if order_by_cols else '' + # if group by is populated, then we need SUM the cost, otherwise raw cost + cost_column = 'SUM(cost) as cost' if query.group_by else 'cost' - group_by = f'GROUP BY day, {grp_selected}' if query.group_by else '' - cost = 'SUM(cost) as cost' if query.group_by else 'cost' + if where_str: + # Where is not empty, prepend with WHERE + where_str = f'WHERE {where_str}' _query = f""" - CREATE TEMP FUNCTION getLabelValue( - labels ARRAY>, label STRING - ) AS ( - (SELECT value FROM UNNEST(labels) WHERE key = label LIMIT 1) - ); + {func_filter.fun_implementation if func_filter else ''} WITH t AS ( - SELECT {time_group.field}{fields_selected}, {cost} + SELECT {time_group.field}{time_group.separator} {fields_selected}, + {cost_column} FROM `{self.get_table_name()}` - {filter_str} + {where_str} {group_by} {order_by_str} ) - SELECT {time_group.formula}{fields_selected}, cost FROM t + SELECT {time_group.formula}{time_group.separator} {fields_selected}, cost FROM t """ # append min cost condition diff --git a/db/python/tables/bq/billing_filter.py b/db/python/tables/bq/billing_filter.py new file mode 100644 index 000000000..1c333c767 --- /dev/null +++ b/db/python/tables/bq/billing_filter.py @@ -0,0 +1,49 @@ +import dataclasses +import datetime + +from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from db.python.tables.bq.generic_bq_filter_model import GenericBQFilterModel + + +@dataclasses.dataclass +class BillingFilter(GenericBQFilterModel): + """ + Filter for billing, contains all possible attributes to filter on + """ + + # partition specific filters: + + # most billing views are parttioned by day + day: GenericBQFilter[datetime.datetime] = None + + # gpc table has different partition field: part_time + part_time: GenericBQFilter[datetime.datetime] = None + + # aggregate has different partition field: usage_end_time + usage_end_time: GenericBQFilter[datetime.datetime] = None + + # common filters: + invoice_month: GenericBQFilter[str] = None + + # min cost e.g. 
0.01, if not set, will show all + cost: GenericBQFilter[float] = None + + ar_guid: GenericBQFilter[str] = None + gcp_project: GenericBQFilter[str] = None + topic: GenericBQFilter[str] = None + batch_id: GenericBQFilter[str] = None + cost_category: GenericBQFilter[str] = None + sku: GenericBQFilter[str] = None + dataset: GenericBQFilter[str] = None + sequencing_type: GenericBQFilter[str] = None + stage: GenericBQFilter[str] = None + sequencing_group: GenericBQFilter[str] = None + compute_category: GenericBQFilter[str] = None + cromwell_sub_workflow_name: GenericBQFilter[str] = None + cromwell_workflow_id: GenericBQFilter[str] = None + goog_pipelines_worker: GenericBQFilter[str] = None + wdl_task_name: GenericBQFilter[str] = None + namespace: GenericBQFilter[str] = None + + def __hash__(self): + return super().__hash__() diff --git a/db/python/tables/bq/billing_gcp_daily.py b/db/python/tables/bq/billing_gcp_daily.py index 1924b01e6..1588ab65f 100644 --- a/db/python/tables/bq/billing_gcp_daily.py +++ b/db/python/tables/bq/billing_gcp_daily.py @@ -1,7 +1,11 @@ +from datetime import datetime, timedelta + from google.cloud import bigquery from api.settings import BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW from db.python.tables.bq.billing_base import BillingBaseTable +from db.python.tables.bq.billing_filter import BillingFilter +from db.python.tables.bq.generic_bq_filter import GenericBQFilter from models.models import BillingTotalCostQueryModel @@ -14,50 +18,28 @@ def get_table_name(self): """Get table name""" return self.table_name - def _filter_to_optimise_query(self) -> str: - """Filter string to optimise BQ query - override base class method as gcp table has different partition field + def _query_to_partitioned_filter( + self, query: BillingTotalCostQueryModel + ) -> BillingFilter: """ - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - return ( - 'part_time >= TIMESTAMP(@start_day)' - 'AND part_time <= TIMESTAMP_ADD(TIMESTAMP(@last_day), INTERVAL 7 DAY)' - ) - - def _last_loaded_day_filter(self) -> str: - """Filter string to optimise BQ query - override base class method as gcp table has different partition field + add extra filter to limit materialized view partition + Raw BQ billing table is partitioned by part_time (when data are loaded) + and not by end of usage time (day) + There is a delay up to 4-5 days between part_time and day + 7 days is added to be sure to get all data """ - # add extra filter to limit materialized view partition - # Raw BQ billing table is partitioned by part_time (when data are loaded) - # and not by end of usage time (day) - # There is a delay up to 4-5 days between part_time and day - # 7 days is added to be sure to get all data - return ( - 'day = TIMESTAMP(@last_loaded_day)' - 'AND part_time >= TIMESTAMP(@last_loaded_day)' - 'AND part_time <= TIMESTAMP_ADD(TIMESTAMP(@last_loaded_day),INTERVAL 7 DAY)' - ) - - def _prepare_time_filters(self, query: BillingTotalCostQueryModel): - """Prepare time filters, append to time_filters list""" - time_filters, query_parameters = super()._prepare_time_filters(query) - - # BQ_GCP_BILLING_VIEW view is partitioned by different field - # BQ has limitation, materialized view can only by partition by base table - # partition or its subset, in our case _PARTITIONTIME - # (part_time field in the view) 
- # We are querying by day, - # which can be up to a week behind regarding _PARTITIONTIME - time_filters.append('part_time >= TIMESTAMP(@start_date)') - time_filters.append( - 'part_time <= TIMESTAMP_ADD(TIMESTAMP(@end_date), INTERVAL 7 DAY)' + billing_filter = query.to_filter() + + # initial partition filter + billing_filter.part_time = GenericBQFilter[datetime]( + gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=(datetime.strptime(query.end_date, '%Y-%m-%d') + timedelta(days=7)) + if query.end_date + else None, ) - return time_filters, query_parameters + return billing_filter async def _last_loaded_day(self): """Get the most recent fully loaded day in db diff --git a/db/python/tables/bq/billing_raw.py b/db/python/tables/bq/billing_raw.py index 6a6c7b83e..a82fa4eec 100644 --- a/db/python/tables/bq/billing_raw.py +++ b/db/python/tables/bq/billing_raw.py @@ -1,5 +1,10 @@ +from datetime import datetime + from api.settings import BQ_AGGREG_RAW from db.python.tables.bq.billing_base import BillingBaseTable +from db.python.tables.bq.billing_filter import BillingFilter +from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from models.models import BillingTotalCostQueryModel class BillingRawTable(BillingBaseTable): @@ -10,3 +15,22 @@ class BillingRawTable(BillingBaseTable): def get_table_name(self): """Get table name""" return self.table_name + + def _query_to_partitioned_filter( + self, query: BillingTotalCostQueryModel + ) -> BillingFilter: + """ + Raw BQ billing table is partitioned by usage_end_time + """ + billing_filter = query.to_filter() + + # initial partition filter + billing_filter.usage_end_time = GenericBQFilter[datetime]( + gte=datetime.strptime(query.start_date, '%Y-%m-%d') + if query.start_date + else None, + lte=datetime.strptime(query.end_date, '%Y-%m-%d') + if query.end_date + else None, + ) + return billing_filter diff --git a/db/python/tables/bq/function_bq_filter.py b/db/python/tables/bq/function_bq_filter.py new file mode 100644 index 000000000..07e89e1eb --- /dev/null +++ b/db/python/tables/bq/function_bq_filter.py @@ -0,0 +1,109 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from google.cloud import bigquery + +from models.models import BillingColumn + + +class FunctionBQFilter: + """ + Function BigQuery filter where left site is a function call + In such case we need to parameterised values on both side of SQL + E.g. + + SELECT ... + FROM ... 
+ WHERE getLabelValue(labels, 'batch_id') = '1234' + + In this case we have 2 string values which need to be parameterised + """ + + func_where = '' + func_sql_parameters: list[ + bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter + ] = [] + + def __init__(self, name: str, implementation: str): + self.func_name = name + self.fun_implementation = implementation + # param_id is a counter for parameterised values + self._param_id = 0 + + def to_sql( + self, + column_name: BillingColumn, + func_params: str | list[Any] | dict[Any, Any], + func_operator: str = None, + ) -> tuple[str, list[bigquery.ScalarQueryParameter | bigquery.ArrayQueryParameter]]: + """ + creates the left side of where : FUN(column_name, @params) + each of func_params convert to BQ parameter + combined multiple calls with provided operator, + if func_operator is None then AND is assumed by default + """ + values = [] + conditionals = [] + + if not isinstance(func_params, dict): + # Ignore func_params which are not dictionary for the time being + return '', [] + + for param_key, param_value in func_params.items(): + # parameterised both param_key and param_value + # e.g. this is raw SQL example: + # getLabelValue(labels, {param_key}) = {param_value} + self._param_id += 1 + key = f'param{self._param_id}' + val = f'value{self._param_id}' + # add param_key as parameterised BQ value + values.append(FunctionBQFilter._sql_value_prep(key, param_key)) + + # add param_value as parameterised BQ value + values.append(FunctionBQFilter._sql_value_prep(val, param_value)) + + # format as FUN(column_name, @param) = @value + conditionals.append( + ( + f'{self.func_name}({column_name.value},@{key}) = ' + f'{FunctionBQFilter._sql_cond_prep(val, param_value)}' + ) + ) + + if func_operator and func_operator == 'OR': + condition = ' OR '.join(conditionals) + else: + condition = ' AND '.join(conditionals) + + # set the class variables for later use + self.func_where = f'({condition})' + self.func_sql_parameters = values + return self.func_where, self.func_sql_parameters + + @staticmethod + def _sql_cond_prep(key: str, value: Any) -> str: + """ + By default '{key}' is used, + but for datetime it has to be wrapped in TIMESTAMP({key}) + """ + if isinstance(value, datetime): + return f'TIMESTAMP(@{key})' + + # otherwise as default + return f'@{key}' + + @staticmethod + def _sql_value_prep(key: str, value: Any) -> bigquery.ScalarQueryParameter: + """ """ + if isinstance(value, Enum): + return bigquery.ScalarQueryParameter(key, 'STRING', value.value) + if isinstance(value, int): + return bigquery.ScalarQueryParameter(key, 'INT64', value) + if isinstance(value, float): + return bigquery.ScalarQueryParameter(key, 'FLOAT64', value) + if isinstance(value, datetime): + return bigquery.ScalarQueryParameter(key, 'STRING', value) + + # otherwise as string parameter + return bigquery.ScalarQueryParameter(key, 'STRING', value) diff --git a/db/python/tables/bq/generic_bq_filter.py b/db/python/tables/bq/generic_bq_filter.py new file mode 100644 index 000000000..7e75b7d00 --- /dev/null +++ b/db/python/tables/bq/generic_bq_filter.py @@ -0,0 +1,101 @@ +from datetime import datetime +from enum import Enum +from typing import Any + +from google.cloud import bigquery + +from db.python.utils import GenericFilter, T + + +class GenericBQFilter(GenericFilter[T]): + """ + Generic BigQuery filter is BQ specific filter class, based on GenericFilter + """ + + def to_sql( + self, column: str, column_name: str = None + ) -> tuple[str, dict[str, T | list[T] | Any | 
list[Any]]]: + """ + Convert to SQL, and avoid SQL injection + + """ + conditionals = [] + values: dict[str, T | list[T] | Any | list[Any]] = {} + _column_name = column_name or column + + if not isinstance(column, str): + raise ValueError(f'Column {_column_name!r} must be a string') + if self.eq is not None: + k = self.generate_field_name(_column_name + '_eq') + conditionals.append(f'{column} = {self._sql_cond_prep(k, self.eq)}') + values[k] = self._sql_value_prep(k, self.eq) + if self.in_ is not None: + if not isinstance(self.in_, list): + raise ValueError('IN filter must be a list') + if len(self.in_) == 1: + k = self.generate_field_name(_column_name + '_in_eq') + conditionals.append(f'{column} = {self._sql_cond_prep(k, self.in_[0])}') + values[k] = self._sql_value_prep(k, self.in_[0]) + else: + k = self.generate_field_name(_column_name + '_in') + conditionals.append(f'{column} IN ({self._sql_cond_prep(k, self.in_)})') + values[k] = self._sql_value_prep(k, self.in_) + if self.nin is not None: + if not isinstance(self.nin, list): + raise ValueError('NIN filter must be a list') + k = self.generate_field_name(column + '_nin') + conditionals.append(f'{column} NOT IN ({self._sql_cond_prep(k, self.nin)})') + values[k] = self._sql_value_prep(k, self.nin) + if self.gt is not None: + k = self.generate_field_name(column + '_gt') + conditionals.append(f'{column} > {self._sql_cond_prep(k, self.gt)}') + values[k] = self._sql_value_prep(k, self.gt) + if self.gte is not None: + k = self.generate_field_name(column + '_gte') + conditionals.append(f'{column} >= {self._sql_cond_prep(k, self.gte)}') + values[k] = self._sql_value_prep(k, self.gte) + if self.lt is not None: + k = self.generate_field_name(column + '_lt') + conditionals.append(f'{column} < {self._sql_cond_prep(k, self.lt)}') + values[k] = self._sql_value_prep(k, self.lt) + if self.lte is not None: + k = self.generate_field_name(column + '_lte') + conditionals.append(f'{column} <= {self._sql_cond_prep(k, self.lte)}') + values[k] = self._sql_value_prep(k, self.lte) + + return ' AND '.join(conditionals), values + + @staticmethod + def _sql_cond_prep(key, value) -> str: + """ + By default '@{key}' is used, + but for datetime it has to be wrapped in TIMESTAMP(@{k}) + """ + if isinstance(value, datetime): + return f'TIMESTAMP(@{key})' + + # otherwise as default + return f'@{key}' + + @staticmethod + def _sql_value_prep(key, value): + """ + Overrides the default _sql_value_prep to handle BQ parameters + """ + if isinstance(value, list): + return bigquery.ArrayQueryParameter( + key, 'STRING', ','.join([str(v) for v in value]) + ) + if isinstance(value, Enum): + return bigquery.ScalarQueryParameter(key, 'STRING', value.value) + if isinstance(value, int): + return bigquery.ScalarQueryParameter(key, 'INT64', value) + if isinstance(value, float): + return bigquery.ScalarQueryParameter(key, 'FLOAT64', value) + if isinstance(value, datetime): + return bigquery.ScalarQueryParameter( + key, 'STRING', value.strftime('%Y-%m-%d %H:%M:%S') + ) + + # otherwise as string parameter + return bigquery.ScalarQueryParameter(key, 'STRING', value) diff --git a/db/python/tables/bq/generic_bq_filter_model.py b/db/python/tables/bq/generic_bq_filter_model.py new file mode 100644 index 000000000..3de5051af --- /dev/null +++ b/db/python/tables/bq/generic_bq_filter_model.py @@ -0,0 +1,109 @@ +import dataclasses +from typing import Any + +from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from db.python.utils import GenericFilterModel + + +def 
prepare_bq_query_from_dict_field( + filter_, field_name, column_name +) -> tuple[list[str], dict[str, Any]]: + """ + Prepare a SQL query from a dict field, which is a dict of GenericFilters. + Usually this is a JSON field in the database that we want to query on. + """ + conditionals: list[str] = [] + values: dict[str, Any] = {} + for key, value in filter_.items(): + if not isinstance(value, GenericBQFilter): + raise ValueError(f'Filter {field_name} must be a GenericFilter') + if '"' in key: + raise ValueError('Meta key contains " character, which is not allowed') + fconditionals, fvalues = value.to_sql( + f"JSON_EXTRACT({column_name}, '$.{key}')", + column_name=f'{column_name}_{key}', + ) + conditionals.append(fconditionals) + values.update(fvalues) + + return conditionals, values + + +@dataclasses.dataclass(kw_only=True) +class GenericBQFilterModel(GenericFilterModel): + """ + Class that contains fields of GenericBQFilters that can be used to filter + """ + + def __post_init__(self): + for field in dataclasses.fields(self): + value = getattr(self, field.name) + if value is None: + continue + + if isinstance(value, tuple) and len(value) == 1 and value[0] is None: + raise ValueError( + 'There is very likely a trailing comma on the end of ' + f'{self.__class__.__name__}.{field.name}. If you actually want a ' + 'tuple of length one with the value = (None,), then use ' + 'dataclasses.field(default_factory=lambda: (None,))' + ) + if isinstance(value, GenericBQFilter): + continue + + if isinstance(value, dict): + # make sure each field is a GenericFilter, or set it to be one, + # in this case it's always 'eq', never automatically in_ + new_value = { + k: v if isinstance(v, GenericBQFilter) else GenericBQFilter(eq=v) + for k, v in value.items() + } + setattr(self, field.name, new_value) + continue + + # lazily provided a value, which we'll correct + if isinstance(value, list): + setattr(self, field.name, GenericBQFilter(in_=value)) + else: + setattr(self, field.name, GenericBQFilter(eq=value)) + + def to_sql( + self, field_overrides: dict[str, Any] = None + ) -> tuple[str, dict[str, Any]]: + """Convert the model to SQL, and avoid SQL injection""" + _foverrides = field_overrides or {} + + # check for bad field_overrides + bad_field_overrides = set(_foverrides.keys()) - set( + f.name for f in dataclasses.fields(self) + ) + if bad_field_overrides: + raise ValueError( + f'Specified field overrides that were not used: {bad_field_overrides}' + ) + + fields = dataclasses.fields(self) + conditionals, values = [], {} + for field in fields: + fcolumn = _foverrides.get(field.name, field.name) + if filter_ := getattr(self, field.name): + if isinstance(filter_, dict): + meta_conditionals, meta_values = prepare_bq_query_from_dict_field( + filter_=filter_, field_name=field.name, column_name=fcolumn + ) + conditionals.extend(meta_conditionals) + values.update(meta_values) + elif isinstance(filter_, GenericBQFilter): + fconditionals, fvalues = filter_.to_sql(fcolumn) + conditionals.append(fconditionals) + values.update(fvalues) + else: + raise ValueError( + f'Filter {field.name} must be a GenericBQFilter or ' + 'dict[str, GenericBQFilter]' + ) + + if not conditionals: + return 'True', {} + + return ' AND '.join(filter(None, conditionals)), values diff --git a/models/models/__init__.py b/models/models/__init__.py index 0582ddfbb..d3b836e9a 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -15,6 +15,10 @@ BillingColumn, BillingCostBudgetRecord, BillingCostDetailsRecord, + 
BillingHailBatchCostRecord, + BillingInternal, + BillingSource, + BillingTimeColumn, BillingTimePeriods, BillingTotalCostQueryModel, BillingTotalCostRecord, diff --git a/models/models/billing.py b/models/models/billing.py index 830d4554d..05d062519 100644 --- a/models/models/billing.py +++ b/models/models/billing.py @@ -1,10 +1,41 @@ import datetime from enum import Enum +from db.python.tables.bq.billing_filter import BillingFilter +from db.python.tables.bq.generic_bq_filter import GenericBQFilter from models.base import SMBase from models.enums.billing import BillingSource, BillingTimeColumn, BillingTimePeriods +class BillingInternal(SMBase): + """Model for Analysis""" + + id: str | None + ar_guid: str | None + gcp_project: str | None + topic: str | None + batch_id: str | None + cost_category: str | None + cost: float | None + day: datetime.date | None + + @staticmethod + def from_db(**kwargs): + """ + Convert from db keys, mainly converting id to id_ + """ + return BillingInternal( + id=kwargs.get('id'), + ar_guid=kwargs.get('ar_guid', kwargs.get('ar-guid')), + gcp_project=kwargs.get('gcp_project'), + topic=kwargs.get('topic'), + batch_id=kwargs.get('batch_id'), + cost_category=kwargs.get('cost_category'), + cost=kwargs.get('cost'), + day=kwargs.get('day'), + ) + + class BillingColumn(str, Enum): """List of billing columns""" @@ -95,8 +126,10 @@ def is_extended_column(cls, value: 'BillingColumn') -> bool: @classmethod def str_to_enum(cls, value: str) -> 'BillingColumn': """Convert string to enum""" + # all column names have underscore in SQL, but dash in UI / stored data + adjusted_value = value.replace('-', '_') str_to_enum = {v.value: v for k, v in BillingColumn.__members__.items()} - return str_to_enum[value] + return str_to_enum[adjusted_value] @classmethod def raw_cols(cls) -> list[str]: @@ -201,6 +234,19 @@ def __hash__(self): """Create hash for this object to use in caching""" return hash(self.json()) + def to_filter(self) -> BillingFilter: + """ + Convert to internal analysis filter + """ + billing_filter = BillingFilter() + if self.filters: + # add filters as attributes + for fk, fv in self.filters.items(): + # fk is BillColumn, fv is value + setattr(billing_filter, fk.value, GenericBQFilter(eq=fv)) + + return billing_filter + class BillingTotalCostRecord(SMBase): """Return class for the Billing Total Cost record""" diff --git a/web/src/pages/billing/BillingCostByTime.tsx b/web/src/pages/billing/BillingCostByTime.tsx index 3b6d99092..ba191d335 100644 --- a/web/src/pages/billing/BillingCostByTime.tsx +++ b/web/src/pages/billing/BillingCostByTime.tsx @@ -305,9 +305,15 @@ const BillingCostByTime: React.FunctionComponent = () => { setIsLoading(false) setError(undefined) - if (start !== undefined || start !== null || start !== '') { + if (groupBy === undefined || groupBy === null) { + // Group By not selected + setMessage('Please select Group By') + } else if (selectedData === undefined || selectedData === null || selectedData === '') { + // Top Level not selected + setMessage(`Please select ${groupBy}`) + } else if (start === undefined || start === null || start === '') { setMessage('Please select Start date') - } else if (end !== undefined || end !== null || end !== '') { + } else if (end === undefined || end === null || end === '') { setMessage('Please select End date') } else { // generic message diff --git a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx index e317311d8..204d7cc7a 100644 --- 
a/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx +++ b/web/src/shared/components/Graphs/HorizontalStackedBarChart.tsx @@ -31,7 +31,7 @@ const HorizontalStackedBarChart: React.FC = ({ isLoading, showLegend, }) => { - if (!data || data.length === 0) { + if (!isLoading && (!data || data.length === 0)) { return
<div>No data available</div>
} From 3f298cc939b1c680a95e4c8d5312770a0ca97079 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Thu, 18 Jan 2024 16:59:42 +1100 Subject: [PATCH 29/34] Fixing linting. --- db/python/tables/bq/billing_filter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/db/python/tables/bq/billing_filter.py b/db/python/tables/bq/billing_filter.py index 1c333c767..9a379817f 100644 --- a/db/python/tables/bq/billing_filter.py +++ b/db/python/tables/bq/billing_filter.py @@ -1,3 +1,5 @@ +# pylint: disable=unused-import,too-many-instance-attributes + import dataclasses import datetime @@ -44,6 +46,3 @@ class BillingFilter(GenericBQFilterModel): goog_pipelines_worker: GenericBQFilter[str] = None wdl_task_name: GenericBQFilter[str] = None namespace: GenericBQFilter[str] = None - - def __hash__(self): - return super().__hash__() From 41e2ba6579459d86efe0e8d5ffd08821dfbb5c94 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 19 Jan 2024 15:26:44 +1100 Subject: [PATCH 30/34] Added unit tests for BQ filters. --- db/python/tables/bq/generic_bq_filter.py | 19 +- test/test_bq_generic_filters.py | 294 +++++++++++++++++++++++ test/test_generic_filters.py | 51 ++++ 3 files changed, 359 insertions(+), 5 deletions(-) create mode 100644 test/test_bq_generic_filters.py diff --git a/db/python/tables/bq/generic_bq_filter.py b/db/python/tables/bq/generic_bq_filter.py index 7e75b7d00..8aeabd729 100644 --- a/db/python/tables/bq/generic_bq_filter.py +++ b/db/python/tables/bq/generic_bq_filter.py @@ -38,13 +38,17 @@ def to_sql( values[k] = self._sql_value_prep(k, self.in_[0]) else: k = self.generate_field_name(_column_name + '_in') - conditionals.append(f'{column} IN ({self._sql_cond_prep(k, self.in_)})') + conditionals.append( + f'{column} IN UNNEST({self._sql_cond_prep(k, self.in_)})' + ) values[k] = self._sql_value_prep(k, self.in_) if self.nin is not None: if not isinstance(self.nin, list): raise ValueError('NIN filter must be a list') k = self.generate_field_name(column + '_nin') - conditionals.append(f'{column} NOT IN ({self._sql_cond_prep(k, self.nin)})') + conditionals.append( + f'{column} NOT IN UNNEST({self._sql_cond_prep(k, self.nin)})' + ) values[k] = self._sql_value_prep(k, self.nin) if self.gt is not None: k = self.generate_field_name(column + '_gt') @@ -83,9 +87,14 @@ def _sql_value_prep(key, value): Overrides the default _sql_value_prep to handle BQ parameters """ if isinstance(value, list): - return bigquery.ArrayQueryParameter( - key, 'STRING', ','.join([str(v) for v in value]) - ) + if value and isinstance(value[0], int): + return bigquery.ArrayQueryParameter(key, 'INT64', value) + if value and isinstance(value[0], float): + return bigquery.ArrayQueryParameter(key, 'FLOAT64', value) + + # otherwise all list records as string + return bigquery.ArrayQueryParameter(key, 'STRING', [str(v) for v in value]) + if isinstance(value, Enum): return bigquery.ScalarQueryParameter(key, 'STRING', value.value) if isinstance(value, int): diff --git a/test/test_bq_generic_filters.py b/test/test_bq_generic_filters.py new file mode 100644 index 000000000..38719573b --- /dev/null +++ b/test/test_bq_generic_filters.py @@ -0,0 +1,294 @@ +import dataclasses +import unittest +from datetime import datetime +from enum import Enum + +from google.cloud import bigquery + +from db.python.tables.bq.generic_bq_filter import GenericBQFilter +from db.python.tables.bq.generic_bq_filter_model import GenericBQFilterModel + + +@dataclasses.dataclass(kw_only=True) +class GenericBQFilterTest(GenericBQFilterModel): + """Test 
model for GenericBQFilter""" + + test_string: GenericBQFilter[str] | None = None + test_int: GenericBQFilter[int] | None = None + test_float: GenericBQFilter[float] | None = None + test_dt: GenericBQFilter[datetime] | None = None + test_dict: dict[str, GenericBQFilter[str]] | None = None + test_enum: GenericBQFilter[Enum] | None = None + + +class BGFilterTestEnum(str, Enum): + """Simple Enum classs""" + + ID = 'id' + VALUE = 'value' + + +class TestGenericBQFilters(unittest.TestCase): + """Test generic filters SQL generation""" + + def test_basic_no_override(self): + """Test that the basic filter converts to SQL as expected""" + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(eq='test')) + sql, values = filter_.to_sql() + + self.assertEqual('test_string = @test_string_eq', sql) + self.assertDictEqual( + { + 'test_string_eq': bigquery.ScalarQueryParameter( + 'test_string_eq', 'STRING', 'test' + ) + }, + values, + ) + + def test_basic_override(self): + """Test that the basic filter with an override converts to SQL as expected""" + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(eq='test')) + sql, values = filter_.to_sql({'test_string': 't.string'}) + + self.assertEqual('t.string = @t_string_eq', sql) + self.assertDictEqual( + { + 't_string_eq': bigquery.ScalarQueryParameter( + 't_string_eq', 'STRING', 'test' + ) + }, + values, + ) + + def test_single_string(self): + """ + Test that a single value filtered using the "in" operator + gets converted to an eq operation + """ + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(in_=['test'])) + sql, values = filter_.to_sql() + + self.assertEqual('test_string = @test_string_in_eq', sql) + self.assertDictEqual( + { + 'test_string_in_eq': bigquery.ScalarQueryParameter( + 'test_string_in_eq', 'STRING', 'test' + ) + }, + values, + ) + + def test_single_int(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = 123 + filter_ = GenericBQFilterTest(test_int=GenericBQFilter(gt=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int > @test_int_gt', sql) + self.assertDictEqual( + { + 'test_int_gt': bigquery.ScalarQueryParameter( + 'test_int_gt', 'INT64', value + ) + }, + values, + ) + + def test_single_float(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = 123.456 + filter_ = GenericBQFilterTest(test_float=GenericBQFilter(gte=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_float >= @test_float_gte', sql) + self.assertDictEqual( + { + 'test_float_gte': bigquery.ScalarQueryParameter( + 'test_float_gte', 'FLOAT64', value + ) + }, + values, + ) + + def test_single_datetime(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + datetime_str = '2021-10-08 01:02:03' + value = datetime.strptime(datetime_str, '%Y-%m-%d %H:%M:%S') + filter_ = GenericBQFilterTest(test_dt=GenericBQFilter(lt=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_dt < TIMESTAMP(@test_dt_lt)', sql) + self.assertDictEqual( + { + 'test_dt_lt': bigquery.ScalarQueryParameter( + 'test_dt_lt', 'STRING', datetime_str + ) + }, + values, + ) + + def test_single_enum(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = BGFilterTestEnum.ID + filter_ = GenericBQFilterTest(test_enum=GenericBQFilter(lte=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_enum <= @test_enum_lte', sql) + self.assertDictEqual( + { + 'test_enum_lte': 
bigquery.ScalarQueryParameter( + 'test_enum_lte', 'STRING', value.value + ) + }, + values, + ) + + def test_in_multiple_int(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = [1, 2] + filter_ = GenericBQFilterTest(test_int=GenericBQFilter(in_=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int IN UNNEST(@test_int_in)', sql) + self.assertDictEqual( + { + 'test_int_in': bigquery.ArrayQueryParameter( + 'test_int_in', 'INT64', value + ) + }, + values, + ) + + def test_in_multiple_float(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = [1.0, 2.0] + filter_ = GenericBQFilterTest(test_float=GenericBQFilter(in_=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_float IN UNNEST(@test_float_in)', sql) + self.assertDictEqual( + { + 'test_float_in': bigquery.ArrayQueryParameter( + 'test_float_in', 'FLOAT64', value + ) + }, + values, + ) + + def test_in_multiple_str(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = ['A', 'B'] + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(in_=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_string IN UNNEST(@test_string_in)', sql) + self.assertDictEqual( + { + 'test_string_in': bigquery.ArrayQueryParameter( + 'test_string_in', 'STRING', value + ) + }, + values, + ) + + def test_nin_multiple_str(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = ['A', 'B'] + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(nin=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_string NOT IN UNNEST(@test_string_nin)', sql) + self.assertDictEqual( + { + 'test_string_nin': bigquery.ArrayQueryParameter( + 'test_string_nin', 'STRING', value + ) + }, + values, + ) + + def test_in_and_eq_multiple_str(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = ['A'] + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(in_=value, eq='B')) + sql, values = filter_.to_sql() + + self.assertEqual( + 'test_string = @test_string_eq AND test_string = @test_string_in_eq', + sql, + ) + self.assertDictEqual( + { + 'test_string_eq': bigquery.ScalarQueryParameter( + 'test_string_eq', 'STRING', 'B' + ), + 'test_string_in_eq': bigquery.ScalarQueryParameter( + 'test_string_in_eq', 'STRING', 'A' + ), + }, + values, + ) + + def test_failed_in_multiple_str(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = 'Not a list' + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(in_=value)) + + # check if ValueError is raised + with self.assertRaises(ValueError) as context: + filter_.to_sql() + + self.assertTrue('IN filter must be a list' in str(context.exception)) + + def test_failed_not_in_multiple_str(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = 'Not a list' + filter_ = GenericBQFilterTest(test_string=GenericBQFilter(nin=value)) + + # check if ValueError is raised + with self.assertRaises(ValueError) as context: + filter_.to_sql() + + self.assertTrue('NIN filter must be a list' in str(context.exception)) + + def test_fail_none_in_tuple(self): + """ + Test that values filtered using the "in" operator convert as expected + """ + value = (None,) + + # check if ValueError is raised + with self.assertRaises(ValueError) as context: + filter_ = 
GenericBQFilterTest(test_string=value) + filter_.to_sql() + + self.assertTrue( + 'There is very likely a trailing comma on the end of ' + 'GenericBQFilterTest.test_string. ' + 'If you actually want a tuple of length one with the value = (None,), ' + 'then use dataclasses.field(default_factory=lambda: (None,))' + in str(context.exception) + ) diff --git a/test/test_generic_filters.py b/test/test_generic_filters.py index 2c1348076..b5598be54 100644 --- a/test/test_generic_filters.py +++ b/test/test_generic_filters.py @@ -53,3 +53,54 @@ def test_in_multiple(self): self.assertEqual('test_int IN :test_int_in', sql) self.assertDictEqual({'test_int_in': value}, values) + + def test_gt_single(self): + """ + Test that a single value filtered using the "gt" operator + """ + filter_ = GenericFilterTest(test_int=GenericFilter(gt=123)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int > :test_int_gt', sql) + self.assertDictEqual({'test_int_gt': 123}, values) + + def test_gte_single(self): + """ + Test that a single value filtered using the "gte" operator + """ + filter_ = GenericFilterTest(test_int=GenericFilter(gte=123)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int >= :test_int_gte', sql) + self.assertDictEqual({'test_int_gte': 123}, values) + + def test_lt_single(self): + """ + Test that a single value filtered using the "lt" operator + """ + filter_ = GenericFilterTest(test_int=GenericFilter(lt=123)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int < :test_int_lt', sql) + self.assertDictEqual({'test_int_lt': 123}, values) + + def test_lte_single(self): + """ + Test that a single value filtered using the "lte" operator + """ + filter_ = GenericFilterTest(test_int=GenericFilter(lte=123)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int <= :test_int_lte', sql) + self.assertDictEqual({'test_int_lte': 123}, values) + + def test_not_in_multiple(self): + """ + Test that values filtered using the "nin" operator convert as expected + """ + value = [1, 2] + filter_ = GenericFilterTest(test_int=GenericFilter(nin=value)) + sql, values = filter_.to_sql() + + self.assertEqual('test_int NOT IN :test_int_nin', sql) + self.assertDictEqual({'test_int_nin': value}, values) From 624f7ef18c00dd77d645b6e2fc3237f7d253ff89 Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 19 Jan 2024 16:46:44 +1100 Subject: [PATCH 31/34] Fixing linting. 
--- test/test_bq_generic_filters.py | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/test/test_bq_generic_filters.py b/test/test_bq_generic_filters.py index 38719573b..bdd21b224 100644 --- a/test/test_bq_generic_filters.py +++ b/test/test_bq_generic_filters.py @@ -2,6 +2,7 @@ import unittest from datetime import datetime from enum import Enum +from typing import Any from google.cloud import bigquery @@ -19,6 +20,7 @@ class GenericBQFilterTest(GenericBQFilterModel): test_dt: GenericBQFilter[datetime] | None = None test_dict: dict[str, GenericBQFilter[str]] | None = None test_enum: GenericBQFilter[Enum] | None = None + test_any: Any | None = None class BGFilterTestEnum(str, Enum): @@ -248,32 +250,6 @@ def test_in_and_eq_multiple_str(self): values, ) - def test_failed_in_multiple_str(self): - """ - Test that values filtered using the "in" operator convert as expected - """ - value = 'Not a list' - filter_ = GenericBQFilterTest(test_string=GenericBQFilter(in_=value)) - - # check if ValueError is raised - with self.assertRaises(ValueError) as context: - filter_.to_sql() - - self.assertTrue('IN filter must be a list' in str(context.exception)) - - def test_failed_not_in_multiple_str(self): - """ - Test that values filtered using the "in" operator convert as expected - """ - value = 'Not a list' - filter_ = GenericBQFilterTest(test_string=GenericBQFilter(nin=value)) - - # check if ValueError is raised - with self.assertRaises(ValueError) as context: - filter_.to_sql() - - self.assertTrue('NIN filter must be a list' in str(context.exception)) - def test_fail_none_in_tuple(self): """ Test that values filtered using the "in" operator convert as expected @@ -282,12 +258,12 @@ def test_fail_none_in_tuple(self): # check if ValueError is raised with self.assertRaises(ValueError) as context: - filter_ = GenericBQFilterTest(test_string=value) + filter_ = GenericBQFilterTest(test_any=value) filter_.to_sql() self.assertTrue( 'There is very likely a trailing comma on the end of ' - 'GenericBQFilterTest.test_string. ' + 'GenericBQFilterTest.test_any. ' 'If you actually want a tuple of length one with the value = (None,), ' 'then use dataclasses.field(default_factory=lambda: (None,))' in str(context.exception) From ca5d1a3a87ea962aea0cfcb3cf50193f46ff3d6a Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Fri, 19 Jan 2024 17:35:14 +1100 Subject: [PATCH 32/34] Added tests for billing routes. 
--- test/test_api_billing.py | 21 ++++++++++++ test/test_api_utils.py | 69 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 test/test_api_billing.py create mode 100644 test/test_api_utils.py diff --git a/test/test_api_billing.py b/test/test_api_billing.py new file mode 100644 index 000000000..f523c8f0f --- /dev/null +++ b/test/test_api_billing.py @@ -0,0 +1,21 @@ +import unittest +from test.testbase import run_as_sync + +from api.routes.billing import get_gcp_projects, is_billing_enabled + + +class TestApiBilling(unittest.TestCase): + """Test API Billing routes""" + + def test_is_billing_enabled(self): + """ """ + result = is_billing_enabled() + self.assertEqual(False, result) + + @run_as_sync + async def test_get_gcp_projects(self): + """ """ + with self.assertRaises(ValueError) as context: + _result = await get_gcp_projects('test_user') + + self.assertTrue('Billing is not enabled' in str(context.exception)) diff --git a/test/test_api_utils.py b/test/test_api_utils.py new file mode 100644 index 000000000..44d8b3ed5 --- /dev/null +++ b/test/test_api_utils.py @@ -0,0 +1,69 @@ +import unittest +from datetime import datetime + +from api.utils.dates import ( + get_invoice_month_range, + parse_date_only_string, + reformat_datetime, +) + + +class TestApiUtils(unittest.TestCase): + """Test API utils functions""" + + def test_parse_date_only_string(self): + """ """ + result_none = parse_date_only_string(None) + self.assertEqual(None, result_none) + + result_date = parse_date_only_string('2021-01-10') + self.assertEqual(2021, result_date.year) + self.assertEqual(1, result_date.month) + self.assertEqual(10, result_date.day) + + # test exception + invalid_date_str = '123456789' + with self.assertRaises(ValueError) as context: + parse_date_only_string(invalid_date_str) + + self.assertTrue( + f'Date could not be converted: {invalid_date_str}' in str(context.exception) + ) + + def test_get_invoice_month_range(self): + jan_2021 = datetime.strptime('2021-01-10', '%Y-%m-%d').date() + res_jan_2021 = get_invoice_month_range(jan_2021) + + # there is 3 (INVOICE_DAY_DIFF) days difference between invoice month st and end + self.assertEqual( + (datetime(2020, 12, 29).date(), datetime(2021, 2, 3).date()), + res_jan_2021, + ) + + dec_2021 = datetime.strptime('2021-12-10', '%Y-%m-%d').date() + res_dec_2021 = get_invoice_month_range(dec_2021) + + # there is 3 (INVOICE_DAY_DIFF) days difference between invoice month st and end + self.assertEqual( + (datetime(2021, 11, 28).date(), datetime(2022, 1, 3).date()), + res_dec_2021, + ) + + def test_reformat_datetime(self): + in_format = '%Y-%m-%d' + out_format = '%d/%m/%Y' + + result_none = reformat_datetime(None, in_format, out_format) + self.assertEqual(None, result_none) + + result_formatted = reformat_datetime('2021-11-09', in_format, out_format) + self.assertEqual('09/11/2021', result_formatted) + + # test exception + invalid_date_str = '123456789' + with self.assertRaises(ValueError) as context: + reformat_datetime(invalid_date_str, in_format, out_format) + + self.assertTrue( + f'Date could not be converted: {invalid_date_str}' in str(context.exception) + ) From fdcc32550742a29d5739a01a899f2ec6963c2bec Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Mon, 22 Jan 2024 13:18:16 +1100 Subject: [PATCH 33/34] Billing - preparing for integrating into GraphQL. 
--- api/graphql/schema.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 1b7213642..19d1c4f3f 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -762,6 +762,11 @@ async def my_projects(self, info: Info) -> list[GraphQLProject]: ) return [GraphQLProject.from_internal(p) for p in projects] + """ + TODO split inot 4 or 5 different functions + e.g. billing_by_batch_id, billing_by_ar_guid, billing_by_topic, billing_by_gcp_project + """ + @strawberry.field async def billing( self, From 0318d5e3154fb6e1765fcf8f56bb4c01d4ce343f Mon Sep 17 00:00:00 2001 From: Milo Hyben Date: Wed, 31 Jan 2024 17:40:04 +1100 Subject: [PATCH 34/34] GraphQL Billing funcs first idea. --- api/graphql/schema.py | 192 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 185 insertions(+), 7 deletions(-) diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 19d1c4f3f..ed263ad91 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -1,6 +1,6 @@ # type: ignore # flake8: noqa -# pylint: disable=no-value-for-parameter,redefined-builtin,missing-function-docstring,unused-argument +# pylint: disable=no-value-for-parameter,redefined-builtin,missing-function-docstring,unused-argument,too-many-lines """ Schema for GraphQL. @@ -8,6 +8,7 @@ and defaults to decide the GraphQL schema, so it might not necessarily look correct. """ import datetime +from collections import Counter from inspect import isclass import strawberry @@ -35,6 +36,7 @@ AssayInternal, AuditLogInternal, BillingColumn, + BillingHailBatchCostRecord, BillingInternal, BillingTotalCostQueryModel, FamilyInternal, @@ -71,6 +73,23 @@ async def m(info: Info) -> list[str]: GraphQLEnum = strawberry.type(type('GraphQLEnum', (object,), enum_methods)) +def to_camel_case(test_str: str) -> str: + # using for loop to convert string to camel case + result = '' + capitalize_next = False + for char in test_str: + if char == '_': + capitalize_next = True + else: + if capitalize_next: + result += char.upper() + capitalize_next = False + else: + result += char + + return result + + @strawberry.type class GraphQLProject: """Project GraphQL model""" @@ -624,6 +643,106 @@ def from_internal(internal: BillingInternal) -> 'GraphQLBilling': ) +@strawberry.type +class GraphQLBatchCostRecord: + """GraphQL Billing""" + + id: str | None + ar_guid: str | None + batch_id: str | None + job_id: str | None + day: datetime.date | None + + topic: str | None + namespace: str | None + name: str | None + + sku: str | None + cost: float | None + url: str | None + + @staticmethod + def from_json(json: dict) -> 'GraphQLBatchCostRecord': + return GraphQLBatchCostRecord( + id=json.get('id'), + ar_guid=json.get('ar_guid'), + batch_id=json.get('batch_id'), + job_id=json.get('job_id'), + day=json.get('day'), + topic=json.get('topic'), + namespace=json.get('namespace'), + name=json.get('name'), + sku=json.get('sku'), + cost=json.get('cost'), + url=json.get('url'), + ) + + @staticmethod + def from_internal( + internal: BillingHailBatchCostRecord, fields: list[str] | None = None + ) -> list['GraphQLBatchCostRecord']: + """ + TODO sum the costs based on selected fields + """ + results = [] + if not internal: + return results + + ar_guid = internal.ar_guid + + if fields is None: + for rec in internal.costs: + results.append( + GraphQLBatchCostRecord( + id=rec.get('id'), + ar_guid=ar_guid, + batch_id=rec.get('batch_id'), + job_id=rec.get('job_id'), + day=rec.get('day'), + topic=rec.get('topic'), + 
+                        namespace=rec.get('namespace'),
+                        name=rec.get('batch_name'),
+                        sku=rec.get('batch_resource'),
+                        cost=rec.get('cost'),
+                        url=rec.get('url'),
+                    )
+                )
+        else:
+            # we need to aggregate sum(cost) by fields
+            # if cost is not selected, this becomes a distinct-like operation?
+
+            # prepare the fields
+            aggregated = Counter()
+
+            class_fields = list(
+                GraphQLBatchCostRecord.__dict__['__annotations__'].keys()
+            )
+            for rec in internal.costs:
+                # create key based on the selected fields
+                key = '_'.join(
+                    [
+                        str(rec.get(f))
+                        if 'cost' not in f and to_camel_case(f) in fields
+                        else ''
+                        for f in class_fields
+                    ]
+                )
+                aggregated[key] += rec.get('cost', 0)
+
+            for key, cost in aggregated.items():
+                # split the key back into its field values
+                key_parts = key.split('_')
+                # map the values back onto class_fields
+                record = {}
+                for pos, field_name in enumerate(class_fields):
+                    record[field_name] = key_parts[pos]
+
+                record['cost'] = cost
+                results.append(GraphQLBatchCostRecord.from_json(record))
+
+        return results
+
+
 @strawberry.type
 class Query:
     """GraphQL Queries"""
@@ -762,13 +881,75 @@ async def my_projects(self, info: Info) -> list[GraphQLProject]:
         )
         return [GraphQLProject.from_internal(p) for p in projects]
 
-    """
+    """
     TODO split into 4 or 5 different functions
     e.g. billing_by_batch_id, billing_by_ar_guid, billing_by_topic, billing_by_gcp_project
     """
 
+    @staticmethod
+    def get_billing_layer(info: Info) -> BillingLayer:
+        # TODO is there a better way to get the BQ connection?
+        connection = info.context['connection']
+        bg_connection = BqConnection(connection.author)
+        return BillingLayer(bg_connection)
+
+    @staticmethod
+    async def extract_fields(info: Info) -> list[str]:
+        from graphql.parser import GraphQLParser
+
+        parser = GraphQLParser()
+        body = await info.context.get('request').json()
+        ast = parser.parse(body['query'])
+        fields = [f.name for f in ast.definitions[0].selections[-1].selections]
+        print('fields', fields)
+        return fields
+
+    @strawberry.field
+    async def billing_by_batch_id(
+        self,
+        info: Info,
+        batch_id: str,
+    ) -> list[GraphQLBatchCostRecord]:
+        slayer = Query.get_billing_layer(info)
+        result = await slayer.get_cost_by_batch_id(batch_id)
+        fields = await Query.extract_fields(info)
+        return GraphQLBatchCostRecord.from_internal(result, fields)
+
+    @strawberry.field
+    async def billing_by_ar_guid(
+        self,
+        info: Info,
+        ar_guid: str,
+    ) -> list[GraphQLBatchCostRecord]:
+        slayer = Query.get_billing_layer(info)
+        result = await slayer.get_cost_by_ar_guid(ar_guid)
+        fields = await Query.extract_fields(info)
+        return GraphQLBatchCostRecord.from_internal(result, fields)
+
+    @strawberry.field
+    async def billing_by_topic(
+        self,
+        info: Info,
+        topic: str | None = None,
+        day: GraphQLFilter[datetime.datetime] | None = None,
+        cost: GraphQLFilter[float] | None = None,
+    ) -> list[GraphQLBilling]:
+        # slayer = Query.get_billing_layer(info)
+        return []
+
+    @strawberry.field
+    async def billing_by_gcp_project(
+        self,
+        info: Info,
+        gcp_project: str | None = None,
+        day: GraphQLFilter[datetime.datetime] | None = None,
+        cost: GraphQLFilter[float] | None = None,
+    ) -> list[GraphQLBilling]:
+        # slayer = Query.get_billing_layer(info)
+        return []
+
     @strawberry.field
-    async def billing(
+    async def billing_todel(
         self,
         info: Info,
         batch_id: str | None = None,
@@ -785,10 +966,7 @@ async def billing(
 
         # if not is_billing_enabled():
         #     raise ValueError('Billing is not enabled')
 
-        # TODO is there a better way to get the BQ connection?
-        connection = info.context['connection']
-        bg_connection = BqConnection(connection.author)
-        slayer = BillingLayer(bg_connection)
+        slayer = Query.get_billing_layer(info)
 
         if ar_guid:
             res = await slayer.get_cost_by_ar_guid(ar_guid)
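+            # NOTE: get_cost_by_ar_guid returns a BillingHailBatchCostRecord
+            # (the ar_guid plus its cost rows), the same layer call used by the
+            # new billing_by_ar_guid resolver above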