diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py index 737be6857..e9fd83042 100644 --- a/db/python/layers/billing.py +++ b/db/python/layers/billing.py @@ -213,9 +213,12 @@ async def get_cost_by_ar_guid( start_day, end_day, batches, - ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid) + ) = await ar_batch_lookup_table.get_batches_by_ar_guid( + ar_guid, limit_to_hail_batches=False + ) - if not batches: + if not start_day: + # ar-guid job is not found return BillingHailBatchCostRecord( ar_guid=ar_guid, batch_ids=[], @@ -232,7 +235,6 @@ async def get_cost_by_ar_guid( end_date=end_day.strftime('%Y-%m-%d'), filters={ BillingColumn.LABELS: { - 'batch_id': batches, 'ar-guid': ar_guid, } }, @@ -240,10 +242,14 @@ async def get_cost_by_ar_guid( group_by=False, time_column=BillingTimeColumn.USAGE_END_TIME, time_periods=BillingTimePeriods.DAY, + # cromwell jobs specifically can have a lot of 0 cost items, + # so we want to exclude them + min_cost=0 if not batches else None, ) billing_table = self.table_factory(query.source, query.fields) records = await billing_table.get_total_cost(query) + print(records) return BillingHailBatchCostRecord( ar_guid=ar_guid, batch_ids=batches, diff --git a/db/python/tables/bq/billing_ar_batch.py b/db/python/tables/bq/billing_ar_batch.py index d9326b6b3..dfa0c6ee4 100644 --- a/db/python/tables/bq/billing_ar_batch.py +++ b/db/python/tables/bq/billing_ar_batch.py @@ -16,11 +16,15 @@ def get_table_name(self): return self.table_name async def get_batches_by_ar_guid( - self, ar_guid: str + self, ar_guid: str, limit_to_hail_batches: bool = True ) -> tuple[datetime, datetime, list[str]]: """ Get batches for given ar_guid + Only Hail batch contains batch_id, + cromwell batch does not have batch_id, only ar_guid """ + cond = 'AND batch_id IS NOT NULL' if limit_to_hail_batches else '' + _query = f""" SELECT batch_id, @@ -28,7 +32,7 @@ async def get_batches_by_ar_guid( MAX(max_day) as end_day FROM `{self.table_name}` WHERE ar_guid = @ar_guid - AND batch_id IS NOT NULL + {cond} GROUP BY batch_id ORDER BY batch_id; """ @@ -41,7 +45,11 @@ async def get_batches_by_ar_guid( if query_job_result: start_day = min((row.start_day for row in query_job_result)) end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1) - return start_day, end_day, [row.batch_id for row in query_job_result] + return ( + start_day, + end_day, + [row.batch_id for row in query_job_result if row.batch_id], + ) # return empty list if no record found return None, None, [] diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py index 335603c3b..3912da146 100644 --- a/db/python/tables/bq/billing_base.py +++ b/db/python/tables/bq/billing_base.py @@ -109,6 +109,9 @@ def _execute_query( else: job_config = bigquery.QueryJobConfig(labels=BQ_LABELS) + print(query) + print(params) + if results_as_list: return list( self._connection.connection.query(query, job_config=job_config).result() @@ -572,7 +575,7 @@ async def get_total_cost( """ # append min cost condition - if query.min_cost: + if query.min_cost is not None: _query += ' WHERE cost > @min_cost' query_parameters.append( bigquery.ScalarQueryParameter('min_cost', 'FLOAT64', query.min_cost) diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx index 87e1ef153..8fcfbad17 100644 --- a/web/src/pages/billing/BillingCostByAnalysis.tsx +++ b/web/src/pages/billing/BillingCostByAnalysis.tsx @@ -6,6 +6,7 @@ import SearchIcon from '@mui/icons-material/Search' import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks' import { BillingApi, BillingTotalCostRecord } from '../../sm-api' import HailBatchGrid from './components/HailBatchGrid' +import CromwellDataProcGrid from './components/CromwellDataProcGrid' import { getMonthStartDate } from '../../shared/utilities/monthStartEndDate' import generateUrl from '../../shared/utilities/generateUrl' @@ -191,23 +192,40 @@ const BillingCostByAnalysis: React.FunctionComponent = () => {
Try these examples:
- Ar guid: f5a065d2-c51f-46b7-a920-a89b639fc4ba + Search By Ar guid: +
+ f5a065d2-c51f-46b7-a920-a89b639fc4ba (HailBatch) +
+ dfdd532e-b12b-4f3a-af96-6e1163b28941 (Cromwell) b8ccaec8-423c-4299-953e-5b24157e47cd + huge cromwell job with seq groups +
+ f1ab510c-d63c-41a9-8018-51b64806abca (Dataproc) +

Batch id: 430604, 430605

) - const gridCard = (gridData: BillingTotalCostRecord[]) => ( + const gridHailBatchCard = (gridData: BillingTotalCostRecord[]) => ( ) + const gridCromwellCard = (gridData: BillingTotalCostRecord[]) => ( + + + + ) + const dataComponent = () => { if (data !== undefined && data.costs.length > 0) { // only render grid if there are available cost data - return gridCard(data.costs) + if (data.batch_ids !== undefined && data.batch_ids.length > 0) { + return gridHailBatchCard(data.costs) + } + return gridCromwellCard(data.costs) } // if valid search text and no data return return No data message diff --git a/web/src/pages/billing/components/CromwellDataProcGrid.tsx b/web/src/pages/billing/components/CromwellDataProcGrid.tsx new file mode 100644 index 000000000..ee8716e55 --- /dev/null +++ b/web/src/pages/billing/components/CromwellDataProcGrid.tsx @@ -0,0 +1,414 @@ +import * as React from 'react' +import { Table as SUITable, Popup, Checkbox } from 'semantic-ui-react' +import _ from 'lodash' +import Table from '../../../shared/components/Table' +import sanitiseValue from '../../../shared/utilities/sanitiseValue' +import '../../project/AnalysisRunnerView/AnalysisGrid.css' + +interface Field { + category: string + title: string + width?: string + className?: string + dataMap?: (data: any, value: string) => any +} + +function setFieldValue(field: string, value: any, rec: any) { + // set field value if not undefined + if (value !== undefined) { + rec[field] = value + } +} + +function prepareTotalRow(data: any[], key: string) { + // aggregate data by key + console.log(data) + const aggData: any[] = [] + data.forEach((curr) => { + const { cost, topic, usage_start_time, usage_end_time, creator } = curr + const usageStartDate = new Date(usage_start_time) + const usageEndDate = new Date(usage_end_time) + const ar_guid = curr['ar-guid'] + const cromwell_id = curr['cromwell-workflow-id'] + const goog_pipelines_worker = curr['goog-pipelines-worker'] + const compute_category = curr['compute-category'] + // specific for datproc jobs + const dataproc_autozone = curr['goog-dataproc-autozone'] + const dataproc_name = curr['goog-dataproc-cluster-name'] + const dataproc_uuid = curr['goog-dataproc-cluster-uuid'] + const dataproc_location = curr['goog-dataproc-location'] + + const idx = aggData.findIndex((d) => d.key === curr[key]) + if (curr[key] !== undefined && cost >= 0) { + // do not include credits, should be filter out at API? + if (idx === -1) { + const rec = { + type: key, + key: curr[key], + ar_guid, + compute_category, + topic, + cost, + start_time: usageStartDate, + end_time: usageEndDate, + wdl_task_name: key === 'wdl-task-name' ? curr[key] : undefined, + cromwell_sub: key === 'cromwell-sub-workflow-name' ? curr[key] : undefined, + seq_group_id: key === 'seq_group_id' ? curr[key] : undefined, + } + + // append specific fields for dataproc jobs / cromwell jobs + setFieldValue('goog_pipelines_worker', goog_pipelines_worker, rec) + setFieldValue('cromwell_id', cromwell_id, rec) + setFieldValue('creator', creator, rec) + setFieldValue('dataproc_autozone', dataproc_autozone, rec) + setFieldValue('dataproc_name', dataproc_name, rec) + setFieldValue('dataproc_uuid', dataproc_uuid, rec) + setFieldValue('dataproc_location', dataproc_location, rec) + + // append to aggData + aggData.push(rec) + } else { + aggData[idx].cost += cost + aggData[idx].start_time = new Date( + Math.min(usageStartDate.getTime(), aggData[idx].start_time.getTime()) + ) + aggData[idx].end_time = new Date( + Math.max(usageEndDate.getTime(), aggData[idx].end_time.getTime()) + ) + } + } + }) + + return aggData +} + +function prepareDetails(data: any[], key: string) { + // aggregate data by key + const aggData: any[] = [] + data.forEach((curr) => { + const { cost, topic, sku } = curr + const ar_guid = curr['ar-guid'] + const cromwell_id = curr['cromwell-workflow-id'] + const idx = aggData.findIndex( + (d) => d.key === curr[key] && d.batch_resource === sku.description + ) + if (curr[key] !== undefined && cost >= 0) { + // do not include credits, should be filter out at API? + if (idx === -1) { + aggData.push({ + type: key, + key: curr[key], + ar_guid, + cromwell_id, + topic, + cost, + wdl_task_name: key === 'wdl-task-name' ? curr[key] : undefined, + cromwell_sub: key === 'cromwell-sub-workflow-name' ? curr[key] : undefined, + seq_group_id: key === 'seq_group_id' ? curr[key] : undefined, + batch_resource: sku.description, + }) + } else { + aggData[idx].cost += cost + } + } + }) + + return aggData +} + +const CromwellDataProcGrid: React.FunctionComponent<{ + data: any[] +}> = ({ data }) => { + // prepare aggregated row by ar_guid, wdl, sub, seq + const aggArGUIDData: any[] = prepareTotalRow(data, 'ar-guid') + const aggSubData: any[] = prepareTotalRow(data, 'cromwell-sub-workflow-name') + const aggWDLData: any[] = prepareTotalRow(data, 'wdl-task-name') + const aggSGData: any[] = prepareTotalRow(data, 'seq_group_id') + + // prepare detailed cost per sku + const aggArGUIDDetails: any[] = prepareDetails(data, 'ar-guid') + const aggSubDetails: any[] = prepareDetails(data, 'cromwell-sub-workflow-name') + const aggWDLDetails: any[] = prepareDetails(data, 'wdl-task-name') + const aggSGDetails: any[] = prepareDetails(data, 'seq_group_id') + + const aggData = [...aggArGUIDData, ...aggWDLData, ...aggSubData, ...aggSGData] + const aggResource = [...aggArGUIDDetails, ...aggSubDetails, ...aggWDLDetails, ...aggSGDetails] + + // combine data and resource for each ar_guid, wdl, sub, seq + const combinedData = aggData.map((dataItem) => { + const details = aggResource.filter( + (resourceItem) => + resourceItem.key === dataItem.key && resourceItem.type === dataItem.type + ) + return { ...dataItem, details } + }) + + const [openRows, setOpenRows] = React.useState([]) + + const handleToggle = (position: number) => { + if (!openRows.includes(position)) { + setOpenRows([...openRows, position]) + } else { + setOpenRows(openRows.filter((i) => i !== position)) + } + } + + const prepareBgColor = (log: any) => { + if ( + log.wdl_task_name === undefined && + log.cromwell_sub === undefined && + log.seq_group_id === undefined + ) { + return 'var(--color-border-color)' + } + return 'var(--color-bg)' + } + + const MAIN_FIELDS: Field[] = [ + { + category: 'job_id', + title: 'ID', + dataMap: (dataItem: any, _value: string) => { + if (dataItem.wdl_task_name !== undefined) { + return `WDL TASK: ${dataItem.wdl_task_name}` + } + if (dataItem.cromwell_sub !== undefined) { + return `CROMWELL SUB WORKFLOW : ${dataItem.cromwell_sub}` + } + if (dataItem.seq_group_id !== undefined) { + return `SEQ GROUP : ${dataItem.seq_group_id}` + } + return `AR GUID: ${dataItem.ar_guid}` + }, + }, + { + category: 'start_time', + title: 'TIME STARTED', + dataMap: (dataItem: any, value: string) => { + const dateValue = new Date(value) + return ( + + {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + + ) + }, + }, + { + category: 'end_time', + title: 'TIME COMPLETED', + dataMap: (dataItem: any, value: string) => { + const dateValue = new Date(value) + return ( + + {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()} + + ) + }, + }, + { + category: 'duration', + title: 'DURATION', + dataMap: (dataItem: any, _value: string) => { + const duration = new Date( + dataItem.end_time.getTime() - dataItem.start_time.getTime() + ) + const seconds = Math.floor((duration / 1000) % 60) + const minutes = Math.floor((duration / (1000 * 60)) % 60) + const hours = Math.floor((duration / (1000 * 60 * 60)) % 24) + const formattedDuration = `${hours}h ${minutes}m ${seconds}s` + return {formattedDuration} + }, + }, + { + category: 'cost', + title: 'COST', + dataMap: (dataItem: any, _value: string) => ( + ${dataItem.cost.toFixed(4)}} + position="top center" + /> + ), + }, + ] + + const DETAIL_FIELDS: Field[] = [ + { + category: 'compute_category', + title: 'COMPUTE CATEGORY', + }, + { + category: 'creator', + title: 'CREATOR', + }, + { + category: 'topic', + title: 'TOPIC', + }, + { + category: 'cromwell_id', + title: 'CROMWELL WORKFLOW ID', + }, + { + category: 'goog_pipelines_worker', + title: 'GOOGLE PIPELINES WORKER', + }, + { + category: 'dataproc_autozone', + title: 'DATAPROC AUTOZONE', + }, + { + category: 'dataproc_name', + title: 'DATAPROC CLUSTER NAME', + }, + { + category: 'dataproc_uuid', + title: 'DATAPROC CLUSTER UUID', + }, + { + category: 'dataproc_location', + title: 'DATAPROC LOCATION', + }, + ] + + const expandedRow = (log: any, idx: any) => + MAIN_FIELDS.map(({ category, dataMap, className }) => ( + + {dataMap ? dataMap(log, log[category]) : sanitiseValue(log[category])} + + )) + + return ( + + + + + {MAIN_FIELDS.map(({ category, title }, i) => ( + + {title} + + ))} + + + + {MAIN_FIELDS.map(({ category }, i) => ( + + ))} + + + + {combinedData + .sort((a, b) => { + // Sorts an array of objects on cost + if (a.cost < b.cost) { + return 1 + } + if (a.cost > b.cost) { + return -1 + } + return 0 + }) + .map((log, idx) => ( + + + + handleToggle(log.key)} + /> + + {expandedRow(log, idx)} + + {Object.entries(log) + .filter(([c]) => + DETAIL_FIELDS.map(({ category }) => category).includes(c) + ) + .map(([k, v]) => { + const detailField = DETAIL_FIELDS.find( + ({ category }) => category === k + ) + const title = detailField ? detailField.title : k + return ( + + + + {title} + + {v} + + ) + })} + + + + COST BREAKDOWN + + + {typeof log === 'object' && + 'details' in log && + _.orderBy(log?.details, ['cost'], ['desc']).map((dk) => ( + + + + {dk.batch_resource} + + ${dk.cost.toFixed(4)} + + ))} + + ))} + +
+ ) +} + +export default CromwellDataProcGrid diff --git a/web/src/pages/billing/components/HailBatchGrid.tsx b/web/src/pages/billing/components/HailBatchGrid.tsx index d3e8e199c..e7fd3a18b 100644 --- a/web/src/pages/billing/components/HailBatchGrid.tsx +++ b/web/src/pages/billing/components/HailBatchGrid.tsx @@ -49,7 +49,6 @@ const HailBatchGrid: React.FunctionComponent<{ } } }) - const aggArGUIDResource: any[] = [] data.forEach((curr) => { const { cost, batch_resource } = curr @@ -94,7 +93,7 @@ const HailBatchGrid: React.FunctionComponent<{ d.topic === topic && d.namespace === namespace ) - if (cost >= 0) { + if (batch_id !== undefined && cost >= 0) { // do not include credits, should be filter out at API? if (idx === -1) { aggBatchData.push({ @@ -135,7 +134,7 @@ const HailBatchGrid: React.FunctionComponent<{ d.topic === topic && d.namespace === namespace ) - if (cost >= 0) { + if (batch_id !== undefined && cost >= 0) { // do not include credits, should be filter out at API? if (idx === -1) { aggBatchResource.push({ @@ -169,7 +168,7 @@ const HailBatchGrid: React.FunctionComponent<{ d.topic === topic && d.namespace === namespace ) - if (cost >= 0) { + if (job_id !== undefined && cost >= 0) { if (idx === -1) { aggBatchJobData.push({ type: 'batch_id/job_id', @@ -208,7 +207,7 @@ const HailBatchGrid: React.FunctionComponent<{ d.topic === topic && d.namespace === namespace ) - if (cost >= 0) { + if (batch_id !== undefined && job_id !== undefined && cost >= 0) { if (idx === -1) { aggBatchJobResource.push({ type: 'batch_id/job_id',