diff --git a/db/python/layers/billing.py b/db/python/layers/billing.py
index 737be6857..e9fd83042 100644
--- a/db/python/layers/billing.py
+++ b/db/python/layers/billing.py
@@ -213,9 +213,12 @@ async def get_cost_by_ar_guid(
start_day,
end_day,
batches,
- ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid)
+ ) = await ar_batch_lookup_table.get_batches_by_ar_guid(
+ ar_guid, limit_to_hail_batches=False
+ )
- if not batches:
+ if not start_day:
+ # ar-guid job is not found
return BillingHailBatchCostRecord(
ar_guid=ar_guid,
batch_ids=[],
@@ -232,7 +235,6 @@ async def get_cost_by_ar_guid(
end_date=end_day.strftime('%Y-%m-%d'),
filters={
BillingColumn.LABELS: {
- 'batch_id': batches,
'ar-guid': ar_guid,
}
},
@@ -240,10 +242,13 @@ async def get_cost_by_ar_guid(
group_by=False,
time_column=BillingTimeColumn.USAGE_END_TIME,
time_periods=BillingTimePeriods.DAY,
+ # ar-guids without Hail batch ids (e.g. cromwell jobs) can have a lot of
+ # 0 cost items; min_cost=0 keeps only rows with cost > 0 for them
+ min_cost=0 if not batches else None,
)
billing_table = self.table_factory(query.source, query.fields)
records = await billing_table.get_total_cost(query)
return BillingHailBatchCostRecord(
ar_guid=ar_guid,
batch_ids=batches,
diff --git a/db/python/tables/bq/billing_ar_batch.py b/db/python/tables/bq/billing_ar_batch.py
index d9326b6b3..dfa0c6ee4 100644
--- a/db/python/tables/bq/billing_ar_batch.py
+++ b/db/python/tables/bq/billing_ar_batch.py
@@ -16,11 +16,15 @@ def get_table_name(self):
return self.table_name
async def get_batches_by_ar_guid(
- self, ar_guid: str
+ self, ar_guid: str, limit_to_hail_batches: bool = True
) -> tuple[datetime, datetime, list[str]]:
"""
Get batches for given ar_guid
+ Only Hail batch records carry a batch_id; Cromwell records have no batch_id,
+ only an ar_guid, so pass limit_to_hail_batches=False to include them.
"""
+ cond = 'AND batch_id IS NOT NULL' if limit_to_hail_batches else ''
+
_query = f"""
SELECT
batch_id,
@@ -28,7 +32,7 @@ async def get_batches_by_ar_guid(
MAX(max_day) as end_day
FROM `{self.table_name}`
WHERE ar_guid = @ar_guid
- AND batch_id IS NOT NULL
+ {cond}
GROUP BY batch_id
ORDER BY batch_id;
"""
@@ -41,7 +45,11 @@ async def get_batches_by_ar_guid(
if query_job_result:
start_day = min((row.start_day for row in query_job_result))
end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1)
- return start_day, end_day, [row.batch_id for row in query_job_result]
+ return (
+ start_day,
+ end_day,
+ [row.batch_id for row in query_job_result if row.batch_id],
+ )
# return empty list if no record found
return None, None, []
diff --git a/db/python/tables/bq/billing_base.py b/db/python/tables/bq/billing_base.py
index 335603c3b..3912da146 100644
--- a/db/python/tables/bq/billing_base.py
+++ b/db/python/tables/bq/billing_base.py
@@ -572,7 +572,7 @@ async def get_total_cost(
"""
# append min cost condition
- if query.min_cost:
+ if query.min_cost is not None:
_query += ' WHERE cost > @min_cost'
query_parameters.append(
bigquery.ScalarQueryParameter('min_cost', 'FLOAT64', query.min_cost)
diff --git a/web/src/pages/billing/BillingCostByAnalysis.tsx b/web/src/pages/billing/BillingCostByAnalysis.tsx
index 87e1ef153..8fcfbad17 100644
--- a/web/src/pages/billing/BillingCostByAnalysis.tsx
+++ b/web/src/pages/billing/BillingCostByAnalysis.tsx
@@ -6,6 +6,7 @@ import SearchIcon from '@mui/icons-material/Search'
import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks'
import { BillingApi, BillingTotalCostRecord } from '../../sm-api'
import HailBatchGrid from './components/HailBatchGrid'
+import CromwellDataProcGrid from './components/CromwellDataProcGrid'
import { getMonthStartDate } from '../../shared/utilities/monthStartEndDate'
import generateUrl from '../../shared/utilities/generateUrl'
@@ -191,23 +192,40 @@ const BillingCostByAnalysis: React.FunctionComponent = () => {
Try these examples:
- Ar guid: f5a065d2-c51f-46b7-a920-a89b639fc4ba
+ Search By Ar guid:
+
+ f5a065d2-c51f-46b7-a920-a89b639fc4ba (HailBatch)
+
+ dfdd532e-b12b-4f3a-af96-6e1163b28941 (Cromwell) b8ccaec8-423c-4299-953e-5b24157e47cd
+ huge cromwell job with seq groups
+
+ f1ab510c-d63c-41a9-8018-51b64806abca (Dataproc)
+
Batch id: 430604, 430605
)
- const gridCard = (gridData: BillingTotalCostRecord[]) => (
+ const gridHailBatchCard = (gridData: BillingTotalCostRecord[]) => (
)
+ const gridCromwellCard = (gridData: BillingTotalCostRecord[]) => (
+
+
+
+ )
+
const dataComponent = () => {
if (data !== undefined && data.costs.length > 0) {
// only render grid if there are available cost data
- return gridCard(data.costs)
+ if (data.batch_ids !== undefined && data.batch_ids.length > 0) {
+ return gridHailBatchCard(data.costs)
+ }
+ return gridCromwellCard(data.costs)
}
// if valid search text and no data return return No data message
diff --git a/web/src/pages/billing/components/CromwellDataProcGrid.tsx b/web/src/pages/billing/components/CromwellDataProcGrid.tsx
new file mode 100644
index 000000000..ee8716e55
--- /dev/null
+++ b/web/src/pages/billing/components/CromwellDataProcGrid.tsx
@@ -0,0 +1,414 @@
+import * as React from 'react'
+import { Table as SUITable, Popup, Checkbox } from 'semantic-ui-react'
+import _ from 'lodash'
+import Table from '../../../shared/components/Table'
+import sanitiseValue from '../../../shared/utilities/sanitiseValue'
+import '../../project/AnalysisRunnerView/AnalysisGrid.css'
+
+// Describes one column (MAIN_FIELDS) or one detail attribute (DETAIL_FIELDS)
+// of the grid.
+interface Field {
+ // property name looked up on each aggregated row
+ category: string
+ // human readable label for the field
+ title: string
+ width?: string
+ className?: string
+ // optional custom renderer, called with the whole row and the row's
+ // value for `category`
+ dataMap?: (data: any, value: string) => any
+}
+
+/**
+ * Copy `value` onto `rec[field]`; `rec` is left untouched when `value` is undefined.
+ */
+function setFieldValue(field: string, value: any, rec: any) {
+    if (value === undefined) {
+        // nothing to record for this field
+        return
+    }
+    rec[field] = value
+}
+
+/**
+ * Aggregate billing records into one summary row per distinct value of `key`.
+ *
+ * Records that lack `key`, or whose cost is not >= 0 (credits), are skipped.
+ * Within a group the costs are summed and the usage window is widened to the
+ * earliest start / latest end seen; all other fields are taken from the first
+ * record of the group.
+ *
+ * @param data raw cost records
+ * @param key  record field to group by, e.g. 'ar-guid' or 'wdl-task-name'
+ * @returns summary rows in order of first appearance
+ */
+function prepareTotalRow(data: any[], key: string) {
+    // group value -> summary row; a Map keeps first-seen order and avoids
+    // rescanning the accumulated rows for every record
+    const rowsByKey = new Map<any, any>()
+    data.forEach((curr) => {
+        const groupKey = curr[key]
+        const { cost } = curr
+        // do not include credits, should this be filtered out at the API?
+        if (groupKey === undefined || !(cost >= 0)) {
+            return
+        }
+
+        const usageStartDate = new Date(curr.usage_start_time)
+        const usageEndDate = new Date(curr.usage_end_time)
+
+        const existing = rowsByKey.get(groupKey)
+        if (existing !== undefined) {
+            // already seen: add the cost and widen the usage window
+            existing.cost += cost
+            existing.start_time = new Date(
+                Math.min(usageStartDate.getTime(), existing.start_time.getTime())
+            )
+            existing.end_time = new Date(
+                Math.max(usageEndDate.getTime(), existing.end_time.getTime())
+            )
+            return
+        }
+
+        const rec = {
+            type: key,
+            key: groupKey,
+            ar_guid: curr['ar-guid'],
+            compute_category: curr['compute-category'],
+            topic: curr.topic,
+            cost,
+            start_time: usageStartDate,
+            end_time: usageEndDate,
+            wdl_task_name: key === 'wdl-task-name' ? groupKey : undefined,
+            cromwell_sub: key === 'cromwell-sub-workflow-name' ? groupKey : undefined,
+            seq_group_id: key === 'seq_group_id' ? groupKey : undefined,
+        }
+
+        // append specific fields for cromwell jobs / dataproc jobs,
+        // only when the record actually carries them
+        setFieldValue('goog_pipelines_worker', curr['goog-pipelines-worker'], rec)
+        setFieldValue('cromwell_id', curr['cromwell-workflow-id'], rec)
+        setFieldValue('creator', curr.creator, rec)
+        setFieldValue('dataproc_autozone', curr['goog-dataproc-autozone'], rec)
+        setFieldValue('dataproc_name', curr['goog-dataproc-cluster-name'], rec)
+        setFieldValue('dataproc_uuid', curr['goog-dataproc-cluster-uuid'], rec)
+        setFieldValue('dataproc_location', curr['goog-dataproc-location'], rec)
+
+        rowsByKey.set(groupKey, rec)
+    })
+
+    return Array.from(rowsByKey.values())
+}
+
+/**
+ * Aggregate billing records into per-SKU cost rows for each distinct value of `key`.
+ *
+ * Records that lack `key`, or whose cost is not >= 0 (credits), are skipped
+ * before anything else is read from them. The remaining records are grouped
+ * by the pair (value of `key`, `sku.description`) and their costs summed.
+ *
+ * @param data raw cost records
+ * @param key  record field to group by, e.g. 'ar-guid' or 'wdl-task-name'
+ * @returns one row per (key value, SKU description) in order of first appearance
+ */
+function prepareDetails(data: any[], key: string) {
+    const aggData: any[] = []
+    data.forEach((curr) => {
+        const groupKey = curr[key]
+        const { cost, topic, sku } = curr
+        // do not include credits, should this be filtered out at the API?
+        if (groupKey === undefined || !(cost >= 0)) {
+            return
+        }
+
+        // a record without a sku is grouped under an undefined resource
+        // name instead of throwing
+        const batchResource = sku?.description
+        const existing = aggData.find(
+            (d) => d.key === groupKey && d.batch_resource === batchResource
+        )
+        if (existing !== undefined) {
+            existing.cost += cost
+            return
+        }
+
+        aggData.push({
+            type: key,
+            key: groupKey,
+            ar_guid: curr['ar-guid'],
+            cromwell_id: curr['cromwell-workflow-id'],
+            topic,
+            cost,
+            wdl_task_name: key === 'wdl-task-name' ? groupKey : undefined,
+            cromwell_sub: key === 'cromwell-sub-workflow-name' ? groupKey : undefined,
+            seq_group_id: key === 'seq_group_id' ? groupKey : undefined,
+            batch_resource: batchResource,
+        })
+    })
+
+    return aggData
+}
+
+// Grid of aggregated costs for an analysis run that has no Hail batch ids.
+// `data` is aggregated four ways (by ar-guid, cromwell sub workflow, wdl task
+// and sequencing group); every aggregate becomes one row, with its per-SKU
+// cost breakdown attached as `details`.
+const CromwellDataProcGrid: React.FunctionComponent<{
+ data: any[]
+}> = ({ data }) => {
+ // prepare aggregated row by ar_guid, wdl, sub, seq
+ const aggArGUIDData: any[] = prepareTotalRow(data, 'ar-guid')
+ const aggSubData: any[] = prepareTotalRow(data, 'cromwell-sub-workflow-name')
+ const aggWDLData: any[] = prepareTotalRow(data, 'wdl-task-name')
+ const aggSGData: any[] = prepareTotalRow(data, 'seq_group_id')
+
+ // prepare detailed cost per sku
+ const aggArGUIDDetails: any[] = prepareDetails(data, 'ar-guid')
+ const aggSubDetails: any[] = prepareDetails(data, 'cromwell-sub-workflow-name')
+ const aggWDLDetails: any[] = prepareDetails(data, 'wdl-task-name')
+ const aggSGDetails: any[] = prepareDetails(data, 'seq_group_id')
+
+ const aggData = [...aggArGUIDData, ...aggWDLData, ...aggSubData, ...aggSGData]
+ const aggResource = [...aggArGUIDDetails, ...aggSubDetails, ...aggWDLDetails, ...aggSGDetails]
+
+ // combine data and resource for each ar_guid, wdl, sub, seq:
+ // a detail row belongs to a summary row when both key and type match
+ const combinedData = aggData.map((dataItem) => {
+ const details = aggResource.filter(
+ (resourceItem) =>
+ resourceItem.key === dataItem.key && resourceItem.type === dataItem.type
+ )
+ return { ...dataItem, details }
+ })
+
+ // identifiers of the rows that are currently toggled open
+ const [openRows, setOpenRows] = React.useState([])
+
+ // add `position` to openRows when absent, remove it when present
+ // NOTE(review): declared as number, but the visible call site passes log.key,
+ // which is the grouped label value - confirm the intended type
+ const handleToggle = (position: number) => {
+ if (!openRows.includes(position)) {
+ setOpenRows([...openRows, position])
+ } else {
+ setOpenRows(openRows.filter((i) => i !== position))
+ }
+ }
+
+ // rows with none of wdl_task_name / cromwell_sub / seq_group_id set (the
+ // rows aggregated by ar-guid) get the border colour, all others the
+ // default background
+ const prepareBgColor = (log: any) => {
+ if (
+ log.wdl_task_name === undefined &&
+ log.cromwell_sub === undefined &&
+ log.seq_group_id === undefined
+ ) {
+ return 'var(--color-border-color)'
+ }
+ return 'var(--color-bg)'
+ }
+
+ // fields rendered for every row by expandedRow
+ const MAIN_FIELDS: Field[] = [
+ {
+ category: 'job_id',
+ title: 'ID',
+ // label the row by the first of wdl task / sub workflow / seq group
+ // that is set, falling back to the ar-guid
+ dataMap: (dataItem: any, _value: string) => {
+ if (dataItem.wdl_task_name !== undefined) {
+ return `WDL TASK: ${dataItem.wdl_task_name}`
+ }
+ if (dataItem.cromwell_sub !== undefined) {
+ return `CROMWELL SUB WORKFLOW : ${dataItem.cromwell_sub}`
+ }
+ if (dataItem.seq_group_id !== undefined) {
+ return `SEQ GROUP : ${dataItem.seq_group_id}`
+ }
+ return `AR GUID: ${dataItem.ar_guid}`
+ },
+ },
+ {
+ category: 'start_time',
+ title: 'TIME STARTED',
+ // an unparsable date renders as an empty string
+ dataMap: (dataItem: any, value: string) => {
+ const dateValue = new Date(value)
+ return (
+
+ {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()}
+
+ )
+ },
+ },
+ {
+ category: 'end_time',
+ title: 'TIME COMPLETED',
+ // an unparsable date renders as an empty string
+ dataMap: (dataItem: any, value: string) => {
+ const dateValue = new Date(value)
+ return (
+
+ {Number.isNaN(dateValue.getTime()) ? '' : dateValue.toLocaleString()}
+
+ )
+ },
+ },
+ {
+ category: 'duration',
+ title: 'DURATION',
+ // elapsed time between start_time and end_time as "Xh Ym Zs"
+ // NOTE(review): hours are taken modulo 24, so a run of a day or
+ // longer drops its whole days; the arithmetic is done on a Date
+ // object holding the millisecond difference - confirm both are intended
+ dataMap: (dataItem: any, _value: string) => {
+ const duration = new Date(
+ dataItem.end_time.getTime() - dataItem.start_time.getTime()
+ )
+ const seconds = Math.floor((duration / 1000) % 60)
+ const minutes = Math.floor((duration / (1000 * 60)) % 60)
+ const hours = Math.floor((duration / (1000 * 60 * 60)) % 24)
+ const formattedDuration = `${hours}h ${minutes}m ${seconds}s`
+ return {formattedDuration}
+ },
+ },
+ {
+ category: 'cost',
+ title: 'COST',
+ dataMap: (dataItem: any, _value: string) => (
+ ${dataItem.cost.toFixed(4)}}
+ position="top center"
+ />
+ ),
+ },
+ ]
+
+ // optional row attributes; only those actually present on a row are listed
+ // (see the Object.entries filter in the returned markup)
+ const DETAIL_FIELDS: Field[] = [
+ {
+ category: 'compute_category',
+ title: 'COMPUTE CATEGORY',
+ },
+ {
+ category: 'creator',
+ title: 'CREATOR',
+ },
+ {
+ category: 'topic',
+ title: 'TOPIC',
+ },
+ {
+ category: 'cromwell_id',
+ title: 'CROMWELL WORKFLOW ID',
+ },
+ {
+ category: 'goog_pipelines_worker',
+ title: 'GOOGLE PIPELINES WORKER',
+ },
+ {
+ category: 'dataproc_autozone',
+ title: 'DATAPROC AUTOZONE',
+ },
+ {
+ category: 'dataproc_name',
+ title: 'DATAPROC CLUSTER NAME',
+ },
+ {
+ category: 'dataproc_uuid',
+ title: 'DATAPROC CLUSTER UUID',
+ },
+ {
+ category: 'dataproc_location',
+ title: 'DATAPROC LOCATION',
+ },
+ ]
+
+ // one entry per MAIN_FIELDS item for a row: the field's dataMap output when
+ // it has one, otherwise the sanitised raw value
+ const expandedRow = (log: any, idx: any) =>
+ MAIN_FIELDS.map(({ category, dataMap, className }) => (
+
+ {dataMap ? dataMap(log, log[category]) : sanitiseValue(log[category])}
+
+ ))
+
+ return (
+
+
+
+
+ {MAIN_FIELDS.map(({ category, title }, i) => (
+
+ {title}
+
+ ))}
+
+
+
+ {MAIN_FIELDS.map(({ category }, i) => (
+
+ ))}
+
+
+
+ {combinedData
+ .sort((a, b) => {
+ // most expensive rows first; sort() reorders combinedData in
+ // place, which is a fresh array built on every render
+ if (a.cost < b.cost) {
+ return 1
+ }
+ if (a.cost > b.cost) {
+ return -1
+ }
+ return 0
+ })
+ .map((log, idx) => (
+
+
+
+ handleToggle(log.key)}
+ />
+
+ {expandedRow(log, idx)}
+
+ {Object.entries(log)
+ .filter(([c]) =>
+ DETAIL_FIELDS.map(({ category }) => category).includes(c)
+ )
+ .map(([k, v]) => {
+ const detailField = DETAIL_FIELDS.find(
+ ({ category }) => category === k
+ )
+ const title = detailField ? detailField.title : k
+ return (
+
+
+
+ {title}
+
+ {v}
+
+ )
+ })}
+
+
+
+ COST BREAKDOWN
+
+
+ {typeof log === 'object' &&
+ 'details' in log &&
+ _.orderBy(log?.details, ['cost'], ['desc']).map((dk) => (
+
+
+
+ {dk.batch_resource}
+
+ ${dk.cost.toFixed(4)}
+
+ ))}
+
+ ))}
+
+
+ )
+}
+
+export default CromwellDataProcGrid
diff --git a/web/src/pages/billing/components/HailBatchGrid.tsx b/web/src/pages/billing/components/HailBatchGrid.tsx
index d3e8e199c..e7fd3a18b 100644
--- a/web/src/pages/billing/components/HailBatchGrid.tsx
+++ b/web/src/pages/billing/components/HailBatchGrid.tsx
@@ -49,7 +49,6 @@ const HailBatchGrid: React.FunctionComponent<{
}
}
})
-
const aggArGUIDResource: any[] = []
data.forEach((curr) => {
const { cost, batch_resource } = curr
@@ -94,7 +93,7 @@ const HailBatchGrid: React.FunctionComponent<{
d.topic === topic &&
d.namespace === namespace
)
- if (cost >= 0) {
+ if (batch_id !== undefined && cost >= 0) {
// do not include credits, should be filter out at API?
if (idx === -1) {
aggBatchData.push({
@@ -135,7 +134,7 @@ const HailBatchGrid: React.FunctionComponent<{
d.topic === topic &&
d.namespace === namespace
)
- if (cost >= 0) {
+ if (batch_id !== undefined && cost >= 0) {
// do not include credits, should be filter out at API?
if (idx === -1) {
aggBatchResource.push({
@@ -169,7 +168,7 @@ const HailBatchGrid: React.FunctionComponent<{
d.topic === topic &&
d.namespace === namespace
)
- if (cost >= 0) {
+ if (job_id !== undefined && cost >= 0) {
if (idx === -1) {
aggBatchJobData.push({
type: 'batch_id/job_id',
@@ -208,7 +207,7 @@ const HailBatchGrid: React.FunctionComponent<{
d.topic === topic &&
d.namespace === namespace
)
- if (cost >= 0) {
+ if (batch_id !== undefined && job_id !== undefined && cost >= 0) {
if (idx === -1) {
aggBatchJobResource.push({
type: 'batch_id/job_id',