Skip to content

Commit

Permalink
Show billing for Cromwell and Dataproc jobs.
Browse files Browse the repository at this point in the history
  • Loading branch information
milo-hyben committed Feb 1, 2024
1 parent 6134af9 commit 4f34f3a
Show file tree
Hide file tree
Showing 6 changed files with 463 additions and 15 deletions.
12 changes: 9 additions & 3 deletions db/python/layers/billing.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,12 @@ async def get_cost_by_ar_guid(
start_day,
end_day,
batches,
) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid)
) = await ar_batch_lookup_table.get_batches_by_ar_guid(
ar_guid, limit_to_hail_batches=False
)

if not batches:
if not start_day:
# ar-guid job is not found
return BillingHailBatchCostRecord(
ar_guid=ar_guid,
batch_ids=[],
Expand All @@ -232,18 +235,21 @@ async def get_cost_by_ar_guid(
end_date=end_day.strftime('%Y-%m-%d'),
filters={
BillingColumn.LABELS: {
'batch_id': batches,
'ar-guid': ar_guid,
}
},
filters_op='OR',
group_by=False,
time_column=BillingTimeColumn.USAGE_END_TIME,
time_periods=BillingTimePeriods.DAY,
# cromwell jobs specifically can have a lot of 0 cost items,
# so we want to exclude them
min_cost=0 if not batches else None,
)

billing_table = self.table_factory(query.source, query.fields)
records = await billing_table.get_total_cost(query)
print(records)
return BillingHailBatchCostRecord(
ar_guid=ar_guid,
batch_ids=batches,
Expand Down
14 changes: 11 additions & 3 deletions db/python/tables/bq/billing_ar_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,23 @@ def get_table_name(self):
return self.table_name

async def get_batches_by_ar_guid(
self, ar_guid: str
self, ar_guid: str, limit_to_hail_batches: bool = True
) -> tuple[datetime, datetime, list[str]]:
"""
Get batches for given ar_guid
Only Hail batch contains batch_id,
cromwell batch does not have batch_id, only ar_guid
"""
cond = 'AND batch_id IS NOT NULL' if limit_to_hail_batches else ''

_query = f"""
SELECT
batch_id,
MIN(min_day) as start_day,
MAX(max_day) as end_day
FROM `{self.table_name}`
WHERE ar_guid = @ar_guid
AND batch_id IS NOT NULL
{cond}
GROUP BY batch_id
ORDER BY batch_id;
"""
Expand All @@ -41,7 +45,11 @@ async def get_batches_by_ar_guid(
if query_job_result:
start_day = min((row.start_day for row in query_job_result))
end_day = max((row.end_day for row in query_job_result)) + timedelta(days=1)
return start_day, end_day, [row.batch_id for row in query_job_result]
return (
start_day,
end_day,
[row.batch_id for row in query_job_result if row.batch_id],
)

# return empty list if no record found
return None, None, []
Expand Down
5 changes: 4 additions & 1 deletion db/python/tables/bq/billing_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ def _execute_query(
else:
job_config = bigquery.QueryJobConfig(labels=BQ_LABELS)

print(query)
print(params)

if results_as_list:
return list(
self._connection.connection.query(query, job_config=job_config).result()
Expand Down Expand Up @@ -572,7 +575,7 @@ async def get_total_cost(
"""

# append min cost condition
if query.min_cost:
if query.min_cost is not None:
_query += ' WHERE cost > @min_cost'
query_parameters.append(
bigquery.ScalarQueryParameter('min_cost', 'FLOAT64', query.min_cost)
Expand Down
24 changes: 21 additions & 3 deletions web/src/pages/billing/BillingCostByAnalysis.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import SearchIcon from '@mui/icons-material/Search'
import LoadingDucks from '../../shared/components/LoadingDucks/LoadingDucks'
import { BillingApi, BillingTotalCostRecord } from '../../sm-api'
import HailBatchGrid from './components/HailBatchGrid'
import CromwellDataProcGrid from './components/CromwellDataProcGrid'
import { getMonthStartDate } from '../../shared/utilities/monthStartEndDate'
import generateUrl from '../../shared/utilities/generateUrl'

Expand Down Expand Up @@ -191,23 +192,40 @@ const BillingCostByAnalysis: React.FunctionComponent = () => {
<br />
Try these examples:
<br />
Ar guid: f5a065d2-c51f-46b7-a920-a89b639fc4ba
Search By Ar guid:
<br />
f5a065d2-c51f-46b7-a920-a89b639fc4ba (HailBatch)
<br />
dfdd532e-b12b-4f3a-af96-6e1163b28941 (Cromwell) b8ccaec8-423c-4299-953e-5b24157e47cd
huge cromwell job with seq groups
<br />
f1ab510c-d63c-41a9-8018-51b64806abca (Dataproc)
<br />
<br />
Batch id: 430604, 430605
</p>
</Card>
)

const gridCard = (gridData: BillingTotalCostRecord[]) => (
// Renders the Hail Batch cost grid inside a padded card that scrolls horizontally.
const gridHailBatchCard = (gridData: BillingTotalCostRecord[]) => {
    return (
        <Card id="billing-container-data" fluid style={{ padding: '20px', overflowX: 'scroll' }}>
            <HailBatchGrid data={gridData} />
        </Card>
    )
}

// Renders the Cromwell / Dataproc cost grid inside a padded card that scrolls horizontally.
const gridCromwellCard = (gridData: BillingTotalCostRecord[]) => {
    return (
        <Card id="billing-container-data" fluid style={{ padding: '20px', overflowX: 'scroll' }}>
            <CromwellDataProcGrid data={gridData} />
        </Card>
    )
}

const dataComponent = () => {
if (data !== undefined && data.costs.length > 0) {
// only render grid if there are available cost data
return gridCard(data.costs)
if (data.batch_ids !== undefined && data.batch_ids.length > 0) {
return gridHailBatchCard(data.costs)
}
return gridCromwellCard(data.costs)
}

// if valid search text and no data returned, return No data message
Expand Down
Loading

0 comments on commit 4f34f3a

Please sign in to comment.