
Commit a7851ad

Merge branch 'dev' of github.com:populationgenomics/metamist into add-family-participants-to-graphql

illusional committed Apr 23, 2024
2 parents: 9f50063 + 8ee989f
Showing 11 changed files with 67 additions and 30 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/deploy.yaml
@@ -14,6 +14,7 @@ permissions:
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    environment: production
     env:
       DOCKER_BUILDKIT: 1
       BUILDKIT_PROGRESS: plain
@@ -30,7 +31,7 @@ jobs:
       name: "Authenticate to Google Cloud"
       uses: "google-github-actions/auth@v2"
       with:
-        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/gh-deploy-pool/providers/gh-provider"
+        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
         service_account: "[email protected]"

     - id: "google-cloud-sdk-setup"
4 changes: 2 additions & 2 deletions .github/workflows/deploy_schema_updater.yaml
@@ -10,7 +10,7 @@ permissions:
 jobs:
   build-image:
     runs-on: ubuntu-latest
-
+    environment: production
     steps:
       - uses: actions/checkout@v4

@@ -25,7 +25,7 @@ jobs:
       name: "Authenticate to Google Cloud"
       uses: "google-github-actions/auth@v2"
       with:
-        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/gh-deploy-pool/providers/gh-provider"
+        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
         service_account: "[email protected]"

     - id: "google-cloud-sdk-setup"
3 changes: 2 additions & 1 deletion .github/workflows/trigger_schema_updater.yaml
@@ -19,6 +19,7 @@ permissions:
 jobs:
   invoke-cloud-run:
     runs-on: ubuntu-latest
+    environment: production
     steps:
       - uses: actions/checkout@v4

@@ -33,7 +34,7 @@ jobs:
       name: "Authenticate to Google Cloud"
       uses: "google-github-actions/auth@v2"
       with:
-        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/gh-deploy-pool/providers/gh-provider"
+        workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
         service_account: "[email protected]"

     - id: "google-cloud-sdk-setup"
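All three workflows add `environment: production` alongside the move from the `gh-deploy-pool` identity provider to `github-pool`. Binding a job to a named environment stamps that environment into the OIDC token the job presents to Google Cloud, which a workload identity pool provider can then require, and it lets the repository attach protection rules to deploys. A minimal sketch of the pattern (the `id-token` permission is required by `google-github-actions/auth`; any reviewer rules are an assumption and would live in the repository's environment settings, not in this commit):

```yaml
# Sketch only: a job bound to the "production" environment authenticating
# to Google Cloud via OIDC. Protection rules (e.g. required reviewers) are
# configured on the environment in the repo settings, not in this file.
jobs:
  deploy:
    runs-on: ubuntu-latest
    environment: production
    permissions:
      id-token: write   # needed to mint the OIDC token for auth@v2
      contents: read
    steps:
      - uses: actions/checkout@v4
      - uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: "projects/774248915715/locations/global/workloadIdentityPools/github-pool/providers/github-provider"
          service_account: "[email protected]"
```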
25 changes: 19 additions & 6 deletions db/python/layers/billing.py
@@ -236,18 +236,31 @@ async def get_cost_by_batch_id(
         ar_batch_lookup_table = BillingArBatchTable(self.connection)

         # First get all batches and the min/max day to use for the query
-        ar_guid = await ar_batch_lookup_table.get_ar_guid_by_batch_id(batch_id)
-
-        # The get all batches for the ar_guid
         (
             start_day,
             end_day,
-            batches,
-        ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid)
+            ar_guid,
+        ) = await ar_batch_lookup_table.get_ar_guid_by_batch_id(batch_id)

-        if not batches:
+        if ar_guid is None:
             return []

+        if ar_guid != batch_id:
+            # found an ar_guid for the batch_id,
+            # then get all batches for the ar_guid
+            (
+                start_day,
+                end_day,
+                batches,
+            ) = await ar_batch_lookup_table.get_batches_by_ar_guid(ar_guid)
+
+            if not batches:
+                return []
+        else:
+            # ar_guid is not present, so use the batch_id
+            batches = [batch_id]
+            ar_guid = None
+
         billing_table = BillingDailyExtendedTable(self.connection)
         results = await billing_table.get_batch_cost_summary(
             start_day, end_day, batches, ar_guid
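The layer now has to handle three outcomes from the lookup instead of assuming every batch sits under an analysis-runner (AR) GUID. A runnable sketch of the branching, using a hypothetical in-memory stub in place of the BigQuery-backed table:

```python
import asyncio

class StubLookup:
    """Hypothetical stand-in for BillingArBatchTable, for illustration only."""

    async def get_ar_guid_by_batch_id(self, batch_id):
        # (start_day, end_day, ar_guid); a batch with no AR echoes its own id
        data = {
            'b1': ('2024-01-01', '2024-01-03', 'AR_1'),
            'b2': ('2024-01-05', '2024-01-06', 'b2'),
        }
        return data.get(batch_id, (None, None, None))

    async def get_batches_by_ar_guid(self, ar_guid):
        return '2024-01-01', '2024-01-03', ['b1', 'b1-sibling']

async def cost_by_batch_id(batch_id):
    lookup = StubLookup()
    start_day, end_day, ar_guid = await lookup.get_ar_guid_by_batch_id(batch_id)
    if ar_guid is None:
        return []  # batch_id not found at all
    if ar_guid != batch_id:
        # a real AR GUID: widen the query to every batch under that AR
        start_day, end_day, batches = await lookup.get_batches_by_ar_guid(ar_guid)
        if not batches:
            return []
    else:
        # batch exists but was never tied to an AR: query it on its own
        batches, ar_guid = [batch_id], None
    return [(start_day, end_day, batches, ar_guid)]  # would feed get_batch_cost_summary

print(asyncio.run(cost_by_batch_id('b1')))  # widened to the AR's batches
print(asyncio.run(cost_by_batch_id('b2')))  # single batch, ar_guid=None
print(asyncio.run(cost_by_batch_id('zz')))  # -> []
```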
34 changes: 26 additions & 8 deletions db/python/tables/bq/billing_ar_batch.py
@@ -17,7 +17,7 @@ def get_table_name(self):

     async def get_batches_by_ar_guid(
         self, ar_guid: str
-    ) -> tuple[datetime, datetime, list[str]]:
+    ) -> tuple[datetime | None, datetime | None, list[str]]:
         """
         Get batches for given ar_guid
         """
@@ -46,24 +46,42 @@ async def get_batches_by_ar_guid(
         # return empty list if no record found
         return None, None, []

-    async def get_ar_guid_by_batch_id(self, batch_id: str) -> str:
+    async def get_ar_guid_by_batch_id(
+        self, batch_id: str | None
+    ) -> tuple[datetime | None, datetime | None, str | None]:
         """
-        Get ar_guid for given batch_id
+        Get ar_guid for given batch_id;
+        if batch_id is found but has no ar_guid, return the batch_id back
         """
+        if batch_id is None:
+            return None, None, None
+
         _query = f"""
-        SELECT ar_guid
+        SELECT ar_guid,
+        MIN(min_day) as start_day,
+        MAX(max_day) as end_day
         FROM `{self.table_name}`
         WHERE batch_id = @batch_id
-        AND ar_guid IS NOT NULL
-        LIMIT 1;
+        GROUP BY ar_guid
+        ORDER BY ar_guid DESC; -- make NULL values appear last
         """

         query_parameters = [
             bigquery.ScalarQueryParameter('batch_id', 'STRING', batch_id),
         ]
         query_job_result = self._execute_query(_query, query_parameters)
         if query_job_result:
-            return query_job_result[0]['ar_guid']
+            if len(query_job_result) > 1 and query_job_result[1]['ar_guid'] is not None:
+                raise ValueError(f'Multiple ARs found for batch_id: {batch_id}')
+
+            ar_guid = query_job_result[0]['ar_guid']
+            start_day = query_job_result[0]['start_day']
+            end_day = query_job_result[0]['end_day'] + timedelta(days=1)
+            if ar_guid:
+                return start_day, end_day, ar_guid
+
+            # if ar_guid not found but batch_id exists
+            return start_day, end_day, batch_id

         # return None if no ar_guid found
-        return None
+        return None, None, None
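Three details in this rewrite are easy to miss: the query now aggregates `MIN(min_day)`/`MAX(max_day)` per `ar_guid`; `ORDER BY ar_guid DESC` leans on BigQuery sorting NULLs last in descending order, so a real AR GUID, if one exists, always lands in row 0; and `end_day` gains a day to make the bound end-exclusive (`timedelta` is presumably imported at the top of the module, outside this hunk). A small runnable sketch of the row handling, over hypothetical rows shaped like the BigQuery result:

```python
from datetime import datetime, timedelta

def pick_ar_guid(rows: list[dict], batch_id: str):
    # Mirrors the new result handling; assumes rows arrive sorted ar_guid DESC,
    # so a NULL-ar_guid group (if any) can only appear after a real one.
    if not rows:
        return None, None, None  # batch_id not in the table at all
    if len(rows) > 1 and rows[1]['ar_guid'] is not None:
        raise ValueError(f'Multiple ARs found for batch_id: {batch_id}')
    start_day = rows[0]['start_day']
    end_day = rows[0]['end_day'] + timedelta(days=1)  # end-exclusive bound
    # fall back to the batch_id when the batch exists without an AR
    return start_day, end_day, rows[0]['ar_guid'] or batch_id

rows = [{'ar_guid': 'AR_1',
         'start_day': datetime(2024, 1, 1),
         'end_day': datetime(2024, 1, 2)}]
print(pick_ar_guid(rows, 'b1'))  # (2024-01-01, 2024-01-03, 'AR_1')
print(pick_ar_guid([], 'b1'))    # (None, None, None)
```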
8 changes: 4 additions & 4 deletions db/python/tables/bq/billing_daily_extended.py
@@ -212,7 +212,7 @@ async def get_batch_cost_summary(
             sum(d.cost) AS cost,
             MIN(d.usage_start_time) AS usage_start_time,
             max(d.usage_end_time) AS usage_end_time,
-            COUNT(DISTINCT d.job_id) as jobs_cnt
+            MAX(d.job_id) as jobs_cnt
         FROM d
         WHERE d.batch_id IS NOT NULL
         GROUP BY batch_id, batch_name
@@ -253,7 +253,7 @@ async def get_batch_cost_summary(
             sum(d.cost) AS cost,
             MIN(d.usage_start_time) AS usage_start_time,
             max(d.usage_end_time) AS usage_end_time,
-            COUNT(DISTINCT d.job_id) as jobs_cnt
+            MAX(d.job_id) as jobs_cnt
         FROM d
         WHERE d.wdl_task_name IS NOT NULL
         GROUP BY wdl_task_name
@@ -283,7 +283,7 @@ async def get_batch_cost_summary(
             sum(d.cost) AS cost,
             MIN(d.usage_start_time) AS usage_start_time,
             max(d.usage_end_time) AS usage_end_time,
-            COUNT(DISTINCT d.job_id) as jobs_cnt
+            MAX(d.job_id) as jobs_cnt
         FROM d
         WHERE d.cromwell_workflow_id IS NOT NULL
         GROUP BY cromwell_workflow_id
@@ -313,7 +313,7 @@ async def get_batch_cost_summary(
             sum(d.cost) AS cost,
             MIN(d.usage_start_time) AS usage_start_time,
             max(d.usage_end_time) AS usage_end_time,
-            COUNT(DISTINCT d.job_id) as jobs_cnt
+            MAX(d.job_id) as jobs_cnt
         FROM d
         WHERE d.cromwell_sub_workflow_name IS NOT NULL
         GROUP BY cromwell_sub_workflow_name
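Swapping `COUNT(DISTINCT d.job_id)` for `MAX(d.job_id)` only yields the same `jobs_cnt` if job IDs are assigned sequentially from 1 within each batch, which appears to be the assumption here; `MAX` is also cheaper than a distinct count, since each job usually spans several billing rows. The equivalence, and where it would break, in a few lines:

```python
# Billing rows repeat each job_id, so a plain COUNT would overcount;
# with sequential 1-based job IDs, MAX(job_id) equals the distinct count.
rows = [1, 1, 2, 2, 3]
assert max(rows) == len(set(rows)) == 3

# If job IDs could be sparse (hypothetical), MAX would overcount:
sparse = [1, 5]
assert max(sparse) == 5 and len(set(sparse)) == 2
```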
2 changes: 1 addition & 1 deletion requirements.txt
@@ -2,7 +2,7 @@
 # fix boto3/core version to prevent pip downloading all the versions caused by cloudpathlib
 boto3==1.28.56
 botocore==1.31.56
-cpg-utils
+cpg-utils>=5.0.5
 aiohttp
 async_lru
 cloudpathlib
2 changes: 1 addition & 1 deletion setup.py
@@ -37,7 +37,7 @@
         'requests',
         'typing-extensions',
         # for get id-token
-        'cpg-utils >= 4.9.4',
+        'cpg-utils >= 5.0.5',
         'gql[aiohttp,requests]',
         'tabulate >= 0.9.0'
     ],
12 changes: 9 additions & 3 deletions test/test_bq_billing_ar_batch.py
@@ -168,7 +168,7 @@ async def test_get_ar_guid_by_batch_id_no_data(self):
         self.bq_result.result.return_value = []

         # test get_ar_guid_by_batch_id function
-        ar_guid = await self.table_obj.get_ar_guid_by_batch_id(batch_id)
+        _, _, ar_guid = await self.table_obj.get_ar_guid_by_batch_id(batch_id)

         self.assertEqual(None, ar_guid)

@@ -180,9 +180,15 @@ async def test_get_ar_guid_by_batch_id_one_rec(self):
         expected_ar_guid = 'AR_GUID_1234'

         # mock BigQuery result
-        self.bq_result.result.return_value = [{'ar_guid': expected_ar_guid}]
+        self.bq_result.result.return_value = [
+            {
+                'ar_guid': expected_ar_guid,
+                'start_day': datetime.datetime(2024, 1, 1, 0, 0),
+                'end_day': datetime.datetime(2024, 1, 2, 0, 0),
+            }
+        ]

         # test get_ar_guid_by_batch_id function
-        ar_guid = await self.table_obj.get_ar_guid_by_batch_id(batch_id)
+        _, _, ar_guid = await self.table_obj.get_ar_guid_by_batch_id(batch_id)

         self.assertEqual(expected_ar_guid, ar_guid)
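The mocked row must now carry `start_day` and `end_day`, since the method reads them unconditionally, but only the third element of the returned tuple is asserted. A hypothetical extra check (not part of this commit) that would pin down the date handling in the same test:

```python
# Hypothetical additional assertions: end_day comes back one day after
# the mocked value, making the returned range end-exclusive.
_start_day, _end_day, _ = await self.table_obj.get_ar_guid_by_batch_id(batch_id)
self.assertEqual(datetime.datetime(2024, 1, 1, 0, 0), _start_day)
self.assertEqual(datetime.datetime(2024, 1, 3, 0, 0), _end_day)  # +1 day
```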
2 changes: 0 additions & 2 deletions test/test_layers_billing.py
@@ -448,8 +448,6 @@ async def test_get_cost_by_batch_id(self):
                 batch_id=dummy_batch_id,
                 start_day=given_start_day,
                 end_day=given_end_day,
-                # mockup __getitem__ to return dummy_ar_guid
-                __getitem__=mock.MagicMock(return_value=dummy_ar_guid),
             ),
         ]
         self.bq_result.result.return_value = mock_rows
2 changes: 1 addition & 1 deletion web/src/pages/billing/components/BatchGrid.tsx
@@ -260,7 +260,7 @@ const BatchCard: React.FC<{ item: AnalysisCostRecordBatch }> = ({ item }) => {

             <DisplayRow label="Cost">
                 {formatMoney(item.cost, 4)}{' '}
-                {item.jobs?.length > 0 && <em>- across {item.jobs.length} job(s)</em>}
+                {(item?.jobs_cnt || 0) > 0 && <em>- across {item.jobs_cnt} job(s)</em>}
             </DisplayRow>

             <DisplayRow label="Start" isVisible={isOpen}>
