Skip to content

Commit

Permalink
Applied changes as suggested by review.
Browse files Browse the repository at this point in the history
  • Loading branch information
milo-hyben committed Jan 24, 2024
1 parent efc09eb commit bab64f9
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 112 deletions.
107 changes: 62 additions & 45 deletions db/python/tables/bq/billing_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from models.models import (
BillingColumn,
BillingCostBudgetRecord,
BillingCostDetailsRecord,
BillingTimePeriods,
BillingTotalCostQueryModel,
)
Expand All @@ -29,46 +30,60 @@
'TimeGroupingDetails', ['field', 'formula', 'separator']
)

# constants to abbrevate (S)tores and (C)ompute
STORAGE = 'S'
COMPUTE = 'C'


def abbrev_cost_category(cost_category: str) -> str:
"""abbreviate cost category"""
return 'S' if cost_category == 'Cloud Storage' else 'C'
return STORAGE if cost_category == 'Cloud Storage' else COMPUTE


def prepare_time_periods(
query: BillingTotalCostQueryModel,
) -> TimeGroupingDetails:
"""Prepare Time periods grouping and parsing formulas"""
time_column = query.time_column or 'day'
result = TimeGroupingDetails('', '', '')
time_column = query.time_column or BillingTimePeriods.DAY

# Based on specified time period, add the corresponding column
if query.time_periods == BillingTimePeriods.DAY:
result = TimeGroupingDetails(
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day',
formula='PARSE_DATE("%Y-%m-%d", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.WEEK:
result = TimeGroupingDetails(

if query.time_periods == BillingTimePeriods.WEEK:
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%W", {time_column}) as day',
formula='PARSE_DATE("%Y%W", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.MONTH:
result = TimeGroupingDetails(

if query.time_periods == BillingTimePeriods.MONTH:
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%m", {time_column}) as day',
formula='PARSE_DATE("%Y%m", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.INVOICE_MONTH:
result = TimeGroupingDetails(

if query.time_periods == BillingTimePeriods.INVOICE_MONTH:
return TimeGroupingDetails(
field='invoice_month as day',
formula='PARSE_DATE("%Y%m", day) as day',
separator=',',
)

return result
return TimeGroupingDetails('', '', '')


def time_optimisation_parameter() -> bigquery.ScalarQueryParameter:
"""
BQ tables and views are partitioned by day, to avoid full scans
we need to limit the amount of data scanned
"""
return bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL))


class BillingBaseTable(BqDbBase):
Expand Down Expand Up @@ -165,7 +180,7 @@ async def _budgets_by_gcp_project(
WITH t AS (
SELECT gcp_project, MAX(created_at) as last_created_at
FROM `{BQ_BUDGET_VIEW}`
GROUP BY 1
GROUP BY gcp_project
)
SELECT t.gcp_project, d.budget
FROM t inner join `{BQ_BUDGET_VIEW}` d
Expand Down Expand Up @@ -193,7 +208,7 @@ async def _last_loaded_day(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -328,38 +343,36 @@ async def _append_total_running_cost(
all_details = []
for cat, mth_cost in total_monthly_category.items():
all_details.append(
{
'cost_group': abbrev_cost_category(cat),
'cost_category': cat,
'daily_cost': total_daily_category[cat]
if is_current_month
else None,
'monthly_cost': mth_cost,
}
BillingCostDetailsRecord(
cost_group=abbrev_cost_category(cat),
cost_category=cat,
daily_cost=total_daily_category[cat] if is_current_month else None,
monthly_cost=mth_cost,
)
)

# add total row: compute + storage
results.append(
BillingCostBudgetRecord.from_json(
{
'field': f'{BillingColumn.generate_all_title(field)}',
'total_monthly': (
total_monthly['C']['ALL'] + total_monthly['S']['ALL']
),
'total_daily': (total_daily['C']['ALL'] + total_daily['S']['ALL'])
if is_current_month
else None,
'compute_monthly': total_monthly['C']['ALL'],
'compute_daily': (total_daily['C']['ALL'])
if is_current_month
else None,
'storage_monthly': total_monthly['S']['ALL'],
'storage_daily': (total_daily['S']['ALL'])
if is_current_month
else None,
'details': all_details,
'last_loaded_day': last_loaded_day,
}
BillingCostBudgetRecord(
field=f'{BillingColumn.generate_all_title(field)}',
total_monthly=(
total_monthly[COMPUTE]['ALL'] + total_monthly[STORAGE]['ALL']
),
total_daily=(total_daily[COMPUTE]['ALL'] + total_daily[STORAGE]['ALL'])
if is_current_month
else None,
compute_monthly=total_monthly[COMPUTE]['ALL'],
compute_daily=(total_daily[COMPUTE]['ALL'])
if is_current_month
else None,
storage_monthly=total_monthly[STORAGE]['ALL'],
storage_daily=(total_daily[STORAGE]['ALL'])
if is_current_month
else None,
details=all_details,
budget_spent=None,
budget=None,
last_loaded_day=last_loaded_day,
)
)

Expand All @@ -385,13 +398,17 @@ async def _append_running_cost_records(

# add rows by field
for key, details in field_details.items():
compute_daily = total_daily['C'][key] if key in total_daily['C'] else 0
storage_daily = total_daily['S'][key] if key in total_daily['S'] else 0
compute_daily = (
total_daily[COMPUTE][key] if key in total_daily[COMPUTE] else 0
)
storage_daily = (
total_daily[STORAGE][key] if key in total_daily[STORAGE] else 0
)
compute_monthly = (
total_monthly['C'][key] if key in total_monthly['C'] else 0
total_monthly[COMPUTE][key] if key in total_monthly[COMPUTE] else 0
)
storage_monthly = (
total_monthly['S'][key] if key in total_monthly['S'] else 0
total_monthly[STORAGE][key] if key in total_monthly[STORAGE] else 0
)
monthly = compute_monthly + storage_monthly
budget_monthly = budgets_per_gcp_project.get(key)
Expand Down
20 changes: 12 additions & 8 deletions db/python/tables/bq/billing_daily.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from google.cloud import bigquery

from api.settings import BQ_AGGREG_VIEW, BQ_DAYS_BACK_OPTIMAL
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_AGGREG_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)


class BillingDailyTable(BillingBaseTable):
Expand Down Expand Up @@ -31,7 +34,7 @@ async def get_topics(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand All @@ -42,12 +45,13 @@ async def get_topics(self):
return []

async def get_invoice_months(self):
"""Get all invoice months in database"""
"""Get all invoice months in database
Aggregated views contain invoice_month field
"""

_query = f"""
SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month
SELECT DISTINCT invoice_month
FROM `{self.table_name}`
WHERE EXTRACT(day from day) = 1
ORDER BY invoice_month DESC;
"""

Expand Down Expand Up @@ -76,7 +80,7 @@ async def get_cost_categories(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -114,7 +118,7 @@ async def get_skus(
_query += ' OFFSET @offset_val'

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
bigquery.ScalarQueryParameter('limit_val', 'INT64', limit),
bigquery.ScalarQueryParameter('offset_val', 'INT64', offset),
]
Expand Down
11 changes: 6 additions & 5 deletions db/python/tables/bq/billing_daily_extended.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from google.cloud import bigquery

from api.settings import BQ_AGGREG_EXT_VIEW, BQ_DAYS_BACK_OPTIMAL
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_AGGREG_EXT_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)
from models.models import BillingColumn


Expand Down Expand Up @@ -39,7 +40,7 @@ async def get_extended_values(self, field: str):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down
11 changes: 7 additions & 4 deletions db/python/tables/bq/billing_gcp_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from google.cloud import bigquery

from api.settings import BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_GCP_BILLING_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)
from db.python.tables.bq.billing_filter import BillingFilter
from db.python.tables.bq.generic_bq_filter import GenericBQFilter
from models.models import BillingTotalCostQueryModel
Expand Down Expand Up @@ -56,7 +59,7 @@ async def _last_loaded_day(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -121,7 +124,7 @@ async def get_gcp_projects(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down
2 changes: 1 addition & 1 deletion db/python/tables/bq/function_bq_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _sql_cond_prep(key: str, value: Any) -> str:
def _sql_value_prep(key: str, value: Any) -> bigquery.ScalarQueryParameter:
""" """
if isinstance(value, Enum):
return bigquery.ScalarQueryParameter(key, 'STRING', value.value)
return FunctionBQFilter._sql_value_prep(key, value.value)
if isinstance(value, int):
return bigquery.ScalarQueryParameter(key, 'INT64', value)
if isinstance(value, float):
Expand Down
2 changes: 1 addition & 1 deletion db/python/tables/bq/generic_bq_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _sql_value_prep(key, value):
key, 'STRING', ','.join([str(v) for v in value])
)
if isinstance(value, Enum):
return bigquery.ScalarQueryParameter(key, 'STRING', value.value)
return GenericBQFilter._sql_value_prep(key, value.value)
if isinstance(value, int):
return bigquery.ScalarQueryParameter(key, 'INT64', value)
if isinstance(value, float):
Expand Down
2 changes: 2 additions & 0 deletions db/python/tables/bq/generic_bq_filter_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ def prepare_bq_query_from_dict_field(
raise ValueError(f'Filter {field_name} must be a GenericFilter')
if '"' in key:
raise ValueError('Meta key contains " character, which is not allowed')
if '\'' in key:
raise ValueError('Meta key contains \' character, which is not allowed')
fconditionals, fvalues = value.to_sql(
f"JSON_EXTRACT({column_name}, '$.{key}')",
column_name=f'{column_name}_{key}',
Expand Down
Loading

0 comments on commit bab64f9

Please sign in to comment.