Skip to content

Commit

Permalink
Merge from dev.
Browse files Browse the repository at this point in the history
  • Loading branch information
milo-hyben committed Jan 31, 2024
2 parents 0318d5e + 6134af9 commit 3f658ee
Show file tree
Hide file tree
Showing 21 changed files with 324 additions and 161 deletions.
2 changes: 1 addition & 1 deletion api/routes/billing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from api.settings import BILLING_CACHE_RESPONSE_TTL, BQ_AGGREG_VIEW
from api.utils.db import BqConnection, get_author
from db.python.layers.billing import BillingLayer
from models.enums import BillingSource
from models.models import (
BillingColumn,
BillingCostBudgetRecord,
BillingHailBatchCostRecord,
BillingSource,
BillingTotalCostQueryModel,
BillingTotalCostRecord,
)
Expand Down
4 changes: 2 additions & 2 deletions api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,11 @@ async def exception_handler(request: Request, e: Exception):
cors_middleware = middlewares[0]

request_origin = request.headers.get('origin', '')
if cors_middleware and '*' in cors_middleware.options['allow_origins']: # type: ignore[attr-defined]
if cors_middleware and '*' in cors_middleware.options['allow_origins']: # type: ignore
response.headers['Access-Control-Allow-Origin'] = '*'
elif (
cors_middleware
and request_origin in cors_middleware.options['allow_origins'] # type: ignore[attr-defined]
and request_origin in cors_middleware.options['allow_origins'] # type: ignore
):
response.headers['Access-Control-Allow-Origin'] = request_origin

Expand Down
4 changes: 1 addition & 3 deletions db/python/layers/billing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
from db.python.tables.bq.billing_daily_extended import BillingDailyExtendedTable
from db.python.tables.bq.billing_gcp_daily import BillingGcpDailyTable
from db.python.tables.bq.billing_raw import BillingRawTable
from models.enums import BillingSource, BillingTimeColumn, BillingTimePeriods
from models.models import (
BillingColumn,
BillingCostBudgetRecord,
BillingHailBatchCostRecord,
BillingSource,
BillingTimeColumn,
BillingTimePeriods,
BillingTotalCostQueryModel,
)

Expand Down
115 changes: 66 additions & 49 deletions db/python/tables/bq/billing_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
from db.python.tables.bq.billing_filter import BillingFilter
from db.python.tables.bq.function_bq_filter import FunctionBQFilter
from db.python.tables.bq.generic_bq_filter import GenericBQFilter
from models.enums import BillingTimeColumn, BillingTimePeriods
from models.models import (
BillingColumn,
BillingCostBudgetRecord,
BillingTimePeriods,
BillingCostDetailsRecord,
BillingTotalCostQueryModel,
)

Expand All @@ -29,46 +30,60 @@
'TimeGroupingDetails', ['field', 'formula', 'separator']
)

# constants to abbrevate (S)tores and (C)ompute
STORAGE = 'S'
COMPUTE = 'C'


def abbrev_cost_category(cost_category: str) -> str:
"""abbreviate cost category"""
return 'S' if cost_category == 'Cloud Storage' else 'C'
return STORAGE if cost_category == 'Cloud Storage' else COMPUTE


def prepare_time_periods(
query: BillingTotalCostQueryModel,
) -> TimeGroupingDetails:
"""Prepare Time periods grouping and parsing formulas"""
time_column = query.time_column or 'day'
result = TimeGroupingDetails('', '', '')
time_column: BillingTimeColumn = query.time_column or BillingTimeColumn.DAY

# Based on specified time period, add the corresponding column
if query.time_periods == BillingTimePeriods.DAY:
result = TimeGroupingDetails(
field=f'FORMAT_DATE("%Y-%m-%d", {time_column}) as day',
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y-%m-%d", {time_column.value}) as day',
formula='PARSE_DATE("%Y-%m-%d", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.WEEK:
result = TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%W", {time_column}) as day',

if query.time_periods == BillingTimePeriods.WEEK:
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%W", {time_column.value}) as day',
formula='PARSE_DATE("%Y%W", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.MONTH:
result = TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%m", {time_column}) as day',

if query.time_periods == BillingTimePeriods.MONTH:
return TimeGroupingDetails(
field=f'FORMAT_DATE("%Y%m", {time_column.value}) as day',
formula='PARSE_DATE("%Y%m", day) as day',
separator=',',
)
elif query.time_periods == BillingTimePeriods.INVOICE_MONTH:
result = TimeGroupingDetails(

if query.time_periods == BillingTimePeriods.INVOICE_MONTH:
return TimeGroupingDetails(
field='invoice_month as day',
formula='PARSE_DATE("%Y%m", day) as day',
separator=',',
)

return result
return TimeGroupingDetails('', '', '')


def time_optimisation_parameter() -> bigquery.ScalarQueryParameter:
"""
BQ tables and views are partitioned by day, to avoid full scans
we need to limit the amount of data scanned
"""
return bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL))


class BillingBaseTable(BqDbBase):
Expand Down Expand Up @@ -165,7 +180,7 @@ async def _budgets_by_gcp_project(
WITH t AS (
SELECT gcp_project, MAX(created_at) as last_created_at
FROM `{BQ_BUDGET_VIEW}`
GROUP BY 1
GROUP BY gcp_project
)
SELECT t.gcp_project, d.budget
FROM t inner join `{BQ_BUDGET_VIEW}` d
Expand Down Expand Up @@ -193,7 +208,7 @@ async def _last_loaded_day(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -328,38 +343,36 @@ async def _append_total_running_cost(
all_details = []
for cat, mth_cost in total_monthly_category.items():
all_details.append(
{
'cost_group': abbrev_cost_category(cat),
'cost_category': cat,
'daily_cost': total_daily_category[cat]
if is_current_month
else None,
'monthly_cost': mth_cost,
}
BillingCostDetailsRecord(
cost_group=abbrev_cost_category(cat),
cost_category=cat,
daily_cost=total_daily_category[cat] if is_current_month else None,
monthly_cost=mth_cost,
)
)

# add total row: compute + storage
results.append(
BillingCostBudgetRecord.from_json(
{
'field': f'{BillingColumn.generate_all_title(field)}',
'total_monthly': (
total_monthly['C']['ALL'] + total_monthly['S']['ALL']
),
'total_daily': (total_daily['C']['ALL'] + total_daily['S']['ALL'])
if is_current_month
else None,
'compute_monthly': total_monthly['C']['ALL'],
'compute_daily': (total_daily['C']['ALL'])
if is_current_month
else None,
'storage_monthly': total_monthly['S']['ALL'],
'storage_daily': (total_daily['S']['ALL'])
if is_current_month
else None,
'details': all_details,
'last_loaded_day': last_loaded_day,
}
BillingCostBudgetRecord(
field=f'{BillingColumn.generate_all_title(field)}',
total_monthly=(
total_monthly[COMPUTE]['ALL'] + total_monthly[STORAGE]['ALL']
),
total_daily=(total_daily[COMPUTE]['ALL'] + total_daily[STORAGE]['ALL'])
if is_current_month
else None,
compute_monthly=total_monthly[COMPUTE]['ALL'],
compute_daily=(total_daily[COMPUTE]['ALL'])
if is_current_month
else None,
storage_monthly=total_monthly[STORAGE]['ALL'],
storage_daily=(total_daily[STORAGE]['ALL'])
if is_current_month
else None,
details=all_details,
budget_spent=None,
budget=None,
last_loaded_day=last_loaded_day,
)
)

Expand All @@ -385,13 +398,17 @@ async def _append_running_cost_records(

# add rows by field
for key, details in field_details.items():
compute_daily = total_daily['C'][key] if key in total_daily['C'] else 0
storage_daily = total_daily['S'][key] if key in total_daily['S'] else 0
compute_daily = (
total_daily[COMPUTE][key] if key in total_daily[COMPUTE] else 0
)
storage_daily = (
total_daily[STORAGE][key] if key in total_daily[STORAGE] else 0
)
compute_monthly = (
total_monthly['C'][key] if key in total_monthly['C'] else 0
total_monthly[COMPUTE][key] if key in total_monthly[COMPUTE] else 0
)
storage_monthly = (
total_monthly['S'][key] if key in total_monthly['S'] else 0
total_monthly[STORAGE][key] if key in total_monthly[STORAGE] else 0
)
monthly = compute_monthly + storage_monthly
budget_monthly = budgets_per_gcp_project.get(key)
Expand Down
20 changes: 12 additions & 8 deletions db/python/tables/bq/billing_daily.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from google.cloud import bigquery

from api.settings import BQ_AGGREG_VIEW, BQ_DAYS_BACK_OPTIMAL
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_AGGREG_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)


class BillingDailyTable(BillingBaseTable):
Expand Down Expand Up @@ -31,7 +34,7 @@ async def get_topics(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand All @@ -42,12 +45,13 @@ async def get_topics(self):
return []

async def get_invoice_months(self):
"""Get all invoice months in database"""
"""Get all invoice months in database
Aggregated views contain invoice_month field
"""

_query = f"""
SELECT DISTINCT FORMAT_DATE("%Y%m", day) as invoice_month
SELECT DISTINCT invoice_month
FROM `{self.table_name}`
WHERE EXTRACT(day from day) = 1
ORDER BY invoice_month DESC;
"""

Expand Down Expand Up @@ -76,7 +80,7 @@ async def get_cost_categories(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -114,7 +118,7 @@ async def get_skus(
_query += ' OFFSET @offset_val'

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
bigquery.ScalarQueryParameter('limit_val', 'INT64', limit),
bigquery.ScalarQueryParameter('offset_val', 'INT64', offset),
]
Expand Down
11 changes: 6 additions & 5 deletions db/python/tables/bq/billing_daily_extended.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from google.cloud import bigquery

from api.settings import BQ_AGGREG_EXT_VIEW, BQ_DAYS_BACK_OPTIMAL
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_AGGREG_EXT_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)
from models.models import BillingColumn


Expand Down Expand Up @@ -39,7 +40,7 @@ async def get_extended_values(self, field: str):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down
20 changes: 16 additions & 4 deletions db/python/tables/bq/billing_gcp_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from google.cloud import bigquery

from api.settings import BQ_DAYS_BACK_OPTIMAL, BQ_GCP_BILLING_VIEW
from db.python.tables.bq.billing_base import BillingBaseTable
from api.settings import BQ_GCP_BILLING_VIEW
from db.python.tables.bq.billing_base import (
BillingBaseTable,
time_optimisation_parameter,
)
from db.python.tables.bq.billing_filter import BillingFilter
from db.python.tables.bq.generic_bq_filter import GenericBQFilter
from models.models import BillingTotalCostQueryModel
Expand Down Expand Up @@ -39,6 +42,15 @@ def _query_to_partitioned_filter(
if query.end_date
else None,
)
# add day filter after partition filter is applied
billing_filter.day = GenericBQFilter[datetime](
gte=datetime.strptime(query.start_date, '%Y-%m-%d')
if query.start_date
else None,
lte=datetime.strptime(query.end_date, '%Y-%m-%d')
if query.end_date
else None,
)
return billing_filter

async def _last_loaded_day(self):
Expand All @@ -56,7 +68,7 @@ async def _last_loaded_day(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down Expand Up @@ -121,7 +133,7 @@ async def get_gcp_projects(self):
"""

query_parameters = [
bigquery.ScalarQueryParameter('days', 'INT64', -int(BQ_DAYS_BACK_OPTIMAL)),
time_optimisation_parameter(),
]
query_job_result = self._execute_query(_query, query_parameters)

Expand Down
2 changes: 1 addition & 1 deletion db/python/tables/bq/function_bq_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _sql_cond_prep(key: str, value: Any) -> str:
def _sql_value_prep(key: str, value: Any) -> bigquery.ScalarQueryParameter:
""" """
if isinstance(value, Enum):
return bigquery.ScalarQueryParameter(key, 'STRING', value.value)
return FunctionBQFilter._sql_value_prep(key, value.value)
if isinstance(value, int):
return bigquery.ScalarQueryParameter(key, 'INT64', value)
if isinstance(value, float):
Expand Down
Loading

0 comments on commit 3f658ee

Please sign in to comment.