Skip to content

Commit

Permalink
Require semantic_model context when getting time dimension's defined …
Browse files Browse the repository at this point in the history
…granularity

And a lot of downstream changes as a result. Previously, we were getting a time dimension's defined granularity from just its element_name. This is not sufficient information, since there might be multiple dimensions with the same name across semantic models, all with potentially different granularities. This updates those methods to take in a SemanticModelElementReference, which contains both the time dimension's element_name and its semantic model name. This caused a lot of downsream impact, since we needed to get the semantic model context wherever those methods were called. I also did some type cleanup along the way to try to simplify the code with this change in mind.
  • Loading branch information
courtneyholcomb committed Oct 22, 2024
1 parent b1c9aba commit 4bdd2fa
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.metric import Metric, MetricInputMeasure, MetricType
from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from dbt_semantic_interfaces.references import (
MeasureReference,
MetricReference,
SemanticModelElementReference,
)
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow_semantics.collection_helpers.lru_cache import LruCache
Expand Down Expand Up @@ -211,18 +215,17 @@ def contains_cumulative_or_time_offset_metric(self, metric_references: Sequence[
return True
return False

def _get_agg_time_dimension_specs_for_metric(
def get_agg_time_dimensions_for_metric(
self, metric_reference: MetricReference
) -> Sequence[TimeDimensionSpec]:
) -> Set[SemanticModelElementReference]:
"""Retrieves the aggregate time dimensions associated with the metric's measures."""
metric = self.get_metric(metric_reference)
specs: Set[TimeDimensionSpec] = set()
agg_time_dimensions: Set[SemanticModelElementReference] = set()
for input_measure in metric.input_measures:
time_dimension_specs = self._semantic_model_lookup.get_agg_time_dimension_specs_for_measure(
measure_reference=input_measure.measure_reference
agg_time_dimensions.add(
self._semantic_model_lookup.get_agg_time_dimension_for_measure(input_measure.measure_reference)
)
specs.update(time_dimension_specs)
return list(specs)
return agg_time_dimensions

def get_valid_agg_time_dimensions_for_metric(
self, metric_reference: MetricReference
Expand All @@ -240,21 +243,16 @@ def get_valid_agg_time_dimensions_for_metric(
def _get_valid_agg_time_dimensions_for_metric(
self, metric_reference: MetricReference
) -> Sequence[TimeDimensionSpec]:
agg_time_dimension_specs = self._get_agg_time_dimension_specs_for_metric(metric_reference)
distinct_agg_time_dimension_identifiers = set(
[(spec.reference, spec.entity_links) for spec in agg_time_dimension_specs]
)
if len(distinct_agg_time_dimension_identifiers) != 1:
agg_time_dimensions = self.get_agg_time_dimensions_for_metric(metric_reference)
if len(agg_time_dimensions) != 1:
# If the metric's input measures have different agg_time_dimensions, user must use metric_time.
return []

agg_time_dimension_reference, agg_time_dimension_entity_links = distinct_agg_time_dimension_identifiers.pop()
valid_agg_time_dimension_specs = TimeDimensionSpec.generate_possible_specs_for_time_dimension(
time_dimension_reference=agg_time_dimension_reference,
entity_links=agg_time_dimension_entity_links,
custom_granularities=self._custom_granularities,
return tuple(
self._semantic_model_lookup.generate_possible_time_dimension_specs_from_element_reference(
agg_time_dimensions.pop()
)
)
return valid_agg_time_dimension_specs

def get_min_queryable_time_granularity(self, metric_reference: MetricReference) -> TimeGranularity:
"""The minimum grain that can be queried with this metric.
Expand All @@ -270,21 +268,21 @@ def get_min_queryable_time_granularity(self, metric_reference: MetricReference)
return result

def _get_min_queryable_time_granularity(self, metric_reference: MetricReference) -> TimeGranularity:
agg_time_dimension_specs = self._get_agg_time_dimension_specs_for_metric(metric_reference)
assert (
agg_time_dimension_specs
), f"No agg_time_dimension found for metric {metric_reference}. Something has been misconfigured."

minimum_queryable_granularity = self._semantic_model_lookup.get_defined_time_granularity(
agg_time_dimension_specs[0].reference
)
if len(agg_time_dimension_specs) > 1:
for agg_time_dimension_spec in agg_time_dimension_specs[1:]:
defined_time_granularity = self._semantic_model_lookup.get_defined_time_granularity(
agg_time_dimension_spec.reference
)
if defined_time_granularity.to_int() > minimum_queryable_granularity.to_int():
minimum_queryable_granularity = defined_time_granularity
agg_time_dimensions = self.get_agg_time_dimensions_for_metric(metric_reference)

minimum_queryable_granularity: Optional[TimeGranularity] = None
for agg_time_dimension in agg_time_dimensions:
defined_time_granularity = self._semantic_model_lookup.get_defined_time_granularity(agg_time_dimension)
if (
not minimum_queryable_granularity
or defined_time_granularity.to_int() > minimum_queryable_granularity.to_int()
):
minimum_queryable_granularity = defined_time_granularity

if not minimum_queryable_granularity:
raise ValueError(
f"No agg_time_dimension found for metric '{metric_reference.element_name}'. Something has been misconfigured."
)

return minimum_queryable_granularity

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from typing import Dict, List, Optional, Sequence, Set
from typing import Dict, List, Optional, Sequence, Set, Tuple

from dbt_semantic_interfaces.protocols.dimension import Dimension
from dbt_semantic_interfaces.protocols.entity import Entity
Expand Down Expand Up @@ -67,27 +67,29 @@ def __init__(
self._add_semantic_model(semantic_model)

# Cache for defined time granularity.
self._time_dimension_to_defined_time_granularity: Dict[TimeDimensionReference, TimeGranularity] = {}
self._time_dimension_to_defined_time_granularity: Dict[SemanticModelElementReference, TimeGranularity] = {}

# Cache for agg. time dimension for measure.
self._measure_reference_to_agg_time_dimension_specs: Dict[MeasureReference, Sequence[TimeDimensionSpec]] = {}
self._measure_reference_to_agg_time_dimension_specs: Dict[MeasureReference, Set[TimeDimensionSpec]] = {}

def get_dimension_references(self) -> Sequence[DimensionReference]:
"""Retrieve all dimension references from the collection of semantic models."""
return tuple(self._dimension_index.keys())

@staticmethod
def get_dimension_from_semantic_model(
semantic_model: SemanticModel, dimension_reference: LinkableElementReference
semantic_model: SemanticModel, dimension_reference: DimensionReference
) -> Dimension:
"""Get dimension from semantic model."""
for dim in semantic_model.dimensions:
if dim.reference == dimension_reference:
# Check element_name instead of direct reference in case the input is a TimeDimensionReference
if dim.reference.element_name == dimension_reference.element_name:
return dim
raise ValueError(
f"No dimension with name ({dimension_reference}) in semantic_model with name ({semantic_model.name})"
f"No dimension with name '{dimension_reference.element_name}' in semantic_model '{semantic_model.name}'."
)

# TODO: remove this method
def get_dimension(self, dimension_reference: DimensionReference) -> Dimension:
"""Retrieves a full dimension object by name."""
# If the reference passed is a TimeDimensionReference, convert to DimensionReference.
Expand All @@ -105,6 +107,7 @@ def get_dimension(self, dimension_reference: DimensionReference) -> Dimension:
dimension_reference=dimension_reference,
)

# TODO: remove this method
def get_time_dimension(self, time_dimension_reference: TimeDimensionReference) -> Dimension:
"""Retrieves a full dimension object by name."""
return self.get_dimension(dimension_reference=time_dimension_reference.dimension_reference)
Expand Down Expand Up @@ -154,13 +157,31 @@ def get_semantic_model_for_measure(self, measure_reference: MeasureReference) ->
)
return semantic_model

def get_agg_time_dimension_for_measure(self, measure_reference: MeasureReference) -> TimeDimensionReference:
def get_semantic_model(self, semantic_model_reference: SemanticModelReference) -> SemanticModel:
"""Retrieve the semantic model object matching the input semantic model reference."""
semantic_model = self._semantic_model_reference_to_semantic_model.get(semantic_model_reference)
assert semantic_model, f"Semantic model {semantic_model_reference} not found in manifest."
return semantic_model

def get_agg_time_dimension_for_measure(self, measure_reference: MeasureReference) -> SemanticModelElementReference:
"""Retrieves the aggregate time dimension that is associated with the measure reference.
This is the time dimension along which the measure will be aggregated when a metric built on this measure
is queried with metric_time.
"""
return self._measure_agg_time_dimension[measure_reference]
agg_time_dimension = self._measure_agg_time_dimension.get(measure_reference)
if not agg_time_dimension:
raise ValueError(
f"No agg_time_dimension found for measure {measure_reference}. This indicates internal misconfiguration"
" since validations should ensure an agg_time_dimension for every measure."
)

# A measure's agg_time_dimension is required to be in the same semantic model as the measure,
# so we can assume the same semantic model for both measure and dimension.
semantic_model = self.get_semantic_model_for_measure(measure_reference)
return SemanticModelElementReference.create_from_references(
element_reference=agg_time_dimension, semantic_model_reference=semantic_model.reference
)

def get_entity_in_semantic_model(self, ref: SemanticModelElementReference) -> Optional[Entity]:
"""Retrieve the entity matching the element -> semantic model mapping, if any."""
Expand Down Expand Up @@ -365,49 +386,63 @@ def get_element_spec_for_name(self, element_name: str) -> LinkableInstanceSpec:
else:
raise ValueError(f"Unable to find linkable element {element_name} in manifest")

def get_agg_time_dimension_specs_for_measure(
self, measure_reference: MeasureReference
) -> Sequence[TimeDimensionSpec]:
def local_entity_links_for_time_dimension(
self, time_dimension_element_reference: SemanticModelElementReference
) -> Tuple[EntityReference]:
"""Return the primary entity as entity links that should be used for the time dimension when no joins are involved."""
semantic_model = self.get_semantic_model(time_dimension_element_reference.semantic_model_reference)
return (self.get_primary_entity_else_error(semantic_model),)

def generate_possible_time_dimension_specs_from_element_reference(
self,
time_dimension_element_reference: SemanticModelElementReference,
) -> Set[TimeDimensionSpec]:
"""Generate all possible time dimension specs that use the given time dimension.
This means a spec for every possible granularity and date part.
"""
return set(
TimeDimensionSpec.generate_possible_specs_for_time_dimension(
time_dimension_reference=TimeDimensionReference(time_dimension_element_reference.element_name),
entity_links=self.local_entity_links_for_time_dimension(time_dimension_element_reference),
custom_granularities=self._custom_granularities,
)
)

def get_agg_time_dimension_specs_for_measure(self, measure_reference: MeasureReference) -> Set[TimeDimensionSpec]:
"""Get the agg time dimension specs that can be used in place of metric time for this measure."""
result = self._measure_reference_to_agg_time_dimension_specs.get(measure_reference)
if result is not None:
return result

result = self._get_agg_time_dimension_specs_for_measure(measure_reference)
self._measure_reference_to_agg_time_dimension_specs[measure_reference] = result
return result

def _get_agg_time_dimension_specs_for_measure(
self, measure_reference: MeasureReference
) -> Sequence[TimeDimensionSpec]:
agg_time_dimension = self.get_agg_time_dimension_for_measure(measure_reference)
# A measure's agg_time_dimension is required to be in the same semantic model as the measure,
# so we can assume the same semantic model for both measure and dimension.
semantic_model = self.get_semantic_model_for_measure(measure_reference)
entity_link = self.resolved_primary_entity(semantic_model)
assert entity_link is not None, (
f"Expected semantic model {semantic_model} to have a primary entity since it has a "
"measure requiring an agg_time_dimension, but found none.",
)
return TimeDimensionSpec.generate_possible_specs_for_time_dimension(
time_dimension_reference=agg_time_dimension,
entity_links=(entity_link,),
custom_granularities=self._custom_granularities,
agg_time_element_reference = self.get_agg_time_dimension_for_measure(measure_reference)
agg_time_dimension_specs = self.generate_possible_time_dimension_specs_from_element_reference(
agg_time_element_reference
)
self._measure_reference_to_agg_time_dimension_specs[measure_reference] = agg_time_dimension_specs
return agg_time_dimension_specs

def get_defined_time_granularity(self, time_dimension_reference: TimeDimensionReference) -> TimeGranularity:
def get_defined_time_granularity(
self, time_dimension_element_reference: SemanticModelElementReference
) -> TimeGranularity:
"""Time granularity from the time dimension's YAML definition. If not set, defaults to DAY."""
result = self._time_dimension_to_defined_time_granularity.get(time_dimension_reference)
result = self._time_dimension_to_defined_time_granularity.get(time_dimension_element_reference)

if result is not None:
return result

result = self._get_defined_time_granularity(time_dimension_reference)
self._time_dimension_to_defined_time_granularity[time_dimension_reference] = result
result = self._get_defined_time_granularity(time_dimension_element_reference)
self._time_dimension_to_defined_time_granularity[time_dimension_element_reference] = result
return result

def _get_defined_time_granularity(self, time_dimension_reference: TimeDimensionReference) -> TimeGranularity:
time_dimension = self.get_dimension(time_dimension_reference)
def _get_defined_time_granularity(
self, time_dimension_element_reference: SemanticModelElementReference
) -> TimeGranularity:
semantic_model = self.get_semantic_model(time_dimension_element_reference.semantic_model_reference)
time_dimension = self.get_dimension_from_semantic_model(
dimension_reference=TimeDimensionReference(time_dimension_element_reference.element_name),
semantic_model=semantic_model,
)

defined_time_granularity = DEFAULT_TIME_GRANULARITY
if time_dimension.type_params and time_dimension.type_params.time_granularity:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def element_path_key(self) -> ElementPathKey:
date_part=self.date_part,
)

# TODO: remove this method
@staticmethod
def from_reference(reference: TimeDimensionReference) -> TimeDimensionSpec:
"""Initialize from a time dimension reference instance."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def test_get_valid_agg_time_dimensions_for_metric( # noqa: D103
)
if len(measure_agg_time_dims) == 1:
for metric_agg_time_dim in metric_agg_time_dims:
assert metric_agg_time_dim.reference == measure_agg_time_dims[0]
assert metric_agg_time_dim.reference.element_name == measure_agg_time_dims[0].element_name
else:
assert len(metric_agg_time_dims) == 0

Expand All @@ -243,6 +243,6 @@ def test_get_agg_time_dimension_specs_for_measure(semantic_model_lookup: Semanti
for measure_name in ["bookings", "views", "listings"]:
measure_reference = MeasureReference(measure_name)
agg_time_dim_specs = semantic_model_lookup.get_agg_time_dimension_specs_for_measure(measure_reference)
agg_time_dim_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference)
agg_time_dim_element_reference = semantic_model_lookup.get_agg_time_dimension_for_measure(measure_reference)
for spec in agg_time_dim_specs:
assert spec.reference == agg_time_dim_reference
assert spec.reference.element_name == agg_time_dim_element_reference.element_name
Loading

0 comments on commit 4bdd2fa

Please sign in to comment.