Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validations for time spines #320

Merged
merged 2 commits into from
Jul 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions dbt_semantic_interfaces/protocols/__init__.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,10 @@
SemanticModelDefaults,
SemanticModelT,
)
from dbt_semantic_interfaces.protocols.time_spine import ( # noqa:F401
TimeSpine,
TimeSpinePrimaryColumn,
)
from dbt_semantic_interfaces.protocols.where_filter import ( # noqa:F401
WhereFilter,
WhereFilterIntersection,
4 changes: 2 additions & 2 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -39,10 +39,10 @@


class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that cumulative sum metrics are configured properly."""
"""Checks that cumulative metrics are configured properly."""

@staticmethod
@validate_safely(whats_being_done="running model validation ensuring cumulative sum metrics are valid")
@validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@
SemanticModelDefaultsRule,
SemanticModelValidityWindowRule,
)
from dbt_semantic_interfaces.validations.time_spines import TimeSpineRule
from dbt_semantic_interfaces.validations.unique_valid_name import (
PrimaryEntityDimensionPairs,
UniqueAndValidNameRule,
@@ -91,6 +92,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
SemanticModelLabelsRule[SemanticManifestT](),
EntityLabelsRule[SemanticManifestT](),
ConversionMetricRule[SemanticManifestT](),
TimeSpineRule[SemanticManifestT](),
)

def __init__(
@@ -100,7 +102,7 @@ def __init__(

Args:
rules: List of validation rules to run. Defaults to DEFAULT_RULES
max_workers: sets the max number of rules to run against the model concurrently
max_workers: sets the max number of rules to run against the semantic_manifest concurrently
"""
# Raises an error if 'rules' is an empty sequence or None
if not rules:
@@ -147,7 +149,7 @@ def _validate_multi_process( # noqa: D

def checked_validations(self, semantic_manifest: SemanticManifestT) -> None:
"""Similar to validate(), but throws an exception if validation fails."""
model_copy = copy.deepcopy(semantic_manifest)
model_issues = self.validate_semantic_manifest(model_copy)
if model_issues.has_blocking_issues:
raise SemanticManifestValidationException(issues=tuple(model_issues.all_issues))
semantic_manifest_copy = copy.deepcopy(semantic_manifest)
semantic_manifest_issues = self.validate_semantic_manifest(semantic_manifest_copy)
if semantic_manifest_issues.has_blocking_issues:
raise SemanticManifestValidationException(issues=tuple(semantic_manifest_issues.all_issues))
89 changes: 89 additions & 0 deletions dbt_semantic_interfaces/validations/time_spines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from typing import Dict, Generic, List, Sequence, Set

from dbt_semantic_interfaces.protocols import SemanticManifestT, TimeSpine
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.validations.validator_helpers import (
SemanticManifestValidationRule,
ValidationIssue,
ValidationWarning,
validate_safely,
)


class TimeSpineRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that time spines are configured properly."""

@staticmethod
@validate_safely(whats_being_done="running model validation to ensure that time spines are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
"""Validate time spine configs.

Note that some validation happens separately in the core parser before building this object:
- error if no time spine configured and legacy time spine model doeesn't exist
- error if granularity is missing for primary column
- error if primary column does not exist in the model
"""
issues: List[ValidationIssue] = []

if not semantic_manifest.semantic_models:
return issues

time_spines = semantic_manifest.project_configuration.time_spines
if not time_spines:
# TODO: update docs link when new one is available!
docs_message = "See documentation to configure: https://docs.getdbt.com/docs/build/metricflow-time-spine"
# If they have the old time spine configured and need to migrate
if semantic_manifest.project_configuration.time_spine_table_configurations:
issues.append(
ValidationWarning(
message="Time spines without YAML configuration are in the process of deprecation. Please add "
"YAML configuration for your 'metricflow_time_spine' model. " + docs_message
)
)
return issues

# Verify that there is only one time spine per granularity
time_spines_by_granularity: Dict[TimeGranularity, List[TimeSpine]] = {}
granularities_with_multiple_time_spines: Set[TimeGranularity] = set()
for time_spine in time_spines:
granularity = time_spine.primary_column.time_granularity
if granularity in time_spines_by_granularity:
time_spines_by_granularity[granularity].append(time_spine)
else:
time_spines_by_granularity[granularity] = [time_spine]
if len(time_spines_by_granularity[granularity]) > 1:
granularities_with_multiple_time_spines.add(granularity)

if granularities_with_multiple_time_spines:
duplicate_granularity_time_spines: Dict[str, List[str]] = {}
for granularity in granularities_with_multiple_time_spines:
duplicate_granularity_time_spines[granularity.name] = [
time_spine.node_relation.relation_name for time_spine in time_spines_by_granularity[granularity]
]
issues.append(
ValidationWarning(
message=f"Only one time spine is supported per granularity. Got duplicates: "
f"{duplicate_granularity_time_spines}"
)
)

# Warn if there is a time dimension configured with a smaller granularity than the smallest time spine
dimension_granularities = {
dimension.type_params.time_granularity
for semantic_model in semantic_manifest.semantic_models
for dimension in semantic_model.dimensions
if dimension.type_params
}
smallest_dim_granularity = min(dimension_granularities)
smallest_time_spine_granularity = min(time_spines_by_granularity.keys())
if smallest_dim_granularity < smallest_time_spine_granularity:
issues.append(
ValidationWarning(
message=f"To avoid unexpected query errors, configuring a time spine at or below the smallest time "
f"dimension granularity is recommended. Smallest time dimension granularity: "
f"{smallest_dim_granularity.name}; Smallest time spine granularity: "
f"{smallest_time_spine_granularity}"
)
)

return issues
175 changes: 175 additions & 0 deletions tests/validations/test_time_spines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from dbt_semantic_interfaces.implementations.elements.dimension import (
PydanticDimension,
PydanticDimensionTypeParams,
)
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
from dbt_semantic_interfaces.implementations.project_configuration import (
PydanticProjectConfiguration,
PydanticTimeSpineTableConfiguration,
)
from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.implementations.time_spine import (
PydanticTimeSpine,
PydanticTimeSpinePrimaryColumn,
)
from dbt_semantic_interfaces.test_utils import semantic_model_with_guaranteed_meta
from dbt_semantic_interfaces.type_enums import (
AggregationType,
DimensionType,
EntityType,
TimeGranularity,
)
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)


def test_valid_time_spines() -> None: # noqa: D
semantic_manifest = PydanticSemanticManifest(
semantic_models=[
semantic_model_with_guaranteed_meta(
name="sum_measure",
measures=[
PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
],
dimensions=[
PydanticDimension(
name="dim",
type=DimensionType.TIME,
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
)
],
entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
),
],
metrics=[],
project_configuration=PydanticProjectConfiguration(
time_spine_table_configurations=[],
time_spines=[
PydanticTimeSpine(
node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY),
),
PydanticTimeSpine(
node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"),
primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND),
),
],
),
)
SemanticManifestValidator[PydanticSemanticManifest]().checked_validations(semantic_manifest)


def test_only_legacy_time_spine() -> None: # noqa: D
validator = SemanticManifestValidator[PydanticSemanticManifest]()
semantic_manifest = PydanticSemanticManifest(
semantic_models=[
semantic_model_with_guaranteed_meta(
name="sum_measure",
measures=[
PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
],
dimensions=[
PydanticDimension(
name="dim",
type=DimensionType.TIME,
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
)
],
entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
),
],
metrics=[],
project_configuration=PydanticProjectConfiguration(
time_spine_table_configurations=[
PydanticTimeSpineTableConfiguration(location="hurrr", column_name="fun_col", grain=TimeGranularity.DAY)
]
),
)
issues = validator.validate_semantic_manifest(semantic_manifest)
assert not issues.has_blocking_issues
assert len(issues.warnings) == 1
assert "Time spines without YAML configuration are in the process of deprecation." in issues.warnings[0].message


def test_duplicate_time_spine_granularity() -> None: # noqa: D
validator = SemanticManifestValidator[PydanticSemanticManifest]()
semantic_manifest = PydanticSemanticManifest(
semantic_models=[
semantic_model_with_guaranteed_meta(
name="sum_measure",
measures=[
PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
],
dimensions=[
PydanticDimension(
name="dim",
type=DimensionType.TIME,
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
)
],
entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
),
],
metrics=[],
project_configuration=PydanticProjectConfiguration(
time_spine_table_configurations=[],
time_spines=[
PydanticTimeSpine(
node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.SECOND),
),
PydanticTimeSpine(
node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"),
primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND),
),
],
),
)
issues = validator.validate_semantic_manifest(semantic_manifest)
assert not issues.has_blocking_issues
assert len(issues.warnings) == 1
assert "Only one time spine is supported per granularity." in issues.warnings[0].message


def test_dimension_granularity_smaller_than_time_spine() -> None: # noqa: D
validator = SemanticManifestValidator[PydanticSemanticManifest]()
semantic_manifest = PydanticSemanticManifest(
semantic_models=[
semantic_model_with_guaranteed_meta(
name="sum_measure",
measures=[
PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
],
dimensions=[
PydanticDimension(
name="dim",
type=DimensionType.TIME,
type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
)
],
entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
),
],
metrics=[],
project_configuration=PydanticProjectConfiguration(
time_spine_table_configurations=[],
time_spines=[
PydanticTimeSpine(
node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY),
),
],
),
)
issues = validator.validate_semantic_manifest(semantic_manifest)
assert not issues.has_blocking_issues
assert len(issues.warnings) == 1
assert (
"configuring a time spine at or below the smallest time dimension granularity is recommended"
in issues.warnings[0].message
)