Skip to content

Commit 63a4f38

Browse files
committed
MINOR: Looker: Ingest All Views from Repository (#24558)
1 parent 45a0cb1 commit 63a4f38

File tree

2 files changed

+604
-5
lines changed

2 files changed

+604
-5
lines changed

ingestion/src/metadata/ingestion/source/dashboard/looker/metadata.py

Lines changed: 262 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -448,8 +448,12 @@ def list_datamodels(self) -> Iterable[LookmlModelExplore]:
448448
# Then, gather their information and build the parser
449449
self.parser = all_lookml_models
450450

451+
# Store the models for later processing of standalone views
452+
self._all_lookml_models = all_lookml_models
453+
451454
# Finally, iterate through them to ingest Explores and Views
452455
yield from self.fetch_lookml_explores(all_lookml_models)
456+
453457
except Exception as err:
454458
logger.debug(traceback.format_exc())
455459
logger.error(f"Unexpected error fetching LookML models - {err}")
@@ -487,6 +491,111 @@ def fetch_lookml_explores(
487491
f"Error fetching LookML Explore [{explore_nav.name}] in model [{lookml_model.name}] - {err}"
488492
)
489493

494+
def yield_standalone_datamodels(
    self,
) -> Iterable[Either[CreateDashboardDataModelRequest]]:
    """
    Post-process method to ingest all views from the cloned repository that
    haven't been processed yet. This allows ingesting standalone views that
    are not associated with any explore.

    This is called as a post-process step after all explores have been processed.

    Only the first entry of ``self._project_parsers`` is inspected, and the
    name of the first LookML model is used as the prefix of every data model
    name built here (``{model}_{view}_view``).

    Nothing is yielded when repository credentials, project parsers or the
    stored LookML models are missing or empty.

    Yields:
        ``Either`` results. On the right: whatever ``yield_data_model_tags``
        produces, one ``CreateDashboardDataModelRequest`` per view and the
        results of ``_add_standalone_view_lineage``. On the left: a
        ``StackTraceError`` for each view that failed.
    """
    if not self.repository_credentials or not self._project_parsers:
        return

    # The attribute is assigned while the data models are listed, so it may
    # not exist yet: a single getattr covers both "missing" and "empty".
    all_lookml_models = getattr(self, "_all_lookml_models", None)
    if not all_lookml_models:
        return

    logger.info("Processing all standalone views from cloned repositories")

    # Use the first project for standalone views. The mapping is known to be
    # non-empty at this point, so next() cannot raise StopIteration.
    first_project, project_parser = next(iter(self._project_parsers.items()))
    if not first_project or not project_parser:
        return

    # Get the first model name for naming purposes
    first_model_name = all_lookml_models[0].name

    # Iterate through all cached views
    for view_name, view in project_parser._views_cache.items():
        # Skip if view was already processed
        if view_name in self._views_cache:
            logger.debug(f"View [{view_name}] already processed, skipping")
            continue

        # Check if filtered
        if filter_by_datamodel(
            self.source_config.dataModelFilterPattern, view_name
        ):
            self.status.filter(view_name, "Data model (View) filtered out.")
            continue

        try:
            logger.info(f"Processing standalone view: {view_name}")

            if view.tags and self.source_config.includeTags:
                yield from self.yield_data_model_tags(view.tags)

            datamodel_view_name = f"{first_model_name}_{view.name}_view"

            data_model_request = CreateDashboardDataModelRequest(
                name=EntityName(datamodel_view_name),
                displayName=view.name,
                description=(
                    Markdown(view.description) if view.description else None
                ),
                service=self.context.get().dashboard_service,
                tags=get_tag_labels(
                    metadata=self.metadata,
                    tags=view.tags or [],
                    classification_name=LOOKER_TAG_CATEGORY,
                    include_tags=self.source_config.includeTags,
                ),
                dataModelType=DataModelType.LookMlView.value,
                serviceType=DashboardServiceType.Looker.value,
                columns=get_columns_from_model(view),
                sql=project_parser.parsed_files.get(Includes(view.source_file)),
                project=first_project,
            )

            yield Either(right=data_model_request)
            self.register_record_datamodel(datamodel_request=data_model_request)

            # Build and cache the view model so the view is skipped if it
            # shows up again and can be resolved by name for lineage.
            view_data_model = self._build_data_model(datamodel_view_name)
            self._views_cache[view.name] = view_data_model

            # Add lineage for standalone views
            yield from self._add_standalone_view_lineage(
                view, first_project, first_model_name
            )

        except ValidationError as err:
            yield Either(
                left=StackTraceError(
                    name=view_name,
                    error=f"Validation error yielding standalone view [{view_name}]: {err}",
                    stackTrace=traceback.format_exc(),
                )
            )
        except Exception as err:
            # Per-view boundary: report the failure and move on to the next view.
            yield Either(
                left=StackTraceError(
                    name=view_name,
                    error=f"Error yielding standalone view [{view_name}]: {err}",
                    stackTrace=traceback.format_exc(),
                )
            )
598+
490599
def _build_data_model(self, data_model_name):
491600
fqn_datamodel = fqn.build(
492601
self.metadata,
@@ -522,8 +631,15 @@ def yield_bulk_datamodel(
522631
) -> Iterable[Either[CreateDashboardDataModelRequest]]:
523632
"""
524633
Get the Explore and View information and prepare
525-
the model creation request
634+
the model creation request.
635+
636+
After processing all explores, this method also processes standalone views
637+
from the repository that aren't associated with any explore.
526638
"""
639+
# Initialize the flag to track if we've started processing standalone views
640+
if not hasattr(self, "_standalone_views_processed"):
641+
self._standalone_views_processed = False
642+
527643
try:
528644
datamodel_name = build_datamodel_name(model.model_name, model.name)
529645
if filter_by_datamodel(
@@ -612,6 +728,30 @@ def yield_bulk_datamodel(
612728
stackTrace=traceback.format_exc(),
613729
)
614730
)
731+
finally:
732+
# After processing the last explore, process standalone views
733+
# This is a sentinel pattern - we check if this is the last model
734+
if not self._standalone_views_processed and hasattr(
735+
self, "_all_lookml_models"
736+
):
737+
# Count how many explores we've processed
738+
if not hasattr(self, "_explores_processed_count"):
739+
self._explores_processed_count = 0
740+
self._explores_processed_count += 1
741+
742+
# Calculate total explores
743+
total_explores = sum(
744+
len(m.explores) if m.explores else 0
745+
for m in self._all_lookml_models
746+
)
747+
748+
# If this is the last explore, process standalone views
749+
if self._explores_processed_count >= total_explores:
750+
self._standalone_views_processed = True
751+
logger.info(
752+
"All explores processed, now processing standalone views"
753+
)
754+
yield from self.yield_standalone_datamodels()
615755

616756
def _get_explore_sql(self, explore: LookmlModelExplore) -> Optional[str]:
617757
"""
@@ -862,6 +1002,106 @@ def _get_explore_column_lineage(
8621002
continue
8631003
return processed_column_lineage
8641004

1005+
def _add_standalone_view_lineage(
    self, view: LookMlView, project_name: str, model_name: str
) -> Iterable[Either[AddLineageRequest]]:
    """
    Add lineage for standalone views that are not associated with explores.
    This handles view-to-table lineage and view-to-view lineage via extends.

    Args:
        view: the LookML view to build lineage for.
        project_name: not used by this method; kept so existing callers
            keep working.
        model_name: prefix used to build the ``{model_name}_{view}_view``
            data model names that are looked up.

    Yields:
        Lineage requests for every extended view that can be resolved and
        for the view's ``sql_table_name`` or derived-table SQL. Any
        unexpected error is reported as ``Either(left=StackTraceError)``
        instead of being raised.
    """
    try:
        # Set the current view data model for lineage processing
        datamodel_view_name = f"{model_name}_{view.name}_view"
        self._view_data_model = self._build_data_model(datamodel_view_name)

        # Handle view-to-view lineage via extends
        for extended_views_list in view.extends__all or []:
            for extended_view_name in extended_views_list:
                extended_view_model = self._views_cache.get(extended_view_name)

                # If not in cache, try to resolve it by name
                if not extended_view_model:
                    try:
                        # Try with _view suffix first (common pattern for views)
                        extended_view_model = self._build_data_model(
                            f"{model_name}_{extended_view_name}_view"
                        )
                    except Exception:
                        # Best-effort lookup: a failure only means this
                        # extends edge is skipped. Keep the traceback at
                        # debug level so the cause is not lost.
                        logger.debug(traceback.format_exc())

                    if extended_view_model:
                        logger.debug(
                            f"Extended view [{extended_view_name}] found in OpenMetadata for standalone view [{view.name}]"
                        )
                    else:
                        # Logged both when the lookup raised and when it
                        # returned nothing, so a miss is never silent.
                        logger.debug(
                            f"Extended view [{extended_view_name}] not found in cache or OpenMetadata for standalone view [{view.name}]"
                        )

                if not extended_view_model:
                    continue

                logger.debug(
                    f"Building lineage from extended view {extended_view_name} to standalone view {self._view_data_model.name}"
                )
                yield self._get_add_lineage_request(
                    from_entity=extended_view_model,
                    to_entity=self._view_data_model,
                    column_lineage=[],
                )

        db_service_prefixes = self.get_db_service_prefixes()

        if view.sql_table_name:
            sql_table_name = self._render_table_name(view.sql_table_name)

            for db_service_prefix in db_service_prefixes or []:
                db_service_name, *_ = self.parse_db_service_prefix(
                    db_service_prefix
                )
                dialect = self._get_db_dialect(db_service_name)
                source_table_name = self._clean_table_name(sql_table_name, dialect)
                self._parsed_views[view.name] = source_table_name

                # Kept inside the loop, after `_parsed_views` is updated.
                # NOTE(review): looks loop-invariant, but hoisting is only
                # safe if it does not read `_parsed_views` - confirm.
                column_lineage = self._extract_column_lineage(view)

                lineage_request = self.build_lineage_request(
                    source=source_table_name,
                    db_service_prefix=db_service_prefix,
                    to_entity=self._view_data_model,
                    column_lineage=column_lineage,
                )
                if lineage_request:
                    yield lineage_request

        elif view.derived_table:
            sql_query = view.derived_table.sql
            if not sql_query:
                return
            if find_derived_references(sql_query):
                sql_query = self.replace_derived_references(sql_query)
                # If references remain after replacement, record the
                # dependency and defer parsing of this view.
                if view_references := find_derived_references(sql_query):
                    self._add_dependency_edge(view.name, view_references)
                    logger.warning(
                        f"Not all references are replaced for standalone view [{view.name}]. Parsing it later."
                    )
                    return
            logger.debug(
                f"Processing standalone view [{view.name}] with SQL: \n[{sql_query}]"
            )
            yield from self._build_lineage_for_view(view.name, sql_query)
            if self._unparsed_views:
                # NOTE(review): the return value is discarded. If
                # build_lineage_for_unparsed_views is a generator this call
                # does nothing without `yield from` - confirm.
                self.build_lineage_for_unparsed_views()

    except Exception as err:
        yield Either(
            left=StackTraceError(
                name=view.name,
                error=f"Error yielding lineage for standalone view [{view.name}]: {err}",
                stackTrace=traceback.format_exc(),
            )
        )
1104+
8651105
def add_view_lineage(
8661106
self, view: LookMlView, explore: LookmlModelExplore
8671107
) -> Iterable[Either[AddLineageRequest]]:
@@ -897,6 +1137,27 @@ def add_view_lineage(
8971137
for extended_views_list in view.extends__all:
8981138
for extended_view_name in extended_views_list:
8991139
extended_view_model = self._views_cache.get(extended_view_name)
1140+
1141+
# If not in cache, try to fetch from OpenMetadata
1142+
if not extended_view_model:
1143+
try:
1144+
# Try with _view suffix first (common pattern for views)
1145+
extended_datamodel_name = (
1146+
f"{explore.model_name}_{extended_view_name}_view"
1147+
)
1148+
extended_view_model = self._build_data_model(
1149+
extended_datamodel_name
1150+
)
1151+
1152+
if extended_view_model:
1153+
logger.debug(
1154+
f"Extended view [{extended_view_name}] found in OpenMetadata for view [{view.name}]"
1155+
)
1156+
except Exception:
1157+
logger.debug(
1158+
f"Extended view [{extended_view_name}] not found in cache or OpenMetadata for view [{view.name}]"
1159+
)
1160+
9001161
if extended_view_model:
9011162
logger.debug(
9021163
f"Building lineage from extended view {extended_view_name} to view {self._view_data_model.name}"
@@ -906,10 +1167,6 @@ def add_view_lineage(
9061167
to_entity=self._view_data_model,
9071168
column_lineage=[],
9081169
)
909-
else:
910-
logger.debug(
911-
f"Extended view [{extended_view_name}] not found in cache for view [{view.name}]"
912-
)
9131170

9141171
db_service_prefixes = self.get_db_service_prefixes()
9151172

0 commit comments

Comments
 (0)