Commit 17651e2

Formalising dlt schema sync
1 parent f33f65f

File tree: 21 files changed, +526 / -230 lines

dlt/common/destination/client.py

Lines changed: 132 additions & 29 deletions
@@ -33,11 +33,14 @@
 from dlt.common.metrics import LoadJobMetrics
 from dlt.common.normalizers.naming import NamingConvention

-from dlt.common.schema import Schema, TSchemaTables
+from dlt.common.schema import Schema, TSchemaTables, TSchemaDrop
 from dlt.common.schema.typing import (
+    C_DLT_ID,
     C_DLT_LOAD_ID,
     TLoaderReplaceStrategy,
     TTableFormat,
+    TTableSchemaColumns,
+    TPartialTableSchema,
 )
 from dlt.common.destination.capabilities import DestinationCapabilitiesContext
 from dlt.common.destination.exceptions import (
@@ -539,36 +542,120 @@ def update_stored_schema(
         )
         return expected_update

-    def update_stored_schema_destructively(
+    def update_dlt_schema(
         self,
-    ) -> None:
-        """
-        Compare the schema we think we should have (`self.schema`)
-        with what actually exists in the destination, and drop any
-        columns that disappeared.
-        """
-        for table in self.schema.data_tables():
-            table_name = table["name"]
-
-            actual_columns = self._get_actual_columns(table_name)
-            schema_columns = self.schema.get_table_columns(table_name)
-            dropped_columns = set(schema_columns.keys()) - set(actual_columns)
-            if dropped_columns:
-                for dropped_col in dropped_columns:
-                    if schema_columns[dropped_col].get("increment"):
-                        logger.warning(
-                            "An incremental field is being removed from schema."
-                            "You should unset the"
-                            " incremental with `incremental=dlt.sources.incremental.EMPTY`"
-                        )
-                self.schema.drop_columns(table_name, list(dropped_columns))
-
-    def _get_actual_columns(self, table_name: str) -> List[str]:  # noqa: B027, optional override
-        """
-        Return a list of column names that currently exist in the
-        destination for `table_name`.
+        table_names: Optional[Iterable[str]] = None,
+        dry_run: bool = False,
+    ) -> Optional[TSchemaDrop]:
+        """Synchronizes the dlt schema with the destination storage.
+
+        Compares the schema we think we should have (`self.schema`) with what actually exists
+        in the destination, and drops any tables and/or columns that disappeared.
+
+        Args:
+            table_names (Iterable[str], optional): Check only the listed tables. Defaults to None, which checks all data tables.
+            dry_run (bool): When True, only report what would be dropped without modifying the schema. Defaults to False.
+
+        Returns:
+            Optional[TSchemaDrop]: The drops that were (or, with `dry_run`, would be) applied to the schema.
         """
-        pass
+        from dlt.destinations.sql_client import WithSqlClient
+
+        if not (isinstance(self, WithTableReflection) and isinstance(self, WithSqlClient)):
+            raise NotImplementedError
+
+        def _diff_between_actual_and_dlt_schema(
+            table_name: str, actual_col_names: set[str], disregard_dlt_columns: bool = True
+        ) -> TPartialTableSchema:
+            """Returns a partial table schema containing columns that exist in the dlt schema
+            but are missing from the actual table. Skips dlt internal columns by default.
+            """
+            col_schemas = self.schema.get_table_columns(table_name)
+
+            # Map escaped -> original names (actual_col_names are escaped)
+            escaped_to_original = {
+                self.sql_client.escape_column_name(col, quote=False): col
+                for col in col_schemas.keys()
+            }
+            dropped_col_names = set(escaped_to_original.keys()) - actual_col_names
+
+            if not dropped_col_names:
+                return {}
+
+            partial_table: TPartialTableSchema = {"name": table_name, "columns": {}}
+
+            for esc_name in dropped_col_names:
+                orig_name = escaped_to_original[esc_name]
+
+                # Athena doesn't have dlt columns in actual columns. Don't drop them anyway.
+                if disregard_dlt_columns and orig_name in [C_DLT_ID, C_DLT_LOAD_ID]:
+                    continue
+
+                col_schema = col_schemas[orig_name]
+                if col_schema.get("increment"):
+                    # We can warn within the for loop,
+                    # since there's only one incremental field per table
+                    logger.warning(
+                        f"An incremental field {orig_name} is being removed from the schema."
+                        " You should unset the incremental with"
+                        " `incremental=dlt.sources.incremental.EMPTY`"
+                    )
+                partial_table["columns"][orig_name] = col_schema
+
+            return partial_table if partial_table["columns"] else {}
+
+        tables = table_names if table_names else self.schema.data_table_names()
+
+        table_drops: TSchemaDrop = {}  # entire tables to drop
+        column_drops: TSchemaDrop = {}  # parts of tables to drop, as partial tables
+
+        # 1. Detect what needs to be dropped
+        for table_name in tables:
+            _, actual_col_schemas = list(self.get_storage_tables([table_name]))[0]
+
+            # no actual column schemas ->
+            # table doesn't exist ->
+            # we take the entire table schema as a schema drop
+            if not actual_col_schemas:
+                table = self.schema.get_table(table_name)
+                table_drops[table_name] = table
+                continue
+
+            # actual column schemas present ->
+            # we compare actual schemas with dlt ones ->
+            # we take the difference as a partial table
+            else:
+                partial_table = _diff_between_actual_and_dlt_schema(
+                    table_name,
+                    set(actual_col_schemas.keys()),
+                )
+                if partial_table:
+                    column_drops[table_name] = partial_table
+
+        # 2. For entire table drops, make sure no orphaned child tables remain
+        for table_name in table_drops.copy():
+            child_tables = self.schema.get_child_tables(table_name)
+            orphaned_table_names: List[str] = []
+            for child_table in child_tables:
+                if child_table["name"] not in table_drops:
+                    orphaned_table_names.append(child_table["name"])
+            if orphaned_table_names:
+                table_drops.pop(table_name)
+                logger.warning(
+                    f"Removing table '{table_name}' from the dlt schema would leave orphan"
+                    f" table(s): {', '.join(repr(t) for t in orphaned_table_names)}. Drop these"
+                    " child tables in the destination and sync the dlt schema again."
+                )
+
+        # 3. If it's not a dry run, we actually drop from the dlt schema
+        if not dry_run:
+            for table_name in table_drops:
+                self.schema.tables.pop(table_name)
+            for table_name, partial_table in column_drops.items():
+                col_names = list(partial_table["columns"])
+                self.schema.drop_columns(table_name, col_names)
+
+        return {**table_drops, **column_drops}

     def prepare_load_table(self, table_name: str) -> PreparedTableSchema:
         """Prepares a table schema to be loaded by filling missing hints and doing other modifications required by given destination.
@@ -639,6 +726,22 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]:
         pass


+class WithTableReflection(ABC):
+    @abstractmethod
+    def get_storage_tables(
+        self, table_names: Iterable[str]
+    ) -> Iterable[Tuple[str, TTableSchemaColumns]]:
+        """Uses INFORMATION_SCHEMA to retrieve table and column information for the tables in `table_names`.
+        Table names should be normalized according to the naming convention and will be further converted to the desired
+        casing in order to (in most cases) create a case-insensitive name suitable for searching the information schema.
+
+        The column names are returned as they appear in the information schema. To match them with the columns of an
+        existing table, use the `schema.get_new_table_columns` method and pass the correct casing. Most of the casing
+        functions are irreversible, so it is not possible to convert identifiers from the INFORMATION_SCHEMA back into
+        a case-sensitive dlt schema.
+        """
+        pass
+
+
 class WithStagingDataset(ABC):
     """Adds capability to use staging dataset and request it from the loader"""

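For context, a minimal sketch of how the new `update_dlt_schema` API might be exercised once a destination client is obtained from a pipeline. This usage is an assumption, not part of the commit; the pipeline and dataset names are hypothetical.

import dlt

# any destination whose client mixes in WithTableReflection should qualify
pipeline = dlt.pipeline("my_pipeline", destination="duckdb", dataset_name="my_data")

with pipeline.destination_client() as client:
    # dry_run=True only reports what would be dropped; self.schema is untouched
    proposed = client.update_dlt_schema(dry_run=True)
    for table_name, partial_table in (proposed or {}).items():
        print(table_name, list(partial_table.get("columns", {})))
    # a second call without dry_run applies the drops to the dlt schema
    client.update_dlt_schema()
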
dlt/common/schema/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -8,13 +8,15 @@
     TColumnHint,
     TColumnSchema,
     TColumnSchemaBase,
+    TSchemaDrop,
 )
 from dlt.common.schema.typing import COLUMN_HINTS
 from dlt.common.schema.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE
 from dlt.common.schema.exceptions import DataValidationError
 from dlt.common.schema.utils import verify_schema_hash

 __all__ = [
+    "TSchemaDrop",
     "TSchemaUpdate",
     "TSchemaTables",
     "TTableSchema",

dlt/common/schema/schema.py

Lines changed: 17 additions & 4 deletions
@@ -463,11 +463,24 @@ def drop_tables(
             self.data_item_normalizer.remove_table(table_name)
         return result

-    def drop_columns(self, table_name: str, column_names: Sequence[str]) -> List[TColumnSchema]:
-        """Drops columns from the table schema and returns the dropped columns"""
+    def drop_columns(self, table_name: str, column_names: Sequence[str]) -> TPartialTableSchema:
+        """Drops columns from the table schema and returns a partial table schema containing the dropped columns"""
+        table: TPartialTableSchema = {"name": table_name}
+        dropped_col_schemas: TTableSchemaColumns = {}
+
+        for col in column_names:
+            col_schema = self._schema_tables[table["name"]]["columns"].pop(col)
+            dropped_col_schemas[col] = col_schema
+
+        table["columns"] = dropped_col_schemas
+        return table
+
+    def get_child_tables(self, table_name: str) -> List[TTableSchema]:
+        """Returns tables that have `table_name` as their parent"""
         result = []
-        for col_name in column_names:
-            result.append(self._schema_tables[table_name]["columns"].pop(col_name))
+        for table in self.data_tables():
+            if table.get("parent", None) == table_name:
+                result.append(table)
         return result

     def filter_row_with_hint(

dlt/common/schema/typing.py

Lines changed: 1 addition & 0 deletions
@@ -315,6 +315,7 @@ class TPartialTableSchema(TTableSchema):

 TSchemaTables = Dict[str, TTableSchema]
 TSchemaUpdate = Dict[str, List[TPartialTableSchema]]
+TSchemaDrop = Dict[str, TPartialTableSchema]
 TColumnDefaultHint = Literal["not_null", TColumnHint]
 """Allows using not_null in default hints setting section"""
dlt/destinations/impl/athena/athena.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@
3737
TSortOrder,
3838
)
3939
from dlt.common.destination import DestinationCapabilitiesContext, PreparedTableSchema
40-
from dlt.common.destination.client import FollowupJobRequest, SupportsStagingDestination, LoadJob
40+
from dlt.common.destination.client import (
41+
FollowupJobRequest,
42+
SupportsStagingDestination,
43+
LoadJob,
44+
WithTableReflection,
45+
)
4146
from dlt.destinations.sql_jobs import (
4247
SqlStagingCopyFollowupJob,
4348
SqlStagingReplaceFollowupJob,
@@ -191,7 +196,7 @@ def _parse_and_log_lf_response(
191196
logger.debug(f"Success: {verb} LF tags {lf_tags} to " + resource_msg)
192197

193198

194-
class AthenaClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
199+
class AthenaClient(SqlJobClientWithStagingDataset, SupportsStagingDestination, WithTableReflection):
195200
def __init__(
196201
self,
197202
schema: Schema,

dlt/destinations/impl/bigquery/bigquery.py

Lines changed: 4 additions & 1 deletion
@@ -16,6 +16,7 @@
     RunnableLoadJob,
     SupportsStagingDestination,
     LoadJob,
+    WithTableReflection,
 )
 from dlt.common.json import json
 from dlt.common.runtime.signals import sleep
@@ -174,7 +175,9 @@ def gen_key_table_clauses(
     return sql


-class BigQueryClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
+class BigQueryClient(
+    SqlJobClientWithStagingDataset, SupportsStagingDestination, WithTableReflection
+):
     def __init__(
         self,
         schema: Schema,

dlt/destinations/impl/clickhouse/clickhouse.py

Lines changed: 4 additions & 1 deletion
@@ -21,6 +21,7 @@
     RunnableLoadJob,
     FollowupJobRequest,
     LoadJob,
+    WithTableReflection,
 )
 from dlt.common.schema import Schema, TColumnSchema
 from dlt.common.schema.typing import (
@@ -212,7 +213,9 @@ def requires_temp_table_for_delete(cls) -> bool:
         return True


-class ClickHouseClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
+class ClickHouseClient(
+    SqlJobClientWithStagingDataset, SupportsStagingDestination, WithTableReflection
+):
     def __init__(
         self,
         schema: Schema,

dlt/destinations/impl/databricks/databricks.py

Lines changed: 4 additions & 1 deletion
@@ -13,6 +13,7 @@
     RunnableLoadJob,
     SupportsStagingDestination,
     LoadJob,
+    WithTableReflection,
 )
 from dlt.common.configuration.specs import (
     AwsCredentialsWithoutDefaults,
@@ -302,7 +303,9 @@ def gen_delete_from_sql(
         """


-class DatabricksClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
+class DatabricksClient(
+    SqlJobClientWithStagingDataset, SupportsStagingDestination, WithTableReflection
+):
     def __init__(
         self,
         schema: Schema,

dlt/destinations/impl/dremio/dremio.py

Lines changed: 2 additions & 1 deletion
@@ -10,6 +10,7 @@
     SupportsStagingDestination,
     FollowupJobRequest,
     LoadJob,
+    WithTableReflection,
 )
 from dlt.common.schema import TColumnSchema, Schema
 from dlt.common.schema.typing import TColumnType, TTableFormat
@@ -97,7 +98,7 @@ def run(self) -> None:
         """)


-class DremioClient(SqlJobClientWithStagingDataset, SupportsStagingDestination):
+class DremioClient(SqlJobClientWithStagingDataset, SupportsStagingDestination, WithTableReflection):
     def __init__(
         self,
         schema: Schema,

dlt/destinations/impl/duckdb/duck.py

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@
     RunnableLoadJob,
     HasFollowupJobs,
     LoadJob,
+    WithTableReflection,
 )
 from dlt.common.schema.typing import TColumnSchema, TColumnType, TTableFormat
 from dlt.common.schema.utils import has_default_column_prop_value
@@ -49,7 +50,7 @@ def run(self) -> None:
         )


-class DuckDbClient(InsertValuesJobClient):
+class DuckDbClient(InsertValuesJobClient, WithTableReflection):
     def __init__(
         self,
         schema: Schema,

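Since `update_dlt_schema` raises `NotImplementedError` unless the client implements both `WithTableReflection` and `WithSqlClient`, a caller can probe support the same way before syncing. A sketch, with `client` obtained as in the earlier example:

from dlt.common.destination.client import WithTableReflection
from dlt.destinations.sql_client import WithSqlClient

if isinstance(client, WithTableReflection) and isinstance(client, WithSqlClient):
    client.update_dlt_schema(dry_run=True)
else:
    print("this destination does not support dlt schema sync")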