20 changes: 19 additions & 1 deletion dlt/common/destination/client.py
@@ -33,12 +33,16 @@
from dlt.common.metrics import LoadJobMetrics
from dlt.common.normalizers.naming import NamingConvention

from dlt.common.schema import Schema, TSchemaTables
from dlt.common.schema import Schema, TSchemaTables, TSchemaDrop
from dlt.common.schema.typing import (
C_DLT_ID,
C_DLT_LOAD_ID,
TLoaderReplaceStrategy,
TTableFormat,
TTableSchemaColumns,
TPartialTableSchema,
)
from dlt.common.schema.utils import get_nested_tables
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
from dlt.common.destination.exceptions import (
DestinationSchemaTampered,
@@ -608,6 +612,20 @@ def get_stored_state(self, pipeline_name: str) -> Optional[StateInfo]:
pass


class WithTableReflection(ABC):
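"""Adds capability to reflect tables and columns that physically exist in the destination"""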
@abstractmethod
def get_storage_tables(
self, table_names: Iterable[str]
) -> Iterable[Tuple[str, TTableSchemaColumns]]:
"""Retrieves table and column information for the specified tables.

Returns an iterator of tuples (table_name, columns_dict) where columns_dict
contains column schemas for existing tables, or is empty for non-existent tables.
Implementations use database introspection (INFORMATION_SCHEMA, table reflection) or file metadata.
"""
pass


class WithStagingDataset(ABC):
"""Adds capability to use staging dataset and request it from the loader"""

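For orientation, a minimal consumer-side sketch (not part of this diff; the helper name is an assumption) showing how the new interface and the DestinationTableReflectionNotSupported exception added below in exceptions.py fit together:

# Hypothetical helper (not in this PR): reflect destination columns when supported
from typing import Dict, Iterable

from dlt.common.destination.client import JobClientBase, WithTableReflection
from dlt.common.destination.exceptions import DestinationTableReflectionNotSupported
from dlt.common.schema.typing import TTableSchemaColumns


def reflect_storage_tables(
    client: JobClientBase, table_names: Iterable[str]
) -> Dict[str, TTableSchemaColumns]:
    # clients that cannot introspect their storage simply do not implement the interface
    if not isinstance(client, WithTableReflection):
        raise DestinationTableReflectionNotSupported(client.config.destination_name)
    # empty column dicts mark tables that do not exist in the destination
    return dict(client.get_storage_tables(table_names))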
8 changes: 8 additions & 0 deletions dlt/common/destination/exceptions.py
@@ -243,3 +243,11 @@ def __init__(self, pipeline_name: str, destination_name: str) -> None:
f"Filesystem Client not available for destination `{destination_name}` in pipeline"
f" `{pipeline_name}`",
)


class DestinationTableReflectionNotSupported(DestinationTerminalException):
def __init__(self, destination_name: str) -> None:
super().__init__(
f"Destination `{destination_name}` does not support table reflection. "
"Schema synchronization from destination is not available for this destination type."
)
9 changes: 7 additions & 2 deletions dlt/common/libs/deltalake.py
@@ -6,9 +6,9 @@
from dlt import version, Pipeline
from dlt.common import logger
from dlt.common.libs.pyarrow import pyarrow as pa
from dlt.common.libs.pyarrow import cast_arrow_schema_types
from dlt.common.libs.pyarrow import cast_arrow_schema_types, py_arrow_to_table_schema_columns
from dlt.common.libs.utils import load_open_tables
from dlt.common.schema.typing import TWriteDisposition, TTableSchema
from dlt.common.schema.typing import TWriteDisposition, TTableSchema, TTableSchemaColumns
from dlt.common.schema.utils import get_first_column_name_with_prop, get_columns_names_with_prop
from dlt.common.exceptions import MissingDependencyException, ValueErrorWithKnownValues
from dlt.common.storages import FilesystemConfiguration
@@ -217,3 +217,8 @@ def evolve_delta_table_schema(delta_table: DeltaTable, arrow_schema: pa.Schema)
if new_fields:
delta_table.alter.add_columns(new_fields)
return delta_table


def get_table_columns(table: DeltaTable) -> TTableSchemaColumns:
arrow_schema = table.schema().to_pyarrow()
return py_arrow_to_table_schema_columns(arrow_schema)
9 changes: 7 additions & 2 deletions dlt/common/libs/pyiceberg.py
@@ -7,10 +7,10 @@
from dlt.common import logger
from dlt.common.destination.exceptions import DestinationUndefinedEntity
from dlt.common.time import precise_time
from dlt.common.libs.pyarrow import cast_arrow_schema_types
from dlt.common.libs.pyarrow import cast_arrow_schema_types, py_arrow_to_table_schema_columns
from dlt.common.libs.utils import load_open_tables
from dlt.common.pipeline import SupportsPipeline
from dlt.common.schema.typing import TWriteDisposition, TTableSchema
from dlt.common.schema.typing import TWriteDisposition, TTableSchema, TTableSchemaColumns
from dlt.common.schema.utils import get_first_column_name_with_prop, get_columns_names_with_prop
from dlt.common.utils import assert_min_pkg_version
from dlt.common.exceptions import MissingDependencyException
@@ -248,3 +248,8 @@ def make_location(path: str, config: FilesystemConfiguration) -> str:
# pyiceberg cannot deal with windows absolute urls
location = location.replace("file:///", "file://")
return location


def get_table_columns(table: IcebergTable) -> TTableSchemaColumns:
arrow_schema = table.schema().as_arrow()
return py_arrow_to_table_schema_columns(arrow_schema)
2 changes: 2 additions & 0 deletions dlt/common/schema/__init__.py
@@ -8,13 +8,15 @@
TColumnHint,
TColumnSchema,
TColumnSchemaBase,
TSchemaDrop,
)
from dlt.common.schema.typing import COLUMN_HINTS
from dlt.common.schema.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE
from dlt.common.schema.exceptions import DataValidationError
from dlt.common.schema.utils import verify_schema_hash

__all__ = [
"TSchemaDrop",
"TSchemaUpdate",
"TSchemaTables",
"TTableSchema",
14 changes: 13 additions & 1 deletion dlt/common/schema/schema.py
@@ -367,14 +367,26 @@ def drop_tables(
) -> List[TTableSchema]:
"""Drops tables from the schema and returns the dropped tables"""
result = []
# TODO: make sure all nested tables to table_names are also dropped
# TODO: make sure all nested tables to table_names are also dropped,
for table_name in table_names:
table = self.get_table(table_name)
if table and (not seen_data_only or utils.has_table_seen_data(table)):
result.append(self._schema_tables.pop(table_name))
self.data_item_normalizer.remove_table(table_name)
return result

def drop_columns(self, table_name: str, column_names: Sequence[str]) -> TPartialTableSchema:
"""Drops columns from the table schema in place and returns the table schema with the dropped columns"""
table: TPartialTableSchema = {"name": table_name}
dropped_col_schemas: TTableSchemaColumns = {}

for col in column_names:
col_schema = self._schema_tables[table["name"]]["columns"].pop(col)
dropped_col_schemas[col] = col_schema

table["columns"] = dropped_col_schemas
return table

def filter_row_with_hint(
self, table_name: str, hint_type: TColumnDefaultHint, row: StrAny
) -> StrAny:
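A brief usage sketch of the new drop_columns helper (the schema, table, and columns below are made up for illustration):

# Hypothetical example of Schema.drop_columns (names are illustrative)
from dlt.common.schema import Schema

schema = Schema("example")
schema.update_table(
    {
        "name": "events",
        "columns": {
            "id": {"name": "id", "data_type": "bigint"},
            "legacy": {"name": "legacy", "data_type": "text"},
        },
    }
)

partial = schema.drop_columns("events", ["legacy"])
# the column is removed from the live schema in place...
assert "legacy" not in schema.get_table_columns("events")
# ...and the returned partial table contains only the dropped column schemas
assert list(partial["columns"]) == ["legacy"]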
1 change: 1 addition & 0 deletions dlt/common/schema/typing.py
@@ -315,6 +315,7 @@ class TPartialTableSchema(TTableSchema):

TSchemaTables = Dict[str, TTableSchema]
TSchemaUpdate = Dict[str, List[TPartialTableSchema]]
TSchemaDrop = Dict[str, TPartialTableSchema]
TColumnDefaultHint = Literal["not_null", TColumnHint]
"""Allows using not_null in default hints setting section"""

9 changes: 9 additions & 0 deletions dlt/common/warnings.py
@@ -65,6 +65,15 @@ def __init__(self, message: str, *args: typing.Any, expected_due: VersionString
)


class Dlt1160DeprecationWarning(DltDeprecationWarning):
V1160 = semver.Version.parse("1.16.0")

def __init__(self, message: str, *args: typing.Any, expected_due: VersionString = None) -> None:
super().__init__(
message, *args, since=Dlt1160DeprecationWarning.V1160, expected_due=expected_due
)


# show dlt deprecations once
warnings.simplefilter("once", DltDeprecationWarning)

6 changes: 5 additions & 1 deletion dlt/destinations/impl/athena/athena.py
@@ -37,7 +37,11 @@
TSortOrder,
)
from dlt.common.destination import DestinationCapabilitiesContext, PreparedTableSchema
from dlt.common.destination.client import FollowupJobRequest, SupportsStagingDestination, LoadJob
from dlt.common.destination.client import (
FollowupJobRequest,
SupportsStagingDestination,
LoadJob,
)
from dlt.destinations.sql_jobs import (
SqlStagingCopyFollowupJob,
SqlStagingReplaceFollowupJob,
2 changes: 1 addition & 1 deletion dlt/destinations/impl/destination/destination.py
@@ -8,7 +8,7 @@
from dlt.common.storages.load_storage import ParsedLoadJobFileName
from dlt.common.configuration import create_resolved_partial

from dlt.common.schema import Schema, TSchemaTables
from dlt.common.schema import Schema, TSchemaTables, TSchemaDrop
from dlt.common.destination import DestinationCapabilitiesContext

from dlt.destinations.impl.destination.configuration import CustomDestinationClientConfiguration
2 changes: 1 addition & 1 deletion dlt/destinations/impl/dummy/dummy.py
@@ -15,7 +15,7 @@
import time
from dlt.common.metrics import LoadJobMetrics
from dlt.common.pendulum import pendulum
from dlt.common.schema import Schema, TSchemaTables
from dlt.common.schema import Schema, TSchemaTables, TSchemaDrop
from dlt.common.storages import FileStorage
from dlt.common.storages.load_package import LoadJobInfo
from dlt.common.destination import DestinationCapabilitiesContext
41 changes: 39 additions & 2 deletions dlt/destinations/impl/filesystem/filesystem.py
@@ -23,10 +23,13 @@
from dlt.common.metrics import LoadJobMetrics
from dlt.common.schema.exceptions import TableNotFound
from dlt.common.schema.typing import (
C_DLT_ID,
C_DLT_LOAD_ID,
C_DLT_LOADS_TABLE_LOAD_ID,
TTableFormat,
TTableSchemaColumns,
TSchemaDrop,
TPartialTableSchema,
)
from dlt.common.storages.exceptions import (
CurrentLoadPackageStateNotAvailable,
@@ -60,6 +63,7 @@
StorageSchemaInfo,
StateInfo,
LoadJob,
WithTableReflection,
)
from dlt.common.destination.exceptions import (
DestinationUndefinedEntity,
@@ -279,6 +283,7 @@ class FilesystemClient(
WithStagingDataset,
WithStateSync,
SupportsOpenTables,
WithTableReflection,
):
fs_client: AbstractFileSystem
# a path (without the scheme) to a location in the bucket where dataset is present
@@ -468,7 +473,12 @@ def drop_tables(self, *tables: str, delete_schema: bool = True) -> None:
def get_storage_tables(
self, table_names: Iterable[str]
) -> Iterable[Tuple[str, TTableSchemaColumns]]:
"""Yields tables that have files in storage, returns columns from current schema"""
"""Yield (table_name, column_schemas) pairs for tables that have files in storage.

For Delta and Iceberg tables, the columns present in the actual table metadata
are returned. For tables using regular file formats, the column schemas come from the
dlt schema instead, since their real schema cannot be reflected directly.
"""
for table_name in table_names:
table_dir = self.get_table_dir(table_name)
if (
@@ -478,7 +488,34 @@ def get_storage_tables(
and len(self.list_table_files(table_name)) > 0
):
if table_name in self.schema.tables:
yield (table_name, self.schema.get_table_columns(table_name))
# If it's an open table, reflect only the columns that actually exist in the table metadata
if self.is_open_table("iceberg", table_name):
from dlt.common.libs.pyiceberg import (
get_table_columns as get_iceberg_table_columns,
)

iceberg_table = self.load_open_table("iceberg", table_name)
col_schemas = get_iceberg_table_columns(iceberg_table)
yield (table_name, col_schemas)

elif self.is_open_table("delta", table_name):
from dlt.common.libs.deltalake import (
get_table_columns as get_delta_table_columns,
)

delta_table = self.load_open_table("delta", table_name)
col_schemas = get_delta_table_columns(delta_table)
yield (table_name, col_schemas)

else:
logger.warning(
f"Table '{table_name}' does not use a table format and does not support"
" true schema reflection. Returning column schemas from the dlt"
" schema, which may be stale if the underlying files were manually"
" modified. "
)
yield (table_name, self.schema.get_table_columns(table_name))

Comment on lines +511 to +518 (Contributor Author):

Just realized that for parquet files we can also just use pyarrow and read actual metadata 👀, but I still don't think people drop columns in parquet files...

else:
yield (table_name, {"_column": {}})
else:
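Regarding the review comment above on parquet metadata: a minimal sketch (not part of this PR; the helper name and signature are assumptions) of how a parquet footer could be reflected with pyarrow and converted with the same helper the open-table branches use:

# Hypothetical helper (not in this PR): reflect columns from a parquet file footer
import pyarrow.parquet as pq

from dlt.common.libs.pyarrow import py_arrow_to_table_schema_columns
from dlt.common.schema.typing import TTableSchemaColumns


def get_parquet_table_columns(file_path: str) -> TTableSchemaColumns:
    # read_schema only reads the footer metadata; no row groups are loaded
    arrow_schema = pq.read_schema(file_path)
    return py_arrow_to_table_schema_columns(arrow_schema)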
13 changes: 11 additions & 2 deletions dlt/destinations/job_client_impl.py
@@ -27,6 +27,7 @@
from dlt.common.destination.utils import resolve_replace_strategy
from dlt.common.json import json
from dlt.common.schema.typing import (
C_DLT_ID,
C_DLT_LOAD_ID,
C_DLT_LOADS_TABLE_LOAD_ID,
COLUMN_HINTS,
@@ -44,7 +45,13 @@
from dlt.common.utils import read_dialect_and_sql
from dlt.common.storages import FileStorage
from dlt.common.storages.load_package import LoadJobInfo, ParsedLoadJobFileName
from dlt.common.schema import TColumnSchema, Schema, TTableSchemaColumns, TSchemaTables
from dlt.common.schema import (
TColumnSchema,
Schema,
TTableSchemaColumns,
TSchemaTables,
TSchemaDrop,
)
from dlt.common.schema import TColumnHint
from dlt.common.destination.client import (
PreparedTableSchema,
@@ -60,6 +67,7 @@
JobClientBase,
HasFollowupJobs,
CredentialsConfiguration,
WithTableReflection,
)

from dlt.destinations.exceptions import DatabaseUndefinedRelation
@@ -74,6 +82,7 @@
info_schema_null_to_bool,
verify_schema_merge_disposition,
verify_schema_replace_disposition,
update_dlt_schema,
)

import sqlglot
@@ -240,7 +249,7 @@ def __init__(
self._bucket_path = ReferenceFollowupJobRequest.resolve_reference(file_path)


class SqlJobClientBase(WithSqlClient, JobClientBase, WithStateSync):
class SqlJobClientBase(WithSqlClient, JobClientBase, WithStateSync, WithTableReflection):
def __init__(
self,
schema: Schema,