@@ -13,7 +13,7 @@ use etl::test_utils::test_schema::{
1313 build_expected_users_inserts, get_n_integers_sum, get_users_age_sum_from_rows,
1414 insert_mock_data, insert_users_data, setup_test_database_schema,
1515} ;
16- use etl:: types:: { EventType , PipelineId } ;
16+ use etl:: types:: { Event , EventType , InsertEvent , PipelineId } ;
1717use etl_config:: shared:: BatchConfig ;
1818use etl_postgres:: below_version;
1919use etl_postgres:: replication:: slots:: EtlReplicationSlot ;
@@ -987,3 +987,109 @@ async fn table_without_primary_key_is_errored() {
987987 let events = destination. get_events ( ) . await ;
988988 assert ! ( events. is_empty( ) ) ;
989989}
990+
991+ #[ tokio:: test( flavor = "multi_thread" ) ]
992+ async fn pipeline_respects_column_level_publication ( ) {
993+ init_test_tracing ( ) ;
994+ let database = spawn_source_database ( ) . await ;
995+
996+ // Column filters in publication are only available from Postgres 15+.
997+ if below_version ! ( database. server_version( ) , POSTGRES_15 ) {
998+ eprintln ! ( "Skipping test: PostgreSQL 15+ required for column filters" ) ;
999+ return ;
1000+ }
1001+
1002+ // Create a table with multiple columns including a sensitive 'email' column.
1003+ let table_name = test_table_name ( "users" ) ;
1004+ let table_id = database
1005+ . create_table (
1006+ table_name. clone ( ) ,
1007+ true ,
1008+ & [
1009+ ( "name" , "text not null" ) ,
1010+ ( "age" , "integer not null" ) ,
1011+ ( "email" , "text not null" ) ,
1012+ ] ,
1013+ )
1014+ . await
1015+ . unwrap ( ) ;
1016+
1017+ // Create publication with only a subset of columns (excluding 'email').
1018+ let publication_name = "test_pub" . to_string ( ) ;
1019+ database
1020+ . run_sql ( & format ! (
1021+ "create publication {publication_name} for table {} (id, name, age)" ,
1022+ table_name. as_quoted_identifier( )
1023+ ) )
1024+ . await
1025+ . expect ( "Failed to create publication with column filter" ) ;
1026+
1027+ let state_store = NotifyingStore :: new ( ) ;
1028+ let destination = TestDestinationWrapper :: wrap ( MemoryDestination :: new ( ) ) ;
1029+
1030+ let pipeline_id: PipelineId = random ( ) ;
1031+ let mut pipeline = create_pipeline (
1032+ & database. config ,
1033+ pipeline_id,
1034+ publication_name. clone ( ) ,
1035+ state_store. clone ( ) ,
1036+ destination. clone ( ) ,
1037+ ) ;
1038+
1039+ // Wait for the table to finish syncing.
1040+ let sync_done_notify = state_store
1041+ . notify_on_table_state_type ( table_id, TableReplicationPhaseType :: SyncDone )
1042+ . await ;
1043+
1044+ pipeline. start ( ) . await . unwrap ( ) ;
1045+
1046+ sync_done_notify. notified ( ) . await ;
1047+
1048+ // Wait for two insert events to be processed.
1049+ let insert_events_notify = destination
1050+ . wait_for_events_count ( vec ! [ ( EventType :: Insert , 2 ) ] )
1051+ . await ;
1052+
1053+ // Insert test data with all columns (including email).
1054+ database
1055+ . run_sql ( & format ! (
1056+ "insert into {} (name, age, email) values ('Alice', 25, '[email protected] '), ('Bob', 30, '[email protected] ')" , 1057+ table_name. as_quoted_identifier( )
1058+ ) )
1059+ . await
1060+ . unwrap ( ) ;
1061+
1062+ insert_events_notify. notified ( ) . await ;
1063+
1064+ pipeline. shutdown_and_wait ( ) . await . unwrap ( ) ;
1065+
1066+ // Verify the events and check that only published columns are included.
1067+ let events = destination. get_events ( ) . await ;
1068+ let grouped_events = group_events_by_type_and_table_id ( & events) ;
1069+ let insert_events = grouped_events. get ( & ( EventType :: Insert , table_id) ) . unwrap ( ) ;
1070+ assert_eq ! ( insert_events. len( ) , 2 ) ;
1071+
1072+ // Check that each insert event contains only the published columns (id, name, age).
1073+ // Since Cell values don't include column names, we verify by checking the count.
1074+ for event in insert_events {
1075+ if let Event :: Insert ( InsertEvent { table_row, .. } ) = event {
1076+ // Verify exactly 3 columns (id, name, age).
1077+ // If email was included, there would be 4 values.
1078+ assert_eq ! ( table_row. values. len( ) , 3 ) ;
1079+ }
1080+ }
1081+
1082+ // Also verify the stored table schema only includes published columns.
1083+ let table_schemas = state_store. get_table_schemas ( ) . await ;
1084+ let stored_schema = table_schemas. get ( & table_id) . unwrap ( ) ;
1085+ let column_names: Vec < & str > = stored_schema
1086+ . column_schemas
1087+ . iter ( )
1088+ . map ( |c| c. name . as_str ( ) )
1089+ . collect ( ) ;
1090+ assert ! ( column_names. contains( & "id" ) ) ;
1091+ assert ! ( column_names. contains( & "name" ) ) ;
1092+ assert ! ( column_names. contains( & "age" ) ) ;
1093+ assert ! ( !column_names. contains( & "email" ) ) ;
1094+ assert_eq ! ( stored_schema. column_schemas. len( ) , 3 ) ;
1095+ }
0 commit comments