@@ -81,25 +81,68 @@ std::shared_ptr<Field> FieldForArray(const std::shared_ptr<Array>& array,
8181}
8282
8383std::vector<WriteConfig> GetWriteConfigurations () {
84+   auto  default_properties_builder = [] {
85+     auto  builder = WriterProperties::Builder ();
86+     //  Override current default of 1MB
87+     builder.data_pagesize (10'000 );
88+     //  Reduce max dictionary page size so that fewer pages are dict-encoded.
89+     builder.dictionary_pagesize_limit (1'000 );
90+     //  Emit various physical types for decimal columns
91+     builder.enable_store_decimal_as_integer ();
92+     //  DataPageV2 has more interesting features such as selective compression
93+     builder.data_page_version (parquet::ParquetDataPageVersion::V2);
94+     return  builder;
95+   };
96+ 
97+   auto  default_arrow_properties_builder = [] {
98+     auto  builder = ArrowWriterProperties::Builder ();
99+     //  Store the Arrow schema so as to exercise more data types when reading
100+     builder.store_schema ();
101+     return  builder;
102+   };
103+ 
84104  //  clang-format off
85-   auto  w_brotli = WriterProperties::Builder ()
86-       .disable_dictionary (" no_dict"  )
87-       ->compression (" compressed"  , Compression::BROTLI)
88-       //  Override current default of 1MB
89-       ->data_pagesize (20'000 )
90-       //  Reduce max dictionary page size so that less pages are dict-encoded.
91-       ->dictionary_pagesize_limit (1'000 )
92-       //  Emit various physical types for decimal columns
93-       ->enable_store_decimal_as_integer ()
105+   auto  w_uncompressed = default_properties_builder ()
106+       .build ();
107+   //  compressed columns with dictionary disabled
108+   auto  w_brotli = default_properties_builder ()
109+       .disable_dictionary ()
110+       ->compression (Compression::BROTLI)
111+       ->build ();
112+   auto  w_gzip = default_properties_builder ()
113+       .disable_dictionary ()
114+       ->compression (Compression::GZIP)
94115      ->build ();
95-   //  Store the Arrow schema so as to exercise more data types when reading 
96-   auto  a_default = ArrowWriterProperties::Builder{} 
97-       . store_schema ( )
116+   auto  w_lz4 =  default_properties_builder () 
117+       . disable_dictionary () 
118+       -> compression (Compression::LZ4 )
98119      ->build ();
120+   auto  w_snappy = default_properties_builder ()
121+       .disable_dictionary ()
122+       ->compression (Compression::SNAPPY)
123+       ->build ();
124+   auto  w_zstd = default_properties_builder ()
125+       .disable_dictionary ()
126+       ->compression (Compression::ZSTD)
127+       ->build ();
128+   //  v1 data pages
129+   auto  w_pages_v1 = default_properties_builder ()
130+       .disable_dictionary ()
131+       ->compression (Compression::LZ4)
132+       ->data_page_version (parquet::ParquetDataPageVersion::V1)
133+       ->build ();
134+ 
135+   auto  a_default = default_arrow_properties_builder ().build ();
99136  //  clang-format on
100137
101138  std::vector<WriteConfig> configs;
139+   configs.push_back ({w_uncompressed, a_default});
102140  configs.push_back ({w_brotli, a_default});
141+   configs.push_back ({w_gzip, a_default});
142+   configs.push_back ({w_lz4, a_default});
143+   configs.push_back ({w_snappy, a_default});
144+   configs.push_back ({w_zstd, a_default});
145+   configs.push_back ({w_pages_v1, a_default});
103146  return  configs;
104147}
105148
@@ -255,8 +298,6 @@ Result<std::vector<Column>> ExampleColumns(int32_t length,
255298
256299  //  TODO extension types: UUID, JSON, GEOMETRY, GEOGRAPHY
257300
258-   //  A non-dict-encoded column (see GetWriteConfigurations)
259-   columns.push_back ({" no_dict"  , gen.String (length, 0 , 30 , null_probability)});
260301  //  A column that should be quite compressible (see GetWriteConfigurations)
261302  columns.push_back ({" compressed"  , gen.Int64 (length, -10 , 10 , null_probability)});
262303
0 commit comments