@@ -585,6 +585,82 @@ More specifically, Parquet C++ supports:
585
585
* EncryptionWithFooterKey and EncryptionWithColumnKey modes.
586
586
* Encrypted Footer and Plaintext Footer modes.
587
587
588
+ Configuration
589
+ ~~~~~~~~~~~~~
590
+
591
+ An example of writing a dataset using the encrypted Parquet file format:
592
+
593
+ .. code-block:: cpp
594
+
595
+ #include <arrow/util/logging.h>
596
+
597
+ #include "arrow/dataset/file_parquet.h"
598
+ #include "arrow/dataset/parquet_encryption_config.h"
599
+ #include "arrow/testing/gtest_util.h"
600
+ #include "parquet/encryption/crypto_factory.h"
601
+
602
+ using arrow::internal::checked_pointer_cast;
603
+
604
+ auto crypto_factory = std::make_shared<parquet::encryption::CryptoFactory>();
605
+ parquet::encryption::KmsClientFactory kms_client_factory = ...;
606
+ crypto_factory->RegisterKmsClientFactory(std::move(kms_client_factory));
607
+ auto kms_connection_config = std::make_shared<parquet::encryption::KmsConnectionConfig>();
608
+
609
+ // Set write options with encryption configuration.
610
+ auto encryption_config =
611
+ std::make_shared<parquet::encryption::EncryptionConfiguration>(
612
+ std::string("footer_key"));
613
+ encryption_config->column_keys = "col_key: a";
614
+ auto parquet_encryption_config = std::make_shared<ParquetEncryptionConfig>();
615
+ // Directly assign shared_ptr objects to ParquetEncryptionConfig members
616
+ parquet_encryption_config->crypto_factory = crypto_factory;
617
+ parquet_encryption_config->kms_connection_config = kms_connection_config;
618
+ parquet_encryption_config->encryption_config = std::move(encryption_config);
619
+
620
+ auto file_format = std::make_shared<ParquetFileFormat>();
621
+ auto parquet_file_write_options =
622
+ checked_pointer_cast<ParquetFileWriteOptions>(file_format->DefaultWriteOptions());
623
+ parquet_file_write_options->parquet_encryption_config =
624
+ std::move(parquet_encryption_config);
625
+
626
+ // Write dataset.
627
+ std::shared_ptr<arrow::Table> table = ...;
628
+ auto dataset = std::make_shared<InMemoryDataset>(table);
629
+ EXPECT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
630
+ EXPECT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
631
+
632
+ FileSystemDatasetWriteOptions write_options;
633
+ write_options.file_write_options = parquet_file_write_options;
634
+ write_options.base_dir = "example.parquet";
635
+ ARROW_CHECK_OK(FileSystemDataset::Write(write_options, std::move(scanner)));
636
+
637
+ Column encryption is configured by setting ``encryption_config->column_keys`` to a string
638
+ of the format ``"masterKeyID:colName,colName;masterKeyID:colName..."``.
639
+
640
+ Encrypting columns that have nested fields (for instance struct, map, or even list data types)
641
+ requires configuring column keys for the inner fields, not the column itself.
642
+ Configuring a column key for the column itself causes this error (here column name is ``col``):
643
+
644
+ .. code-block::
645
+
646
+ OSError: Encrypted column col not in file schema
647
+
648
+ An example encryption configuration for columns with nested fields:
649
+
650
+ .. code-block:: cpp
651
+
652
+ auto table_schema = schema({
653
+ field("ListColumn", list(int32())),
654
+ field("MapColumn", map(utf8(), int32())),
655
+ field("StructColumn", struct_({field("f1", int32()), field("f2", utf8())})),
656
+ });
657
+
658
+ encryption_config->column_keys = "column_key_name: "
658
+ "ListColumn.list.element, "
659
+ "MapColumn.key_value.key, MapColumn.key_value.value, "
660
+ "StructColumn.f1, StructColumn.f2";
662
+
663
+
588
664
Miscellaneous
589
665
-------------
590
666
0 commit comments