From 5269be5c5113c1d7d1ff331dc4fdecdea92388a1 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 4 Dec 2025 12:41:35 +1300 Subject: [PATCH 1/4] Run docs build on PRs --- .../workflows/{publish-docs.yml => docs.yml} | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) rename .github/workflows/{publish-docs.yml => docs.yml} (65%) diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/docs.yml similarity index 65% rename from .github/workflows/publish-docs.yml rename to .github/workflows/docs.yml index 817dac3b..0210c784 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/docs.yml @@ -1,17 +1,11 @@ -name: Publish Docs +name: Build documentation on: push: - branches: - - master - -permissions: - actions: read - pages: write - id-token: write + pull_request: jobs: - build-and-deploy: + build: runs-on: ubuntu-latest steps: - name: Checkout Repository @@ -42,5 +36,17 @@ jobs: with: path: './docs/_site' + deploy: + if: github.event_name == 'push' && github.ref == 'refs/heads/master' && !github.event.repository.fork + runs-on: ubuntu-latest + needs: build + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: - name: Deploy to GitHub Pages + id: deployment uses: actions/deploy-pages@v4 From 139aff6c696c6f85575cdeef8a7903e6e86c3748 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 4 Dec 2025 12:44:07 +1300 Subject: [PATCH 2/4] Run preprocess docs script --- docs/guides/Arrow.md | 28 ++++++++++++++-------------- docs/guides/Encryption.md | 34 +++++++++++++++++----------------- docs/guides/Nested.md | 10 +++++----- docs/guides/PowerShell.md | 6 +++--- docs/guides/Reading.md | 16 ++++++++-------- docs/guides/RowOriented.md | 4 ++-- docs/guides/TimeSpan.md | 4 ++-- docs/guides/TypeFactories.md | 12 ++++++------ docs/guides/Writing.md | 26 +++++++++++++------------- 9 files changed, 70 insertions(+), 70 deletions(-) diff --git a/docs/guides/Arrow.md b/docs/guides/Arrow.md index d517199f..63b3337b 100644 --- a/docs/guides/Arrow.md +++ b/docs/guides/Arrow.md @@ -4,14 +4,14 @@ The Apache Parquet C++ library provides APIs for reading and writing data in the These are wrapped by ParquetSharp using the [Arrow C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) to allow high performance reading and writing of Arrow data with zero copying of array data between C++ and .NET. -The Arrow API is contained in the `ParquetSharp.Arrow` namespace, +The Arrow API is contained in the @ParquetSharp.Arrow namespace, and included in the [ParquetSharp NuGet package](https://www.nuget.org/packages/ParquetSharp/). ## Reading Arrow data -Reading Parquet data in Arrow format uses a `ParquetSharp.Arrow.FileReader`. -This can be constructed using a file path, a .NET `System.IO.Stream`, -or a subclass of `ParquetSharp.IO.RandomAccessFile`. +Reading Parquet data in Arrow format uses a @ParquetSharp.Arrow.FileReader. +This can be constructed using a file path, a .NET @System.IO.Stream, +or a subclass of @ParquetSharp.IO.RandomAccessFile. In this example, we'll open a file using a path: ```csharp @@ -68,9 +68,9 @@ the reader properties, discussed below. 
### Reader properties -The `ParquetSharp.Arrow.FileReader` constructor accepts an instance of -`ParquetSharp.ReaderProperties` to control standard Parquet reading behaviour, -and additionally accepts an instance of `ParquetSharp.Arrow.ArrowReaderProperties` +The @ParquetSharp.Arrow.FileReader constructor accepts an instance of +@ParquetSharp.ReaderProperties to control standard Parquet reading behaviour, +and additionally accepts an instance of @ParquetSharp.Arrow.ArrowReaderProperties to customise Arrow specific behaviour: ```csharp @@ -94,7 +94,7 @@ using var fileReader = new FileReader( ## Writing Arrow data -The `ParquetSharp.Arrow.FileWriter` class allows writing Parquet files +The @ParquetSharp.Arrow.FileWriter class allows writing Parquet files using Arrow format data. In this example we'll walk through writing a file with a timestamp, @@ -134,15 +134,15 @@ RecordBatch GetBatch(int batchNumber) => }, numIds); ``` -Now we create a `ParquetSharp.Arrow.FileWriter`, specifying the path to write to and the +Now we create a @ParquetSharp.Arrow.FileWriter, specifying the path to write to and the file schema: ```csharp using var writer = new FileWriter("data.parquet", schema); ``` -Rather than specifying a file path, we could also write to a .NET `System.IO.Stream` -or a subclass of `ParquetSharp.IO.OutputStream`. +Rather than specifying a file path, we could also write to a .NET @System.IO.Stream +or a subclass of @ParquetSharp.IO.OutputStream. ### Writing data in batches @@ -207,9 +207,9 @@ writer.Close(); ### Writer properties -The `ParquetSharp.Arrow.FileWriter` constructor accepts an instance of -`ParquetSharp.WriterProperties` to control standard Parquet writing behaviour, -and additionally accepts an instance of `ParquetSharp.Arrow.ArrowWriterProperties` +The @ParquetSharp.Arrow.FileWriter constructor accepts an instance of +@ParquetSharp.WriterProperties to control standard Parquet writing behaviour, +and additionally accepts an instance of @ParquetSharp.Arrow.ArrowWriterProperties to customise Arrow specific behaviour: ```csharp diff --git a/docs/guides/Encryption.md b/docs/guides/Encryption.md index 3ed7b7d1..4f3af070 100644 --- a/docs/guides/Encryption.md +++ b/docs/guides/Encryption.md @@ -27,7 +27,7 @@ Double wrapping is enabled by default. For further details, see the [Key Management Tools design document](https://docs.google.com/document/d/1bEu903840yb95k9q2X-BlsYKuXoygE4VnMDl9xz_zhk). -The Key Management Tools API is contained in the `ParquetSharp.Encryption` namespace. +The Key Management Tools API is contained in the @ParquetSharp.Encryption namespace. In order to use this API, a client for a Key Management Service must be implemented: @@ -55,7 +55,7 @@ internal sealed class MyKmsClient : IKmsClient ``` The main entrypoint for the Key Management Tools API is the -`ParquetSharp.Encryption.CryptoFactory` class. +@ParquetSharp.Encryption.CryptoFactory class. 
This requires a factory method for creating KMS clients, which are cached internally and periodically recreated: @@ -76,7 +76,7 @@ kmsConnectionConfig.KmsInstanceUrl = ...; kmsConnectionConfig.KeyAccessToken = ...; ``` -Then to configure how the file is encrypted, an `ParquetSharp.Encryption.EncryptionConfiguration` is created: +Then to configure how the file is encrypted, an @ParquetSharp.Encryption.EncryptionConfiguration is created: ```c# string footerKeyId = ...; @@ -113,7 +113,7 @@ encryptionConfig.PlaintextFooter = true; ``` The `kmsConnectionConfig` and `encryptionConfiguration` are used to generate -file encryption properties, which are used to build the `ParquetSharp.WriterProperties`: +file encryption properties, which are used to build the @ParquetSharp.WriterProperties: ```c# using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( @@ -126,7 +126,7 @@ using var writerProperties = writerPropertiesBuilder .Build(); ``` -Finally, the Parquet file can be written using the `ParquetSharp.WriterProperties`: +Finally, the Parquet file can be written using the @ParquetSharp.WriterProperties: ```c# Column[] columns = ...; @@ -136,9 +136,9 @@ using var fileWriter = new ParquetFileWriter(parquetFilePath, columns, writerPro ### Reading Encrypted Files -Reading encrypted files requires creating `ParquetSharp.FileDecryptionProperties` -with a `ParquetSharp.Encryption.CryptoFactory`, and adding these to the -`ParquetSharp.ReaderProperties`: +Reading encrypted files requires creating @ParquetSharp.FileDecryptionProperties +with a @ParquetSharp.Encryption.CryptoFactory, and adding these to the +@ParquetSharp.ReaderProperties: ```c# using var decryptionConfig = new DecryptionConfiguration(); @@ -164,7 +164,7 @@ Key material is stored inside the Parquet file metadata by default, but key material can also be stored in separate JSON files alongside Parquet files, to allow rotation of master keys without needing to rewrite the Parquet files. -This is configured in the `ParquetSharp.Encryption.EncryptionConfiguration`: +This is configured in the @ParquetSharp.Encryption.EncryptionConfiguration: ```c# using var encryptionConfig = new EncryptionConfiguration(footerKeyId); @@ -172,8 +172,8 @@ encryptionConfig.InternalKeyMaterial = false; // External key material ``` When using external key material, the path to the Parquet file being written or read -must be specified when creating `ParquetSharp.FileEncryptionProperties` and -`ParquetSharp.FileDecryptionProperties`: +must be specified when creating @ParquetSharp.FileEncryptionProperties and +@ParquetSharp.FileDecryptionProperties: ```c# using var fileEncryptionProperties = cryptoFactory.GetFileEncryptionProperties( @@ -247,7 +247,7 @@ using var fileDecryptionProperties = builder ``` Rather than having to specify decryption keys directly, a -`ParquetSharp.DecryptionKeyRetriever` can be used to retrieve keys +@ParquetSharp.DecryptionKeyRetriever can be used to retrieve keys based on the key metadata, to allow more flexibility: ```c# @@ -298,7 +298,7 @@ using var fileDecryptionProperties = builder If the AAD prefix doesn't match the expected prefix an exception will be thrown when reading the file. 
-Alternatively, you can implement an `ParquetSharp.AadPrefixVerifier` if you have more complex verification logic:
+Alternatively, you can implement an @ParquetSharp.AadPrefixVerifier if you have more complex verification logic:
 
 ```c#
 internal sealed class MyAadVerifier : ParquetSharp.AadPrefixVerifier
@@ -324,8 +324,8 @@ using var fileDecryptionProperties = builder
 
 ## Arrow API Compatibility
 
-Note that the above examples use the `ParquetSharp.ParquetFileReader` and
-`ParquetSharp.ParquetFileWriter` classes, but encryption may also be used with the Arrow API.
-The `ParquetSharp.Arrow.FileReader` and `ParquetSharp.Arrow.FileWriter` constructors
-accept `ParquetSharp.ReaderProperties` and `ParquetSharp.WriterProperties` parameters
+Note that the above examples use the @ParquetSharp.ParquetFileReader and
+@ParquetSharp.ParquetFileWriter classes, but encryption may also be used with the Arrow API.
+The @ParquetSharp.Arrow.FileReader and @ParquetSharp.Arrow.FileWriter constructors
+accept @ParquetSharp.ReaderProperties and @ParquetSharp.WriterProperties parameters
 respectively, which can have encryption properties configured.
diff --git a/docs/guides/Nested.md b/docs/guides/Nested.md
index 61ca337a..5a0da19a 100644
--- a/docs/guides/Nested.md
+++ b/docs/guides/Nested.md
@@ -7,7 +7,7 @@ but the Parquet format can be used to represent data with a complex nested struc
 
 In order to write a file with nested columns, we must define the Parquet file
 schema explicitly as a graph structure using schema nodes,
-rather than using ParquetSharp's `ParquetSharp.Column` type.
+rather than using ParquetSharp's @ParquetSharp.Column type.
 
 Imagine we have the following JSON object we would like to store as Parquet:
 
@@ -41,8 +41,8 @@ or we had a non-null object with a null `message` and null `ids`.
 Instead, we will represent this data in Parquet with a single
 `objects` column.
 
-In order to define the schema, we will be using `ParquetSharp.Schema.PrimitiveNode`
-and `ParquetSharp.Schema.GroupNode`.
+In order to define the schema, we will be using @ParquetSharp.Schema.PrimitiveNode
+and @ParquetSharp.Schema.GroupNode.
 
 In the Parquet schema, we have one top-level group node named `objects`,
 which contains two nested fields, `ids` and `message`.
@@ -74,7 +74,7 @@ using var schema = new GroupNode(
 
 ### Writing data
 
-We can then create a `ParquetSharp.ParquetFileWriter` with this schema:
+We can then create a @ParquetSharp.ParquetFileWriter with this schema:
 
 ```csharp
 using var propertiesBuilder = new WriterPropertiesBuilder();
@@ -85,7 +85,7 @@ using var fileWriter = new ParquetFileWriter("objects.parquet", schema, writerPr
 
 When writing data to this file,
 the leaf-level values written must be nested within ParquetSharp's
-`ParquetSharp.Nested` type to indicate they are contained in a group,
+@ParquetSharp.Nested type to indicate they are contained in a group,
 and allow nullable nested structures to be represented unambiguously.
 
 For example, both the `objects` and `message` fields are optional,
diff --git a/docs/guides/PowerShell.md b/docs/guides/PowerShell.md
index ca737a2f..33cd611a 100644
--- a/docs/guides/PowerShell.md
+++ b/docs/guides/PowerShell.md
@@ -1,6 +1,6 @@
 # ParquetSharp in PowerShell
 
-The main requirement to using ParquetSharp from PowerShell is that `ParquetSharpNative.dll` is in the `PATH` or in the same directory as `ParquetSharp.dll`. The following guide shows one possible approach to achieve this:
+The main requirement for using ParquetSharp from PowerShell is that `ParquetSharpNative.dll` is in the `PATH` or in the same directory as `ParquetSharp.dll`. The following guide shows one possible approach to achieve this:
 
 ### Installation
 
@@ -23,7 +23,7 @@ Copy-Item -Path ".\lib\System.Runtime.CompilerServices.Unsafe.4.5.3\lib\net461\S
 Copy-Item -Path ".\lib\System.ValueTuple.4.5.0\lib\net461\System.ValueTuple.dll" -Destination ".\bin"
 ```
 
-Finally, copy `ParquetSharp.dll` and `ParquetSharpNative.dll` into `bin`. This will depend on the current version of ParquetSharp, as well as your architecture and OS:
+Finally, copy `ParquetSharp.dll` and `ParquetSharpNative.dll` into `bin`. This will depend on the current version of ParquetSharp, as well as your architecture and OS:
 
 ```powershell
 # Replace path with the appropriate version of ParquetSharp
@@ -36,7 +36,7 @@ Copy-Item -Path ".\lib\ParquetSharp.12.1.0\runtimes\win-x64\native\ParquetSharpN
 The available runtime architectures are `win-x64`, `linux-x64`, `linux-arm64`, `osx-x64`, and `osx-arm64`.
 
 ### Usage
-Use `Add-Type` to load `ParquetSharp.dll`. Note that we're using custom directories:
+Use `Add-Type` to load `ParquetSharp.dll`. Note that we're using custom directories:
 
 ```powershell
 # Replace path with the appropriate versions of ParquetSharp
diff --git a/docs/guides/Reading.md b/docs/guides/Reading.md
index c6d1fc91..74cc1013 100644
--- a/docs/guides/Reading.md
+++ b/docs/guides/Reading.md
@@ -1,8 +1,8 @@
 # Reading Parquet files
 
-The low-level ParquetSharp API provides the `ParquetSharp.ParquetFileReader` class for reading Parquet files.
+The low-level ParquetSharp API provides the @ParquetSharp.ParquetFileReader class for reading Parquet files.
 This is usually constructed from a file path, but may also be constructed from a
-`ParquetSharp.IO.ManagedRandomAccessFile`, which wraps a .NET `System.IO.Stream` that supports seeking.
+@ParquetSharp.IO.ManagedRandomAccessFile, which wraps a .NET @System.IO.Stream that supports seeking.
 
 ```csharp
 using var fileReader = new ParquetFileReader("data.parquet");
@@ -15,7 +15,7 @@ using var fileReader = new ParquetFileReader(input);
 
 ### Obtaining file metadata
 
-The `ParquetSharp.FileMetaData` property of a `ParquetFileReader` exposes information about the Parquet file and its schema:
+The @ParquetSharp.FileMetaData property of a `ParquetFileReader` exposes information about the Parquet file and its schema:
 
 ```csharp
 int numColumns = fileReader.FileMetaData.NumColumns;
@@ -34,7 +34,7 @@ for (int columnIndex = 0; columnIndex < schema.NumColumns; ++columnIndex) {
 
 Parquet files store data in separate row groups, which all share the same schema,
 so if you wish to read all data in a file, you generally want to loop over all of the row groups
-and create a `ParquetSharp.RowGroupReader` for each one:
+and create a @ParquetSharp.RowGroupReader for each one:
 
 ```csharp
 for (int rowGroup = 0; rowGroup < fileReader.FileMetaData.NumRowGroups; ++rowGroup) {
@@ -45,10 +45,10 @@ for (int rowGroup = 0; rowGroup < fileReader.FileMetaData.NumRowGroups; ++rowGro
 
 ### Reading columns directly
 
-The `Column` method of `RowGroupReader` takes an integer column index and returns a `ParquetSharp.ColumnReader` object,
+The `Column` method of `RowGroupReader` takes an integer column index and returns a @ParquetSharp.ColumnReader object,
 which can read primitive values from the column, as well as raw definition level and repetition level data.
 Usually you will not want to use a `ColumnReader` directly, but instead call its `LogicalReader` method to
-create a `ParquetSharp.LogicalColumnReader` that can read logical values.
+create a @ParquetSharp.LogicalColumnReader that can read logical values.
 There are two variations of this `LogicalReader` method; the plain `LogicalReader` method returns an abstract `LogicalColumnReader`,
 whereas the generic `LogicalReader<TElement>` method returns a typed `LogicalColumnReader<TElement>`, which reads values of the specified element type.
 
@@ -96,7 +96,7 @@ When reading Timestamp to a DateTime, ParquetSharp sets the DateTimeKind based o
 If `IsAdjustedToUtc` is `true` the DateTimeKind will be set to `DateTimeKind.Utc`
 otherwise it will be set to `DateTimeKind.Unspecified`.
 
-This behavior can be overwritten by setting the AppContext switch `ParquetSharp.ReadDateTimeKindAsUnspecified` to `true`, so the DateTimeKind will be always set to `DateTimeKind.Unspecified` regardless of the value of `IsAdjustedToUtc`.
+This behavior can be overridden by setting the AppContext switch `ParquetSharp.ReadDateTimeKindAsUnspecified` to `true`, so the DateTimeKind will always be set to `DateTimeKind.Unspecified` regardless of the value of `IsAdjustedToUtc`.
 This also matches the old behavior of [ParquetSharp < 7.0.0](https://github.com/G-Research/ParquetSharp/pull/261)
 
 ```csharp
@@ -117,7 +117,7 @@ Some legacy implementations of Parquet write timestamps using the Int96 primitiv
 which has been [deprecated](https://issues.apache.org/jira/browse/PARQUET-323).
 ParquetSharp doesn't support reading Int96 values as .NET `DateTime`s as not all
 Int96 timestamp values are representable as a `DateTime`.
-However, there is limited support for reading raw Int96 values using the `ParquetSharp.Int96` type
+However, there is limited support for reading raw Int96 values using the @ParquetSharp.Int96 type
 and it is left to applications to decide how to interpret these values.
 
 ## Long path handling
diff --git a/docs/guides/RowOriented.md b/docs/guides/RowOriented.md
index 6e2a1db0..354bba32 100644
--- a/docs/guides/RowOriented.md
+++ b/docs/guides/RowOriented.md
@@ -70,8 +70,8 @@ using (var rowReader = ParquetFile.CreateRowReader("example.parquet"))
 
 ## Reading and writing custom types
 
-The row-oriented API supports reading and writing custom types by providing a `ParquetSharp.LogicalTypeFactory`
-and a `ParquetSharp.LogicalReadConverterFactory` or `ParquetSharp.LogicalWriteConverterFactory`.
+The row-oriented API supports reading and writing custom types by providing a @ParquetSharp.LogicalTypeFactory
+and a @ParquetSharp.LogicalReadConverterFactory or @ParquetSharp.LogicalWriteConverterFactory.
 
 ### Writing custom types
 
diff --git a/docs/guides/TimeSpan.md b/docs/guides/TimeSpan.md
index 5344410e..fbd19b18 100644
--- a/docs/guides/TimeSpan.md
+++ b/docs/guides/TimeSpan.md
@@ -110,6 +110,6 @@ Note that when using this approach, if you read the file back with ParquetSharp
 the data will be read as `long` values as there's no way to tell it was originally
 `TimeSpan` data.
 To read the data back as `TimeSpan`s, you'll also need to implement
-a custom `ParquetSharp.LogicalReadConverterFactory` and use the `LogicalReadOverride` method
-or provide a custom `ParquetSharp.LogicalTypeFactory`.
+a custom @ParquetSharp.LogicalReadConverterFactory and use the `LogicalReadOverride` method
+or provide a custom @ParquetSharp.LogicalTypeFactory.
 See the [type factories documentation](TypeFactories.md) for more details.
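To make the read-converter approach from the TimeSpan guide concrete, here is a minimal sketch of a custom read converter factory. The class name is hypothetical, and it assumes the column was written as `TimeSpan.Ticks` values stored as `long`; the override shape follows the `LogicalReadConverterFactory` API described in the type factories guide:

```csharp
using System;
using ParquetSharp;

// Hypothetical factory that converts long tick values back into TimeSpan values.
// Assumes the data was written as TimeSpan.Ticks (100ns units).
internal sealed class TimeSpanReadConverterFactory : LogicalReadConverterFactory
{
    public override Delegate GetConverter<TLogical, TPhysical>(
        ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData)
    {
        if (typeof(TLogical) == typeof(TimeSpan))
        {
            // Convert each physical long value into a TimeSpan
            return (LogicalRead<TimeSpan, long>.Converter) ((source, defLevels, destination, definedLevel) =>
            {
                for (var i = 0; i < source.Length; ++i)
                {
                    destination[i] = TimeSpan.FromTicks(source[i]);
                }
            });
        }
        return base.GetConverter<TLogical, TPhysical>(columnDescriptor, columnChunkMetaData);
    }
}
```

A reader would assign an instance of this factory to the `LogicalReadConverterFactory` property of the `ParquetFileReader` before requesting a `TimeSpan` column reader with the type-overriding method the guide mentions.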
diff --git a/docs/guides/TypeFactories.md b/docs/guides/TypeFactories.md
index d57a261f..024f0c99 100644
--- a/docs/guides/TypeFactories.md
+++ b/docs/guides/TypeFactories.md
@@ -10,22 +10,22 @@ This means that:
 
 ## API
 
-The API at the core of this is encompassed by the `ParquetSharp.LogicalTypeFactory`,
-`ParquetSharp.LogicalReadConverterFactory` and `ParquetSharp.LogicalWriteConverterFactory` classes.
+The API at the core of this is encompassed by the @ParquetSharp.LogicalTypeFactory,
+@ParquetSharp.LogicalReadConverterFactory and @ParquetSharp.LogicalWriteConverterFactory classes.
 These classes implement the default ParquetSharp type mapping and conversion logic,
 but may be subclassed in order to implement custom type mapping logic.
 The `LogicalTypeFactory` class also allows some customization of the default type mappings
 without needing to subclass it.
 
-Both `ParquetSharp.ParquetFileReader` and `ParquetSharp.ParquetFileWriter` have
+Both @ParquetSharp.ParquetFileReader and @ParquetSharp.ParquetFileWriter have
 `LogicalTypeFactory` properties that can be set to an instance of the `LogicalTypeFactory` class,
 while `LogicalReadConverterFactory` is only used by `ParquetFileReader`,
 and `LogicalWriteConverterFactory` is only used by `ParquetFileWriter`.
 
 Whenever the user uses a custom type to read or write values to a Parquet file, a
 `LogicalReadConverterFactory` or `LogicalWriteConverterFactory` needs to be provided, respectively.
-This converter factory tells to the `ParquetSharp.LogicalColumnReader` or
-`ParquetSharp.LogicalColumnWriter` how to convert between the user's custom type and a physical type
+This converter factory tells the @ParquetSharp.LogicalColumnReader or
+@ParquetSharp.LogicalColumnWriter how to convert between the user's custom type and a physical type
 that is understood by Parquet.
 
 On top of that, if the custom type is used for creating the schema (when writing),
@@ -33,7 +33,7 @@ or if accessing a `LogicalColumnReader` or `LogicalColumnWriter` without explici
 (e.g. `columnWriter.LogicalReaderOverride()`), then a `LogicalTypeFactory` is needed in order
 to establish the proper logical type mapping.
 
-In other words, the `LogicalTypeFactory` is required if the user provides a `ParquetSharp.Column` class with a custom type when writing,
+In other words, the `LogicalTypeFactory` is required if the user provides a @ParquetSharp.Column class with a custom type when writing,
 or gets the `LogicalColumnReader` or `LogicalColumnWriter` via the non type-overriding methods
 (in which case the factory is needed to know the full type of the logical column reader/writer).
 The corresponding converter factory is always needed if using a custom type that the default converter doesn't know how to handle.
diff --git a/docs/guides/Writing.md b/docs/guides/Writing.md
index 66e246e0..20514899 100644
--- a/docs/guides/Writing.md
+++ b/docs/guides/Writing.md
@@ -1,6 +1,6 @@
 # Writing Parquet files
 
-The low-level ParquetSharp API provides the `ParquetSharp.ParquetFileWriter` class for writing Parquet files.
+The low-level ParquetSharp API provides the @ParquetSharp.ParquetFileWriter class for writing Parquet files.
 
 ## Defining the schema
 
@@ -8,7 +8,7 @@ When writing a Parquet file, you must define the schema up-front, which specifie
 in the file along with their names and types.
 
 ParquetSharp provides a convenient higher level API for defining the schema as an array
-of `ParquetSharp.Column` objects.
+of @ParquetSharp.Column objects.
 
 A `Column` can be constructed using only a name and a type
 parameter that is used to determine the logical Parquet type to write:
@@ -23,13 +23,13 @@ var columns = new Column[]
 using var file = new ParquetFileWriter("float_timeseries.parquet", columns);
 ```
 
-The schema can also be defined using a graph of `ParquetSharp.Schema.Node` instances,
-starting from a root `ParquetSharp.Schema.GroupNode`. For concrete examples, see [How to write a file with nested columns](Nested.md).
+The schema can also be defined using a graph of @ParquetSharp.Schema.Node instances,
+starting from a root @ParquetSharp.Schema.GroupNode. For concrete examples, see [How to write a file with nested columns](Nested.md).
 
 ### Overriding logical types
 
 For more control over how values are represented in the Parquet file,
-you can pass a `ParquetSharp.LogicalType` instance as the `logicalTypeOverride` parameter of the `Column` constructor.
+you can pass a @ParquetSharp.LogicalType instance as the `logicalTypeOverride` parameter of the `Column` constructor.
 
 For example, you may wish to write times or timestamps with millisecond
 resolution rather than the default microsecond resolution:
 
@@ -50,7 +50,7 @@ var decimalColumn = new Column("Values", LogicalType.Decimal(precision:
 
 ### Metadata
 
 As well as defining the file schema, you may optionally provide key-value metadata that is stored in the file when creating
-a `ParquetSharp.ParquetFileWriter`:
+a @ParquetSharp.ParquetFileWriter:
 
 ```csharp
@@ -61,14 +61,14 @@ using var file = new ParquetFileWriter("float_timeseries.parquet", columns, keyV
 ```
 
 `ParquetFileWriter` constructor overloads are provided that allow specifying the type of compression to use, or for more
-fine-grained control over how files are written, you can provide a `ParquetSharp.WriterProperties` instance, which can
-be constructed with a `ParquetSharp.WriterPropertiesBuilder`.
+fine-grained control over how files are written, you can provide a @ParquetSharp.WriterProperties instance, which can
+be constructed with a @ParquetSharp.WriterPropertiesBuilder.
 This allows defining the compression and encoding on a per-column basis for example, or configuring file encryption.
 
 ## Writing to a stream
 
-As well as writing to a file path, ParquetSharp supports writing to a .NET `System.IO.Stream` using a
-`ParquetSharp.IO.ManagedOutputStream`:
+As well as writing to a file path, ParquetSharp supports writing to a .NET @System.IO.Stream using a
+@ParquetSharp.IO.ManagedOutputStream:
 
 ```csharp
 using (var stream = new FileStream("float_timeseries.parquet", FileMode.Create))
@@ -104,15 +104,15 @@ using (var valueWriter = rowGroup.NextColumn().LogicalWriter())
 }
 ```
 
-Once all data for a row group has been written and the `ParquetSharp.RowGroupWriter` disposed,
+Once all data for a row group has been written and the @ParquetSharp.RowGroupWriter disposed,
 you may append another row group to the file and repeat the row group writing process.
 
-The `NextColumn` method of `RowGroupWriter` returns a `ParquetSharp.ColumnWriter`, which writes physical values to the file,
+The `NextColumn` method of `RowGroupWriter` returns a @ParquetSharp.ColumnWriter, which writes physical values to the file,
 and can write definition level and repetition level data to support nullable and array values.
 
### Using LogicalColumnWriter -Rather than working with a `ColumnWriter` directly, it's usually more convenient to create a `ParquetSharp.LogicalColumnWriter` +Rather than working with a `ColumnWriter` directly, it's usually more convenient to create a @ParquetSharp.LogicalColumnWriter with the `ColumnWriter.LogicalWriter` method. This allows writing an array or `ReadOnlySpan` of `TElement` to the column data, where `TElement` is the .NET type corresponding to the column's logical element type. From 41c0d53b5c35e32f024baef6f8bf93c0a1346a50 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 4 Dec 2025 12:44:26 +1300 Subject: [PATCH 3/4] Remove preprocess docs script --- .github/workflows/docs.yml | 8 ------- docs/tools/preprocess_docs.py | 43 ----------------------------------- 2 files changed, 51 deletions(-) delete mode 100644 docs/tools/preprocess_docs.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0210c784..481194da 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,14 +11,6 @@ jobs: - name: Checkout Repository uses: actions/checkout@v6 - - name: Setup Python - uses: actions/setup-python@v6 - with: - python-version: 3.x - - - name: Run Preprocessing Script - run: python docs/tools/preprocess_docs.py - - name: Setup .NET uses: actions/setup-dotnet@v5 with: diff --git a/docs/tools/preprocess_docs.py b/docs/tools/preprocess_docs.py deleted file mode 100644 index 4ac2bcb2..00000000 --- a/docs/tools/preprocess_docs.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import re - -def process_markdown_file(filepath): - ''' - Preprocesses a markdown file by replacing inline code blocks with a special token, - for namespaces starting with "ParquetSharp" or "System". - - Args: - filepath (str): The path to the markdown file. - ''' - with open(filepath, 'r', encoding='utf-8') as file: - content = file.read() - - def replace_namespace(match): - code = match.group(1) - if code.startswith("ParquetSharp") or code.startswith("System"): - return f"@{code}" - return f"`{code}`" - - processed_content = re.sub(r'(? Date: Thu, 4 Dec 2025 12:45:35 +1300 Subject: [PATCH 4/4] Warnings as errors when building docs --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 481194da..12307ea5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,7 +20,7 @@ jobs: run: dotnet tool update -g docfx - name: Build Documentation - run: docfx docfx.json + run: docfx --warningsAsErrors docfx.json working-directory: ./docs - name: Upload Site Artifact
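For reference, a small usage sketch of the `ParquetSharp.ReadDateTimeKindAsUnspecified` AppContext switch discussed in the Reading guide above. The file name and column layout are hypothetical; the only assumption is that the switch is set before any timestamp column is read:

```csharp
using System;
using ParquetSharp;

internal static class Program
{
    private static void Main()
    {
        // Restore the pre-7.0.0 behaviour: timestamps are read with
        // DateTimeKind.Unspecified even when IsAdjustedToUtc is true.
        AppContext.SetSwitch("ParquetSharp.ReadDateTimeKindAsUnspecified", true);

        // Hypothetical example file, with a timestamp column at index 0.
        using var fileReader = new ParquetFileReader("data.parquet");
        using var rowGroupReader = fileReader.RowGroup(0);
        var numRows = checked((int) rowGroupReader.MetaData.NumRows);
        using var timestampReader = rowGroupReader.Column(0).LogicalReader<DateTime>();
        var timestamps = timestampReader.ReadAll(numRows);
        Console.WriteLine(timestamps.Length > 0 ? timestamps[0].Kind.ToString() : "empty");
    }
}
```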