@@ -14,6 +14,8 @@ import type {
14
14
LazyJoinOptions ,
15
15
LazyOptions ,
16
16
LazySameNameColumnJoinOptions ,
17
+ SinkIpcOptions ,
18
+ SinkJsonOptions ,
17
19
SinkParquetOptions ,
18
20
} from "../types" ;
19
21
import {
@@ -616,10 +618,6 @@ export interface LazyDataFrame<S extends Schema = any>
616
618
*
617
619
* Evaluate the query in streaming mode and write to a Parquet file.
618
620
619
- .. warning::
620
- Streaming mode is considered **unstable**. It may be changed
621
- at any point without it being considered a breaking change.
622
-
623
621
This allows streaming results that are larger than RAM to be written to disk.
624
622
625
623
Parameters
@@ -667,6 +665,82 @@ export interface LazyDataFrame<S extends Schema = any>
667
665
>>> lf.sinkParquet("out.parquet").collect() # doctest: +SKIP
668
666
*/
669
667
sinkParquet ( path : string , options ?: SinkParquetOptions ) : LazyDataFrame ;
668
+
669
+ /**
 * Evaluate the query in streaming mode and write to an NDJSON file.
 *
 * This allows streaming results that are larger than RAM to be written to disk.
 *
 * @param path - File path to which the file should be written.
 * @param options.maintainOrder - Maintain the order in which data is processed. Default -> true
 *   Setting this to `false` will be slightly faster.
 * @param options.mkdir - Recursively create all the directories in the path. Default -> true
 * @param options.retries - Number of retries if accessing a cloud instance fails. Default -> 2
 * @param options.syncOnClose - Sync to disk before closing a file. Default -> 'all'
 *   - `None` does not sync.
 *     NOTE(review): `None` is the spelling used by the upstream Python docs; confirm the value `SinkJsonOptions` actually accepts.
 *   - `data` syncs the file contents.
 *   - `all` syncs the file contents and metadata.
 * @param options.cloudOptions - Options that indicate how to connect to a cloud provider.
 *   The cloud providers currently supported are AWS, GCP, and Azure.
 *   See supported keys here:
 *   - aws: https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
 *   - gcp: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html
 *   - azure: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html
 *
 *   If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
 *   NOTE(review): the original text said unsupported providers fall back to `fsspec.open()`, which is a
 *   Python library; confirm what happens for unsupported providers in this binding.
 * @returns LazyDataFrame
 * @example
 * ```
 * const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv");
 * lf.sinkNdJson("out.ndjson").collect();
 * ```
 */
703
+ sinkNdJson ( path : string , options ?: SinkJsonOptions ) : LazyDataFrame ;
704
+ /**
 * Evaluate the query in streaming mode and write to an IPC file.
 *
 * This allows streaming results that are larger than RAM to be written to disk.
 *
 * @param path - File path to which the file should be written.
 * @param options.compression - One of 'uncompressed', 'lz4', 'zstd'. Default -> 'uncompressed'
 *   Choose "zstd" for good compression performance.
 *   Choose "lz4" for fast compression/decompression.
 * @param options.compatLevel - One of 'newest', 'oldest'. Default -> 'newest'
 *   Use a specific compatibility level when exporting Polars' internal data structures.
 * @param options.maintainOrder - Maintain the order in which data is processed. Default -> true
 *   Setting this to `false` will be slightly faster.
 * @param options.mkdir - Recursively create all the directories in the path. Default -> true
 * @param options.retries - Number of retries if accessing a cloud instance fails. Default -> 2
 * @param options.syncOnClose - Sync to disk before closing a file. Default -> 'all'
 *   - `None` does not sync.
 *     NOTE(review): `None` is the spelling used by the upstream Python docs; confirm the value `SinkIpcOptions` actually accepts.
 *   - `data` syncs the file contents.
 *   - `all` syncs the file contents and metadata.
 * @param options.cloudOptions - Options that indicate how to connect to a cloud provider.
 *   The cloud providers currently supported are AWS, GCP, and Azure.
 *   See supported keys here:
 *   - aws: https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
 *   - gcp: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html
 *   - azure: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html
 *
 *   If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
 *   NOTE(review): the original text said unsupported providers fall back to `fsspec.open()`, which is a
 *   Python library; confirm what happens for unsupported providers in this binding.
 * @returns LazyDataFrame
 * @example
 * ```
 * const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv");
 * lf.sinkIpc("out.arrow").collect();
 * ```
 */
743
+ sinkIpc ( path : string , options ?: SinkIpcOptions ) : LazyDataFrame ;
670
744
}
671
745
672
746
const prepareGroupbyInputs = ( by ) => {
@@ -1179,6 +1253,22 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
1179
1253
} ;
1180
1254
return _ldf . sinkParquet ( path , options ) ;
1181
1255
} ,
1256
/**
 * Forward an NDJSON sink request to the underlying lazy frame.
 *
 * Options the caller left unset (`null`/`undefined`) are filled with
 * defaults: `retries` 2, `syncOnClose` "all", `maintainOrder` true,
 * `mkdir` true. The defaults are applied to a shallow copy so the
 * caller's `options` object is never modified.
 *
 * @param path - Destination path handed to `_ldf.sinkJson`.
 * @param options - Sink options; any other fields are passed through untouched.
 * @returns Whatever `_ldf.sinkJson` returns.
 */
sinkNdJson(path: string, options: SinkJsonOptions = {}) {
  // Resolve defaults on a copy: mutating the argument would leak the
  // defaults into an options object the caller may reuse elsewhere.
  const resolved: SinkJsonOptions = {
    ...options,
    retries: options.retries ?? 2,
    syncOnClose: options.syncOnClose ?? "all",
    maintainOrder: options.maintainOrder ?? true,
    mkdir: options.mkdir ?? true,
  };
  return _ldf.sinkJson(path, resolved);
},
1263
/**
 * Forward an IPC sink request to the underlying lazy frame.
 *
 * Options the caller left unset (`null`/`undefined`) are filled with
 * defaults: `compatLevel` "newest", `compression` "uncompressed",
 * `retries` 2, `syncOnClose` "all", `maintainOrder` true, `mkdir` true.
 * The defaults are applied to a shallow copy so the caller's `options`
 * object is never modified.
 *
 * @param path - Destination path handed to `_ldf.sinkIpc`.
 * @param options - Sink options; any other fields are passed through untouched.
 * @returns Whatever `_ldf.sinkIpc` returns.
 */
sinkIpc(path: string, options: SinkIpcOptions = {}) {
  // Resolve defaults on a copy: mutating the argument would leak the
  // defaults into an options object the caller may reuse elsewhere.
  const resolved: SinkIpcOptions = {
    ...options,
    compatLevel: options.compatLevel ?? "newest",
    compression: options.compression ?? "uncompressed",
    retries: options.retries ?? 2,
    syncOnClose: options.syncOnClose ?? "all",
    maintainOrder: options.maintainOrder ?? true,
    mkdir: options.mkdir ?? true,
  };
  return _ldf.sinkIpc(path, resolved);
},
1182
1272
} ;
1183
1273
} ;
1184
1274
0 commit comments