@@ -14,6 +14,8 @@ import type {
14
14
LazyJoinOptions ,
15
15
LazyOptions ,
16
16
LazySameNameColumnJoinOptions ,
17
+ SinkIpcOptions ,
18
+ SinkJsonOptions ,
17
19
SinkParquetOptions ,
18
20
} from "../types" ;
19
21
import {
@@ -615,10 +617,6 @@ export interface LazyDataFrame<S extends Schema = any>
615
617
*
616
618
* Evaluate the query in streaming mode and write to a Parquet file.
617
619
618
- .. warning::
619
- Streaming mode is considered **unstable**. It may be changed
620
- at any point without it being considered a breaking change.
621
-
622
620
This allows streaming results that are larger than RAM to be written to disk.
623
621
624
622
Parameters
@@ -666,6 +664,82 @@ export interface LazyDataFrame<S extends Schema = any>
666
664
>>> lf.sinkParquet("out.parquet").collect() # doctest: +SKIP
667
665
*/
668
666
sinkParquet ( path : string , options ?: SinkParquetOptions ) : LazyDataFrame ;
667
+
668
+ /**
669
+ *
670
+ * Evaluate the query in streaming mode and write to an NDJSON file.
671
+ * This allows streaming results that are larger than RAM to be written to disk.
672
+ *
673
+ * Parameters
674
+ @param path - File path to which the file should be written.
675
+ @param options.maintainOrder - Maintain the order in which data is processed. Default -> true
676
+ Setting this to `False` will be slightly faster.
677
+ @param options.mkdir - Recursively create all the directories in the path. Default -> false
678
+ @param options.retries - Number of retries if accessing a cloud instance fails. Default = 2
679
+ @param options.syncOnClose - { None, 'data', 'all' } Default -> 'all'
680
+ Sync to disk before closing a file.
681
+
682
+ * `None` does not sync.
683
+ * `data` syncs the file contents.
684
+ * `all` syncs the file contents and metadata.
685
+ @param options.cloudOptions - Options that indicate how to connect to a cloud provider.
686
+ If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.
687
+
688
+ The cloud providers currently supported are AWS, GCP, and Azure.
689
+ See supported keys here:
690
+
691
+ * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
692
+ * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
693
+ * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
694
+
695
+ If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
696
+ @return LazyDataFrame
697
+ Examples
698
+ --------
699
+ >>> const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv") # doctest: +SKIP
700
+ >>> lf.sinkNdJson("out.ndjson").collect() # doctest: +SKIP
701
+ */
702
+ sinkNdJson ( path : string , options ?: SinkJsonOptions ) : LazyDataFrame ;
703
+ /**
704
+ *
705
+ * Evaluate the query in streaming mode and write to an IPC file.
706
+ * This allows streaming results that are larger than RAM to be written to disk.
707
+ *
708
+ * Parameters
709
+ @param path - File path to which the file should be written.
710
+ @param options.compression : {'uncompressed', 'lz4', 'zstd'}
711
+ Choose "zstd" for good compression performance.
712
+ Choose "lz4" for fast compression/decompression.
713
+ @param options.compatLevel : { 'newest', 'oldest' } Default -> newest
714
+ Use a specific compatibility level when exporting Polars' internal data structures.
715
+ @param options.maintainOrder - Maintain the order in which data is processed. Default -> true
716
+ Setting this to `False` will be slightly faster.
717
+ @param options.mkdir - Recursively create all the directories in the path. Default -> false
718
+ @param options.retries - Number of retries if accessing a cloud instance fails. Default = 2
719
+ @param options.syncOnClose - { None, 'data', 'all' } Default -> 'all'
720
+ Sync to disk before closing a file.
721
+
722
+ * `None` does not sync.
723
+ * `data` syncs the file contents.
724
+ * `all` syncs the file contents and metadata.
725
+ @param options.cloudOptions - Options that indicate how to connect to a cloud provider.
726
+ If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.
727
+
728
+ The cloud providers currently supported are AWS, GCP, and Azure.
729
+ See supported keys here:
730
+
731
+ * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
732
+ * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
733
+ * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
734
+
735
+ If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
736
+ @return LazyDataFrame
737
+ Examples
738
+ --------
739
+ >>> const lf = pl.scanCsv("/path/to/my_larger_than_ram_file.csv") # doctest: +SKIP
740
+ >>> lf.sinkIpc("out.arrow").collect() # doctest: +SKIP
741
+ */
742
+ sinkIpc ( path : string , options ?: SinkIpcOptions ) : LazyDataFrame ;
669
743
}
670
744
671
745
const prepareGroupbyInputs = ( by ) => {
@@ -1173,6 +1247,22 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
1173
1247
} ;
1174
1248
return _ldf . sinkParquet ( path , options ) ;
1175
1249
} ,
1250
/**
 * Stream the query result to an NDJSON file at `path`.
 * Fills in the documented defaults on a copy of `options` (the caller's
 * object is not mutated) and delegates to the native binding, which
 * exposes NDJSON sinking under the name `sinkJson`.
 */
sinkNdJson(path: string, options: SinkJsonOptions = {}) {
  // Work on a shallow copy so the caller's options object is never mutated.
  const opts: SinkJsonOptions = { ...options };
  opts.retries = opts.retries ?? 2;
  opts.syncOnClose = opts.syncOnClose ?? "all";
  opts.maintainOrder = opts.maintainOrder ?? true;
  // The JSDoc for this method documents `mkdir` as defaulting to false;
  // the previous code defaulted it to true, contradicting the contract.
  opts.mkdir = opts.mkdir ?? false;
  return _ldf.sinkJson(path, opts);
},
1257
/**
 * Stream the query result to an Arrow IPC file at `path`.
 * Fills in the documented defaults on a copy of `options` (the caller's
 * object is not mutated) and delegates to the native `sinkIpc` binding.
 */
sinkIpc(path: string, options: SinkIpcOptions = {}) {
  // Work on a shallow copy so the caller's options object is never mutated.
  const opts: SinkIpcOptions = { ...options };
  opts.compatLevel = opts.compatLevel ?? "newest";
  opts.compression = opts.compression ?? "uncompressed";
  opts.retries = opts.retries ?? 2;
  opts.syncOnClose = opts.syncOnClose ?? "all";
  opts.maintainOrder = opts.maintainOrder ?? true;
  // The JSDoc for this method documents `mkdir` as defaulting to false;
  // the previous code defaulted it to true, contradicting the contract.
  opts.mkdir = opts.mkdir ?? false;
  return _ldf.sinkIpc(path, opts);
},
1176
1266
} ;
1177
1267
} ;
1178
1268
0 commit comments