From 9388ecdc87a953a7ef641f6e2b4b13ac001d2a29 Mon Sep 17 00:00:00 2001 From: Bidek56 Date: Tue, 10 Dec 2024 20:30:35 -0500 Subject: [PATCH] Adding cloudOptions to docs --- polars/io.ts | 4 +-- polars/lazy/dataframe.ts | 12 +++++++ polars/types.ts | 4 ++- src/dataframe.rs | 74 +++++++++++++++------------------------- 4 files changed, 45 insertions(+), 49 deletions(-) diff --git a/polars/io.ts b/polars/io.ts index e711953d..799b5e6b 100644 --- a/polars/io.ts +++ b/polars/io.ts @@ -503,7 +503,7 @@ export function readAvro(pathOrBody, options = {}) { @param options.rechunk - In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks. @param options.lowMemory - Reduce memory pressure at the expense of performance. @param options.cache - Cache the result after reading. - @param options.storageOptions - Options that indicate how to connect to a cloud provider. + @param options.cloudOptions - Options that indicate how to connect to a cloud provider. If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`. The cloud providers currently supported are AWS, GCP, and Azure. @@ -513,7 +513,7 @@ export function readAvro(pathOrBody, options = {}) { * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_ * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_ - If `storage_options` is not provided, Polars will try to infer the information from environment variables. + If `cloudOptions` is not provided, Polars will try to infer the information from environment variables. @param retries - Number of retries if accessing a cloud instance fails. @param includeFilePaths - Include the path of the source file(s) as a column with this name. */ diff --git a/polars/lazy/dataframe.ts b/polars/lazy/dataframe.ts index cc0df477..7bde5746 100644 --- a/polars/lazy/dataframe.ts +++ b/polars/lazy/dataframe.ts @@ -580,6 +580,18 @@ export interface LazyDataFrame extends Serialize, GroupByOps { @param simplifyExpression - Run simplify expressions optimization. 
Default -> true @param slicePushdown - Slice pushdown optimization. Default -> true @param noOptimization - Turn off (certain) optimizations. Default -> false + @param cloudOptions - Options that indicate how to connect to a cloud provider. + If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`. + + The cloud providers currently supported are AWS, GCP, and Azure. + See supported keys here: + + * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_ + * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_ + * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_ + + If `cloudOptions` is not provided, Polars will try to infer the information from environment variables. + @param retries - Number of retries if accessing a cloud instance fails. Examples -------- diff --git a/polars/types.ts b/polars/types.ts index 6a329568..f29fdb41 100644 --- a/polars/types.ts +++ b/polars/types.ts @@ -74,6 +74,8 @@ export interface SinkParquetOptions { simplifyExpression?: boolean; slicePushdown?: boolean; noOptimization?: boolean; + cloudOptions?: Map; + retries?: number; } /** * Options for {@link DataFrame.writeJSON} @@ -136,7 +138,7 @@ export interface ScanParquetOptions { rechunk?: boolean; lowMemory?: boolean; useStatistics?: boolean; - cloudOptions?: unknown; + cloudOptions?: Map; retries?: number; includeFilePaths?: string; allowMissingColumns?: boolean; diff --git a/src/dataframe.rs b/src/dataframe.rs index 39369578..948b3119 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -1339,18 +1339,6 @@ impl JsDataFrame { options: Wrap, env: Env, ) -> napi::Result<()> { - let include_header = options.0.include_header; - let separator = options.0.serialize_options.separator; - let quote = options.0.serialize_options.quote_char; - let include_bom = options.0.include_bom; - let line_terminator = options.0.serialize_options.line_terminator; - let batch_size = options.0.batch_size; - let date_format = options.0.serialize_options.date_format; - let time_format = options.0.serialize_options.time_format; - let datetime_format = options.0.serialize_options.datetime_format; 
- let float_precision: Option = options.0.serialize_options.float_precision; - let null_value = options.0.serialize_options.null; - match path_or_buffer.get_type()? { ValueType::String => { let path: napi::JsString = unsafe { path_or_buffer.cast() }; @@ -1359,17 +1347,17 @@ impl JsDataFrame { let f = std::fs::File::create(path).unwrap(); let f = BufWriter::new(f); CsvWriter::new(f) - .include_bom(include_bom) - .include_header(include_header) - .with_separator(separator) - .with_line_terminator(line_terminator) - .with_batch_size(batch_size) - .with_datetime_format(datetime_format) - .with_date_format(date_format) - .with_time_format(time_format) - .with_float_precision(float_precision) - .with_null_value(null_value) - .with_quote_char(quote) + .include_bom(options.0.include_bom) + .include_header(options.0.include_header) + .with_separator(options.0.serialize_options.separator) + .with_line_terminator(options.0.serialize_options.line_terminator) + .with_batch_size(options.0.batch_size) + .with_datetime_format(options.0.serialize_options.datetime_format) + .with_date_format(options.0.serialize_options.date_format) + .with_time_format(options.0.serialize_options.time_format) + .with_float_precision(options.0.serialize_options.float_precision) + .with_null_value(options.0.serialize_options.null) + .with_quote_char(options.0.serialize_options.quote_char) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1378,17 +1366,17 @@ impl JsDataFrame { let writeable = JsWriteStream { inner, env: &env }; CsvWriter::new(writeable) - .include_bom(include_bom) - .include_header(include_header) - .with_separator(separator) - .with_line_terminator(line_terminator) - .with_batch_size(batch_size) - .with_datetime_format(datetime_format) - .with_date_format(date_format) - .with_time_format(time_format) - .with_float_precision(float_precision) - .with_null_value(null_value) - .with_quote_char(quote) + .include_bom(options.0.include_bom) + .include_header(options.0.include_header) 
+ .with_separator(options.0.serialize_options.separator) + .with_line_terminator(options.0.serialize_options.line_terminator) + .with_batch_size(options.0.batch_size) + .with_datetime_format(options.0.serialize_options.datetime_format) + .with_date_format(options.0.serialize_options.date_format) + .with_time_format(options.0.serialize_options.time_format) + .with_float_precision(options.0.serialize_options.float_precision) + .with_null_value(options.0.serialize_options.null) + .with_quote_char(options.0.serialize_options.quote_char) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1404,8 +1392,6 @@ impl JsDataFrame { compression: Wrap, env: Env, ) -> napi::Result<()> { - let compression = compression.0; - match path_or_buffer.get_type()? { ValueType::String => { let path: napi::JsString = unsafe { path_or_buffer.cast() }; @@ -1414,7 +1400,7 @@ impl JsDataFrame { let f = std::fs::File::create(path).unwrap(); let f = BufWriter::new(f); ParquetWriter::new(f) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1423,7 +1409,7 @@ impl JsDataFrame { let writeable = JsWriteStream { inner, env: &env }; ParquetWriter::new(writeable) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1438,8 +1424,6 @@ impl JsDataFrame { compression: Wrap>, env: Env, ) -> napi::Result<()> { - let compression = compression.0; - match path_or_buffer.get_type()? 
{ ValueType::String => { let path: napi::JsString = unsafe { path_or_buffer.cast() }; @@ -1447,7 +1431,7 @@ impl JsDataFrame { let f = std::fs::File::create(path).unwrap(); let f = BufWriter::new(f); IpcWriter::new(f) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1455,7 +1439,7 @@ impl JsDataFrame { let inner: napi::JsObject = unsafe { path_or_buffer.cast() }; let writeable = JsWriteStream { inner, env: &env }; IpcWriter::new(writeable) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1470,8 +1454,6 @@ impl JsDataFrame { compression: Wrap>, env: Env, ) -> napi::Result<()> { - let compression = compression.0; - match path_or_buffer.get_type()? { ValueType::String => { let path: napi::JsString = unsafe { path_or_buffer.cast() }; @@ -1479,7 +1461,7 @@ impl JsDataFrame { let f = std::fs::File::create(path).unwrap(); let f = BufWriter::new(f); IpcStreamWriter::new(f) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; } @@ -1487,7 +1469,7 @@ impl JsDataFrame { let inner: napi::JsObject = unsafe { path_or_buffer.cast() }; let writeable = JsWriteStream { inner, env: &env }; IpcStreamWriter::new(writeable) - .with_compression(compression) + .with_compression(compression.0) .finish(&mut self.df) .map_err(JsPolarsErr::from)?; }