Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating to rs-0.45 #301

Merged
merged 9 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Install ghp-import
uses: actions/setup-python@v5
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-js.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Check yarn version
run: yarn --version
Expand All @@ -46,7 +46,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Bun version
uses: oven-sh/setup-bun@v1
Expand Down
925 changes: 0 additions & 925 deletions .yarn/releases/yarn-4.5.0.cjs

This file was deleted.

934 changes: 934 additions & 0 deletions .yarn/releases/yarn-4.5.3.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion .yarnrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ enableGlobalCache: false

nodeLinker: node-modules

yarnPath: .yarn/releases/yarn-4.5.0.cjs
yarnPath: .yarn/releases/yarn-4.5.3.cjs
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ napi = { version = "2.16.13", default-features = false, features = [
"napi8",
"serde-json",
] }
napi-derive = { version = "2.16.12", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
napi-derive = { version = "2.16.13", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
thiserror = "1"
smartstring = { version = "1" }
serde_json = { version = "1" }
either = "1.13.0"
hashbrown = { version = "0.15.0", features = ["rayon", "serde"] }
hashbrown = { version = "0.15.2", features = ["rayon", "serde"] }

[dependencies.polars]
features = [
Expand Down Expand Up @@ -162,7 +162,7 @@ features = [
"azure"
]
git = "https://github.com/pola-rs/polars.git"
rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8"
rev = "58a38af21dccaf3326514494a1db118601c8c2ca"

[build-dependencies]
napi-build = "2.1.3"
Expand Down
6 changes: 3 additions & 3 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,7 @@ describe("io", () => {
expect(actual).toEqual(expected);
});
test("writeCSV:string:sep", () => {
const actual = df.clone().writeCSV({ sep: "X" }).toString();
const actual = df.clone().writeCSV({ separator: "X" }).toString();
const expected = "fooXbar\n1X6\n2X2\n9X8\n";
expect(actual).toEqual(expected);
});
Expand All @@ -1689,14 +1689,14 @@ describe("io", () => {
bar: ["a,b,c", "d,e,f", "g,h,i"],
foo: [1, 2, 3],
});
const actual = df.writeCSV({ quote: "^" }).toString();
const actual = df.writeCSV({ quoteChar: "^" }).toString();
const expected = "bar,foo\n^a,b,c^,1.0\n^d,e,f^,2.0\n^g,h,i^,3.0\n";
expect(actual).toEqual(expected);
});
test("writeCSV:string:header", () => {
const actual = df
.clone()
.writeCSV({ sep: "X", includeHeader: false, lineTerminator: "|" })
.writeCSV({ separator: "X", includeHeader: false, lineTerminator: "|" })
.toString();
const expected = "1X6|2X2|9X8|";
expect(actual).toEqual(expected);
Expand Down
15 changes: 15 additions & 0 deletions __tests__/io.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ describe("read:csv", () => {
csvString.slice(0, 22),
);
});
// A quoted header whose cells contain embedded newlines spans several physical
// lines. With `hasHeader: false` and `skipRows: 1` the test expects only the two
// data rows to come back, under auto-generated column names.
it("can read from a csv buffer with newline in the header", () => {
  const rawCsv = '"name\na","height\nb"\n"John",172.23\n"Anna",1653.34';
  const parsed = pl.readCSV(Buffer.from(rawCsv), {
    quoteChar: '"',
    sep: ",",
    hasHeader: false,
    skipRows: 1,
  });
  const expectedRows = [
    { column_1: "John", column_2: 172.23 },
    { column_1: "Anna", column_2: 1653.34 },
  ];
  expect(parsed.toRecords()).toEqual(expectedRows);
});
it("can read from a csv buffer", () => {
const csvBuffer = Buffer.from("foo,bar,baz\n1,2,3\n4,5,6\n", "utf-8");
const df = pl.readCSV(csvBuffer);
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,16 @@
"@napi-rs/cli": "^2.18.4",
"@types/chance": "^1.1.6",
"@types/jest": "^29.5.14",
"@types/node": "^22.8.6",
"@types/node": "^22.10.1",
"chance": "^1.1.12",
"jest": "^29.7.0",
"source-map-support": "^0.5.21",
"ts-jest": "^29.2.5",
"ts-node": "^10.9.2",
"typedoc": "^0.26.10",
"typescript": "5.6.3"
"typedoc": "^0.27.3",
"typescript": "5.7.2"
},
"packageManager": "[email protected].0",
"packageManager": "[email protected].3",
"workspaces": [
"benches"
]
Expand Down
10 changes: 5 additions & 5 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ import { type LazyDataFrame, _LazyDataFrame } from "./lazy/dataframe";
import { Expr } from "./lazy/expr";
import { Series, _Series } from "./series";
import type {
CsvWriterOptions,
FillNullStrategy,
JoinOptions,
WriteAvroOptions,
WriteCsvOptions,
WriteIPCOptions,
WriteParquetOptions,
} from "./types";
Expand Down Expand Up @@ -61,8 +61,8 @@ interface WriteMethods {
* @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
* @param options.lineTerminator - String used to end each row.
* @param options.includeHeader - Whether or not to include header in the CSV output.
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`
* @param options.quote - Character to use for quoting. Default: \" Note: it will note be used when sep is used
* @param options.separator - Separate CSV fields with this symbol. Defaults to `,`.
* @param options.quoteChar - Character to use for quoting. Default: \" Note: it will not be used when `separator` is used.
* @param options.batchSize - Number of rows that will be processed per thread.
* @param options.datetimeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
Expand Down Expand Up @@ -109,8 +109,8 @@ interface WriteMethods {
* @category IO
*/
writeCSV(): Buffer;
writeCSV(options: WriteCsvOptions): Buffer;
writeCSV(dest: string | Writable, options?: WriteCsvOptions): void;
writeCSV(options: CsvWriterOptions): Buffer;
writeCSV(dest: string | Writable, options?: CsvWriterOptions): void;
/**
* Write Dataframe to JSON string, file, or write stream
* @param destination file or write stream
Expand Down
4 changes: 2 additions & 2 deletions polars/io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ export function readAvro(pathOrBody, options = {}) {
@param options.rechunk - In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks.
@param options.lowMemory - Reduce memory pressure at the expense of performance.
@param options.cache - Cache the result after reading.
@param options.storageOptions - Options that indicate how to connect to a cloud provider.
@param options.cloudOptions - Options that indicate how to connect to a cloud provider.
If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.

The cloud providers currently supported are AWS, GCP, and Azure.
Expand All @@ -513,7 +513,7 @@ export function readAvro(pathOrBody, options = {}) {
* `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
* `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_

If `storage_options` is not provided, Polars will try to infer the information from environment variables.
If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
@param retries - Number of retries if accessing a cloud instance fails.
@param includeFilePaths - Include the path of the source file(s) as a column with this name.
*/
Expand Down
18 changes: 15 additions & 3 deletions polars/lazy/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import pli from "../internals/polars_internal";
import type { Series } from "../series";
import type { Deserialize, GroupByOps, Serialize } from "../shared_traits";
import type {
CsvWriterOptions,
LazyJoinOptions,
LazyOptions,
SinkCsvOptions,
SinkParquetOptions,
} from "../types";
import {
Expand Down Expand Up @@ -541,7 +541,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
>>> lf.sinkCSV("out.csv")
*/

sinkCSV(path: string, options?: SinkCsvOptions): void;
sinkCSV(path: string, options?: CsvWriterOptions): void;

/***
*
Expand Down Expand Up @@ -580,6 +580,18 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
@param simplifyExpression - Run simplify expressions optimization. Default -> true
@param slicePushdown - Slice pushdown optimization. Default -> true
@param noOptimization - Turn off (certain) optimizations. Default -> false
@param cloudOptions - Options that indicate how to connect to a cloud provider.
If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.

The cloud providers currently supported are AWS, GCP, and Azure.
See supported keys here:

* `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
* `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
* `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_

If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
@param retries - Number of retries if accessing a cloud instance fails.

Examples
--------
Expand Down Expand Up @@ -1078,7 +1090,7 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
withRowCount(name = "row_nr") {
return _LazyDataFrame(_ldf.withRowCount(name));
},
sinkCSV(path, options: SinkCsvOptions = {}) {
sinkCSV(path, options: CsvWriterOptions = {}) {
options.maintainOrder = options.maintainOrder ?? false;
_ldf.sinkCsv(path, options);
},
Expand Down
4 changes: 2 additions & 2 deletions polars/lazy/functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ export function spearmanRankCorr(a: ExprOrString, b: ExprOrString): Expr {
a = exprToLitOrExpr(a, false);
b = exprToLitOrExpr(b, false);

return _Expr(pli.spearmanRankCorr(a, b, null, false));
return _Expr(pli.spearmanRankCorr(a, b, false));
}

/** Get the last n rows of an Expression. */
Expand Down Expand Up @@ -964,7 +964,7 @@ export function sumHorizontal(exprs: ExprOrString | ExprOrString[]): Expr {

exprs = selectionToExprList(exprs);

return _Expr(pli.sumHorizontal(exprs));
return _Expr(pli.sumHorizontal(exprs, true));
}

// // export function collect_all() {}
Expand Down
29 changes: 7 additions & 22 deletions polars/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,33 +39,16 @@ export interface ConcatOptions {
how?: "vertical" | "horizontal" | "diagonal";
}
/**
* Options for {@link DataFrame.writeCSV}
* @category Options
*/
export interface WriteCsvOptions {
includeBom?: boolean;
includeHeader?: boolean;
sep?: string;
quote?: string;
lineTerminator?: string;
batchSize?: number;
datetimeFormat?: string;
dateFormat?: string;
timeFormat?: string;
floatPrecision?: number;
nullValue?: string;
}
/**
* Options for @see {@link DataFrame.writeCSV}
* Options for @see {@link LazyDataFrame.sinkCSV}
* @category Options
*/
export interface SinkCsvOptions {
includeHeader?: boolean;
quote?: string;
export interface CsvWriterOptions {
includeBom?: boolean;
includeHeader?: boolean;
separator?: string;
lineTerminator?: string;
quoteChar?: string;
lineTerminator?: string;
batchSize?: number;
datetimeFormat?: string;
dateFormat?: string;
Expand All @@ -91,6 +74,8 @@ export interface SinkParquetOptions {
simplifyExpression?: boolean;
slicePushdown?: boolean;
noOptimization?: boolean;
cloudOptions?: Map<string, string>;
retries?: number;
}
/**
* Options for {@link DataFrame.writeJSON}
Expand Down Expand Up @@ -153,7 +138,7 @@ export interface ScanParquetOptions {
rechunk?: boolean;
lowMemory?: boolean;
useStatistics?: boolean;
cloudOptions?: unknown;
cloudOptions?: Map<string, string>;
retries?: number;
includeFilePaths?: string;
allowMissingColumns?: boolean;
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
nightly-2024-10-28
nightly-2024-11-28
Loading
Loading