diff --git a/__tests__/lazyframe.test.ts b/__tests__/lazyframe.test.ts index 74104327..c155740c 100644 --- a/__tests__/lazyframe.test.ts +++ b/__tests__/lazyframe.test.ts @@ -276,7 +276,10 @@ describe("lazyframe", () => { foo: [1], bar: ["a"], }); - const actual = await df.lazy().select("*").fetch(1); + const actual = await df + .lazy() + .select("*") + .fetch(1, { noOptimization: true }); expect(actual).toFrameEqual(expected); }); test("fetchSync", () => { @@ -357,7 +360,46 @@ describe("lazyframe", () => { }); expect(actual).toFrameEqual(expected); }); - describe("groupby", () => {}); + describe("groupby", () => { + test("groupBy", () => { + let actual = pl + .DataFrame({ + foo: [1, 2, 3], + ham: ["a", "a", "b"], + }) + .lazy() + .groupBy("ham") + .agg(pl.col("foo").sum()) + .collectSync(); + const expected = pl.DataFrame({ + ham: ["a", "b"], + foo: [3, 3], + }); + expect(actual).toFrameEqual(expected); + + actual = pl + .DataFrame({ + foo: [1, 2, 3], + ham: ["a", "a", "b"], + }) + .lazy() + .groupBy("ham", true) + .agg(pl.col("foo").sum()) + .collectSync(); + expect(actual).toFrameEqual(expected); + + actual = pl + .DataFrame({ + foo: [1, 2, 3], + ham: ["a", "a", "b"], + }) + .lazy() + .groupBy("ham", { maintainOrder: true }) + .agg(pl.col("foo").sum()) + .collectSync(); + expect(actual).toFrameEqual(expected); + }); + }); test("head", () => { const actual = pl .DataFrame({ @@ -1096,6 +1138,90 @@ describe("lazyframe", () => { ]); expect(actual).toFrameEqualIgnoringOrder(expected); }); + test("withColumn:series", async () => { + const actual: pl.DataFrame = pl + .DataFrame() + .lazy() + .withColumn(pl.Series("series1", [1, 2, 3, 4], pl.Int16)) + .collectSync(); + const expected: pl.DataFrame = pl.DataFrame([ + pl.Series("series1", [1, 2, 3, 4], pl.Int16), + ]); + expect(actual).toFrameEqual(expected); + }); + test("withColumns:series", async () => { + const actual: pl.DataFrame = pl + .DataFrame() + .lazy() + .withColumns( + pl.Series("series1", [1, 2, 3, 4], pl.Int16), + pl.Series("series2", [1, 2, 3, 4], pl.Int32), + ) + .collectSync(); + const expected: pl.DataFrame = pl.DataFrame([ + pl.Series("series1", [1, 2, 3, 4], pl.Int16), + pl.Series("series2", [1, 2, 3, 4], pl.Int32), + ]); + expect(actual).toFrameEqual(expected); + }); + test("select:series", async () => { + let actual: pl.DataFrame = pl + .DataFrame() + .lazy() + .select( + pl.Series("series1", [1, 2, 3, 4], pl.Int16), + pl.Series("series2", [1, 2, 3, 4], pl.Int32), + ) + .collectSync(); + let expected: pl.DataFrame = pl.DataFrame([ + pl.Series("series1", [1, 2, 3, 4], pl.Int16), + pl.Series("series2", [1, 2, 3, 4], pl.Int32), + ]); + expect(actual).toFrameEqual(expected); + actual = pl + .DataFrame({ text: ["hello"] }) + .lazy() + .select(pl.Series("series", [1, 2, 3])) + .collectSync(); + + expected = pl.DataFrame([pl.Series("series", [1, 2, 3])]); + expect(actual).toFrameEqual(expected); + + actual = pl + .DataFrame({ text: ["hello"] }) + .lazy() + .select("text", pl.Series("series", [1])) + .collectSync(); + expected = pl.DataFrame({ text: ["hello"], series: [1] }); + expect(actual).toFrameEqual(expected); + }); + test("select:lit", () => { + const df = pl.DataFrame({ a: [1] }, { schema: { a: pl.Float32 } }); + let actual = df.lazy().select(pl.col("a"), pl.lit(1)).collectSync(); + const expected = pl.DataFrame({ + a: [1], + literal: [1], + }); + expect(actual).toFrameEqual(expected); + actual = df + .lazy() + .select(pl.col("a").mul(2).alias("b"), pl.lit(2)) + .collectSync(); + const expected2 = pl.DataFrame({ + b: [2], + literal: [2], + }); + expect(actual).toFrameEqual(expected2); + }); + test("inspect", () => { + const actual = pl + .DataFrame({ + foo: [1, 2, 9], + bar: [6, 2, 8], + }) + .lazy(); + expect(actual).toBeDefined(); + }); test("withColumns", () => { const actual = pl .DataFrame({ @@ -1120,7 +1246,7 @@ describe("lazyframe", () => { bar: [6, 2, 8], }) .lazy() - .withColumns([pl.lit("a").alias("col_a"), pl.lit("b").alias("col_b")]) + .withColumns(pl.lit("a").alias("col_a"), pl.lit("b").alias("col_b")) .collectSync(); const expected = pl.DataFrame([ pl.Series("foo", [1, 2, 9], pl.Int16), @@ -1280,7 +1406,10 @@ describe("lazyframe", () => { .lazy(); await ldf.sinkCSV("./test.csv").collect(); const newDF: pl.DataFrame = pl.readCSV("./test.csv"); - const actualDf: pl.DataFrame = await ldf.collect({ streaming: true }); + const actualDf: pl.DataFrame = await ldf.collect({ + streaming: true, + noOptimization: true, + }); expect(newDF.sort("foo")).toFrameEqual(actualDf); fs.rmSync("./test.csv"); }); diff --git a/__tests__/series.test.ts b/__tests__/series.test.ts index fe1e46a8..ebcbf1ad 100644 --- a/__tests__/series.test.ts +++ b/__tests__/series.test.ts @@ -198,6 +198,7 @@ describe("typedArrays", () => { test("decimal", () => { const expected = [1n, 2n, 3n]; const expectedDtype = pl.Decimal(10, 2); + expect(expectedDtype.equals(expectedDtype)).toBeTruthy(); const actual = pl.Series("", expected, expectedDtype); expect(actual.dtype).toEqual(expectedDtype); try { @@ -219,6 +220,9 @@ describe("typedArrays", () => { expect(actual.dtype).toEqual(expectedDtype); const actualValues = actual.toArray(); expect(actualValues).toEqual(expected); + const lst = expectedDtype.asFixedSizeList(); + expect(lst?.inner[0]).toEqual(pl.Float32); + expect(expectedDtype.equals(expectedDtype)).toBeTruthy(); }); }); describe("series", () => { @@ -949,7 +953,7 @@ describe("series struct", () => { const expectedFields = [...expectedKeys]; expect(actualFields).toEqual(expectedFields); }); - test.skip("struct:field", () => { + test("struct:field", () => { const expected = [{ foo: 1, bar: 2, ham: "c" }]; const actual = pl.Series(expected).struct.field("foo").toArray(); expect(actual).toEqual([expected[0]["foo"]]); @@ -964,7 +968,7 @@ describe("series struct", () => { }); expect(actual).toFrameEqual(expected); }); - test.skip("struct:renameFields", () => { + test("struct:renameFields", () => { const expected = [{ foo: 1, bar: 2, ham: "c" }]; const actual = pl .Series(expected) @@ -972,7 +976,7 @@ describe("series struct", () => { .toArray(); expect(actual).toEqual(expected); }); - test.skip("struct:nth", () => { + test("struct:nth", () => { const arr = [ { foo: 1, bar: 2, ham: "c" }, { foo: null, bar: 10, ham: null }, diff --git a/polars/dataframe.ts b/polars/dataframe.ts index e8ef1f8b..51285d95 100644 --- a/polars/dataframe.ts +++ b/polars/dataframe.ts @@ -2853,7 +2853,7 @@ export const _DataFrame = (_df: any): DataFrame => { ); } return this.lazy() - .withColumns(columns) + .withColumns(...columns) .collectSync({ noOptimization: true }); }, withColumnRenamed(opt, replacement?): any { diff --git a/polars/lazy/dataframe.ts b/polars/lazy/dataframe.ts index 6baffd3e..8570ff3e 100644 --- a/polars/lazy/dataframe.ts +++ b/polars/lazy/dataframe.ts @@ -478,9 +478,9 @@ export interface LazyDataFrame * @see {@link DataFrame.select} */ select(...columns: U[]): LazyDataFrame<{ [P in U]: S[P] }>; - select(column: ExprOrString): LazyDataFrame; - select(columns: ExprOrString[]): LazyDataFrame; - select(...columns: ExprOrString[]): LazyDataFrame; + select(column: ExprOrString | Series): LazyDataFrame; + select(columns: (ExprOrString | Series)[]): LazyDataFrame; + select(...columns: (ExprOrString | Series)[]): LazyDataFrame; /** * @see {@link DataFrame.shift} */ @@ -553,13 +553,12 @@ export interface LazyDataFrame * Add or overwrite column in a DataFrame. * @param expr - Expression that evaluates to column. */ - withColumn(expr: Expr): LazyDataFrame; + withColumn(expr: Expr | Series): LazyDataFrame; /** * Add or overwrite multiple columns in a DataFrame. * @param exprs - List of Expressions that evaluate to columns. * */ - withColumns(exprs: (Expr | Series)[]): LazyDataFrame; withColumns(...exprs: (Expr | Series)[]): LazyDataFrame; withColumnRenamed( existing: Existing, @@ -927,15 +926,12 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => { return _LazyDataFrame(_ldf.filter(predicate)); }, - groupBy(opt, maintainOrder: any = true): LazyGroupBy { - if (opt?.by !== undefined) { - const by = selectionToExprList([opt.by], false); - - return _LazyGroupBy(_ldf.groupby(by, opt.maintainOrder)); + groupBy(by, maintainOrder: any = true): LazyGroupBy { + if (maintainOrder?.maintainOrder !== undefined) { + maintainOrder = maintainOrder.maintainOrder; } - const by = selectionToExprList([opt], false); - - return _LazyGroupBy(_ldf.groupby(by, maintainOrder)); + const expr = selectionToExprList([by], false); + return _LazyGroupBy(_ldf.groupby(expr, maintainOrder)); }, groupByRolling({ indexColumn, by, period, offset, closed }) { offset = offset ?? `-${period}`; @@ -1165,7 +1161,6 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => { }, select(...exprs) { const selections = selectionToExprList(exprs, false); - return _LazyDataFrame(_ldf.select(selections)); }, shift(periods) { @@ -1221,12 +1216,11 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => { serialize(format) { return _ldf.serialize(format); }, - withColumn(expr) { - return _LazyDataFrame(_ldf.withColumn(expr._expr)); + withColumn(column: Expr | Series) { + return this.withColumns(column); }, - withColumns(...columns) { + withColumns(...columns: (Expr | Series)[]) { const exprs = selectionToExprList(columns, false); - return _LazyDataFrame(_ldf.withColumns(exprs)); }, withColumnRenamed(existing, replacement) { diff --git a/polars/lazy/expr/index.ts b/polars/lazy/expr/index.ts index a766c2d1..2bf577bd 100644 --- a/polars/lazy/expr/index.ts +++ b/polars/lazy/expr/index.ts @@ -1944,7 +1944,7 @@ export const exprToLitOrExpr = (expr: any, stringToLit = true): Expr => { return expr; } if (Series.isSeries(expr)) { - return _Expr(pli.lit((expr as any)._s)); + return _Expr(pli.litSeries((expr as any)._s)); } return _Expr(pli.lit(expr)); }; diff --git a/polars/series/struct.ts b/polars/series/struct.ts index 38ff87bc..6a2e66c0 100644 --- a/polars/series/struct.ts +++ b/polars/series/struct.ts @@ -47,17 +47,17 @@ export const SeriesStructFunctions = (_s: any): SeriesStructFunctions => { }, field(name) { return DataFrame({}) - .select(_Expr(pli.lit(_s).structFieldByName(name))) + .select(_Expr(pli.litSeries(_s).structFieldByName(name))) .toSeries(); }, renameFields(names) { return DataFrame({}) - .select(_Expr(pli.lit(_s).structRenameFields(names))) + .select(_Expr(pli.litSeries(_s).structRenameFields(names))) .toSeries(); }, nth(index) { return DataFrame({}) - .select(_Expr(pli.lit(_s).structFieldByIndex(index))) + .select(_Expr(pli.litSeries(_s).structFieldByIndex(index))) .toSeries(); }, }; diff --git a/src/lazy/dsl.rs b/src/lazy/dsl.rs index 67ac31ad..f5f611b1 100644 --- a/src/lazy/dsl.rs +++ b/src/lazy/dsl.rs @@ -1,5 +1,6 @@ use crate::conversion::Wrap; use crate::prelude::*; +use crate::series::JsSeries; use crate::utils::reinterpret; use polars::lazy::dsl; use polars::lazy::dsl::Expr; @@ -1759,6 +1760,12 @@ pub fn arg_sort_by(by: Vec<&JsExpr>, descending: Vec) -> JsExpr { .into() } +#[napi(catch_unwind)] +pub fn lit_series(s: &JsSeries) -> JsResult { + Ok(s.clone().series.lit().into()) +} + + #[napi(catch_unwind)] pub fn lit(value: Wrap) -> JsResult { let lit: LiteralValue = value