From a675894375d3964f6ee0faeda489fe1d181b94a0 Mon Sep 17 00:00:00 2001 From: scarf Date: Mon, 18 Nov 2024 22:16:05 +0900 Subject: [PATCH] fix(dataframe): warn type of replaceAtIdx --- __tests__/dataframe.test.ts | 182 +++++++++++----------- __tests__/expr.test.ts | 303 ++++++++++++++++++++---------------- __tests__/series.test.ts | 124 ++++++++------- polars/dataframe.ts | 5 +- 4 files changed, 334 insertions(+), 280 deletions(-) diff --git a/__tests__/dataframe.test.ts b/__tests__/dataframe.test.ts index 66effa91..9539fa07 100644 --- a/__tests__/dataframe.test.ts +++ b/__tests__/dataframe.test.ts @@ -756,7 +756,7 @@ describe("dataframe", () => { expect(actual.columns).toEqual(["foo_new", "bar_new", "ham_new"]); }); test("replaceAtIdx", () => { - const actual = pl.DataFrame({ + const actual: pl.DataFrame = pl.DataFrame({ foo: [1, 2, 3], bar: [6, 7, 8], ham: ["a", "b", "c"], @@ -1315,101 +1315,109 @@ describe("dataframe", () => { expect(actual).toFrameEqual(expected); }); test("pivot", () => { - let df = pl.DataFrame({ - a: pl.Series([1, 2, 3]).cast(pl.Int32), - b: pl - .Series([ - [1, 1], - [2, 2], - [3, 3], - ]) - .cast(pl.List(pl.Int32)), - }); - - let expected = pl - .DataFrame({ + { + const df = pl.DataFrame({ a: pl.Series([1, 2, 3]).cast(pl.Int32), - "1": pl.Series([[1, 1], null, null]).cast(pl.List(pl.Int32)), - "2": pl.Series([null, [2, 2], null]).cast(pl.List(pl.Int32)), - "3": pl.Series([null, null, [3, 3]]).cast(pl.List(pl.Int32)), - }) - .select("a", "1", "2", "3"); + b: pl + .Series([ + [1, 1], + [2, 2], + [3, 3], + ]) + .cast(pl.List(pl.Int32)), + }); - let actual = df.pivot("b", { - index: "a", - on: "a", - aggregateFunc: "first", - sortColumns: true, - }); + const expected = pl + .DataFrame({ + a: pl.Series([1, 2, 3]).cast(pl.Int32), + "1": pl.Series([[1, 1], null, null]).cast(pl.List(pl.Int32)), + "2": pl.Series([null, [2, 2], null]).cast(pl.List(pl.Int32)), + "3": pl.Series([null, null, [3, 3]]).cast(pl.List(pl.Int32)), + }) + .select("a", "1", 
"2", "3"); - expect(actual).toFrameEqual(expected, true); + const actual = df.pivot("b", { + index: "a", + on: "a", + aggregateFunc: "first", + sortColumns: true, + }); - df = pl.DataFrame({ - a: ["beep", "bop"], - b: ["a", "b"], - c: ["s", "f"], - d: [7, 8], - e: ["x", "y"], - }); - actual = df.pivot(["a", "e"], { - index: "b", - on: ["b"], - aggregateFunc: "first", - separator: "|", - maintainOrder: true, - }); + expect(actual).toFrameEqual(expected, true); + } - expected = pl.DataFrame({ - b: ["a", "b"], - "a|a": ["beep", null], - "a|b": [null, "bop"], - "e|a": ["x", null], - "e|b": [null, "y"], - }); - expect(actual).toFrameEqual(expected, true); + { + const df = pl.DataFrame({ + a: ["beep", "bop"], + b: ["a", "b"], + c: ["s", "f"], + d: [7, 8], + e: ["x", "y"], + }); + const actual = df.pivot(["a", "e"], { + index: "b", + on: ["b"], + aggregateFunc: "first", + separator: "|", + maintainOrder: true, + }); - df = pl.DataFrame({ - foo: ["A", "A", "B", "B", "C"], - N: [1, 2, 2, 4, 2], - bar: ["k", "l", "m", "n", "o"], - }); - actual = df.pivot(["N"], { - index: "foo", - on: "bar", - aggregateFunc: "first", - }); - expected = pl.DataFrame({ - foo: ["A", "B", "C"], - k: [1, null, null], - l: [2, null, null], - m: [null, 2, null], - n: [null, 4, null], - o: [null, null, 2], - }); - expect(actual).toFrameEqual(expected, true); + const expected = pl.DataFrame({ + b: ["a", "b"], + "a|a": ["beep", null], + "a|b": [null, "bop"], + "e|a": ["x", null], + "e|b": [null, "y"], + }); + expect(actual).toFrameEqual(expected, true); + } + { + const df = pl.DataFrame({ + foo: ["A", "A", "B", "B", "C"], + N: [1, 2, 2, 4, 2], + bar: ["k", "l", "m", "n", "o"], + }); + const actual = df.pivot(["N"], { + index: "foo", + on: "bar", + aggregateFunc: "first", + }); - df = pl.DataFrame({ - ix: [1, 1, 2, 2, 1, 2], - col: ["a", "a", "a", "a", "b", "b"], - foo: [0, 1, 2, 2, 7, 1], - bar: [0, 2, 0, 0, 9, 4], - }); + const expected = pl.DataFrame({ + foo: ["A", "B", "C"], + k: [1, null, null], + 
l: [2, null, null], + m: [null, 2, null], + n: [null, 4, null], + o: [null, null, 2], + }); - actual = df.pivot(["foo", "bar"], { - index: "ix", - on: "col", - aggregateFunc: "sum", - separator: "/", - }); + expect(actual).toFrameEqual(expected, true); + } + { + const df = pl.DataFrame({ + ix: [1, 1, 2, 2, 1, 2], + col: ["a", "a", "a", "a", "b", "b"], + foo: [0, 1, 2, 2, 7, 1], + bar: [0, 2, 0, 0, 9, 4], + }); - expected = pl.DataFrame({ - ix: [1, 2], - "foo/a": [1, 4], - "foo/b": [7, 1], - "bar/a": [2, 0], - "bar/b": [9, 4], - }); - expect(actual).toFrameEqual(expected, true); + const actual = df.pivot(["foo", "bar"], { + index: "ix", + on: "col", + aggregateFunc: "sum", + separator: "/", + }); + + const expected = pl.DataFrame({ + ix: [1, 2], + "foo/a": [1, 4], + "foo/b": [7, 1], + "bar/a": [2, 0], + "bar/b": [9, 4], + }); + expect(actual).toFrameEqual(expected, true); + } }); }); describe("join", () => { diff --git a/__tests__/expr.test.ts b/__tests__/expr.test.ts index 49cd9ba8..0218be1c 100644 --- a/__tests__/expr.test.ts +++ b/__tests__/expr.test.ts @@ -1103,30 +1103,36 @@ describe("expr.str", () => { json: [{ a: 1, b: true }, "null", { a: 2, b: false }], }); expect(actual).toFrameEqual(expected); - let s = pl.Series(["[1, 2, 3]", null, "[4, 5, 6]"]); - let dtype = pl.List(pl.Int64); - const expSeries = pl.Series([[1, 2, 3], null, [4, 5, 6]]); - expect(s.str.jsonDecode()).toSeriesEqual(expSeries); - expect(s.str.jsonDecode(dtype)).toSeriesEqual(expSeries); - dtype = pl.Struct([ - new pl.Field("a", pl.Int64), - new pl.Field("b", pl.Bool), - ]); - s = pl.Series("json", [ - '{"a":1, "b": true}', - '{"a": null, "b": null }', - '{"a":2, "b": false}', - ]); - expect(s.str.jsonDecode().as("json")).toSeriesEqual( - expected.getColumn("json"), - ); - expect(s.str.jsonDecode(dtype).as("json")).toSeriesEqual( - expected.getColumn("json"), - ); - s = pl.Series("col_a", [], pl.Utf8); - const exp = pl.Series("col_a", []).cast(pl.List(pl.Int64)); - dtype = 
pl.List(pl.Int64); - expect(s.str.jsonDecode(dtype).as("col_a")).toSeriesEqual(exp); + { + const s = pl.Series(["[1, 2, 3]", null, "[4, 5, 6]"]); + const dtype = pl.List(pl.Int64); + const expSeries = pl.Series([[1, 2, 3], null, [4, 5, 6]]); + expect(s.str.jsonDecode()).toSeriesEqual(expSeries); + expect(s.str.jsonDecode(dtype)).toSeriesEqual(expSeries); + } + { + const dtype = pl.Struct([ + new pl.Field("a", pl.Int64), + new pl.Field("b", pl.Bool), + ]); + const s = pl.Series("json", [ + '{"a":1, "b": true}', + '{"a": null, "b": null }', + '{"a":2, "b": false}', + ]); + expect(s.str.jsonDecode().as("json")).toSeriesEqual( + expected.getColumn("json"), + ); + expect(s.str.jsonDecode(dtype).as("json")).toSeriesEqual( + expected.getColumn("json"), + ); + } + { + const s = pl.Series("col_a", [], pl.Utf8); + const exp = pl.Series("col_a", []).cast(pl.List(pl.Int64)); + const dtype = pl.List(pl.Int64); + expect(s.str.jsonDecode(dtype).as("col_a")).toSeriesEqual(exp); + } }); test("jsonPathMatch", () => { const df = pl.DataFrame({ @@ -1206,68 +1212,82 @@ describe("expr.str", () => { }); test("expr.replace", () => { const df = pl.DataFrame({ a: [1, 2, 2, 3], b: ["a", "b", "c", "d"] }); - let actual = df.withColumns(pl.col("a").replace(2, 100).alias("replaced")); - let expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [1, 100, 100, 3], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl - .col("a") - .replaceStrict([2, 3], [100, 200], -1, pl.Float64) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [-1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl.col("b").replaceStrict("a", "c", "e", pl.Utf8).alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: ["c", "e", "e", "e"], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl 
- .col("b") - .replaceStrict(["a", "b"], ["c", "d"], "e", pl.Utf8) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: ["c", "d", "e", "e"], - }); - expect(actual).toFrameEqual(expected); + { + const actual = df.withColumns( + pl.col("a").replace(2, 100).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [1, 100, 100, 3], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl + .col("a") + .replaceStrict([2, 3], [100, 200], -1, pl.Float64) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [-1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl.col("b").replaceStrict("a", "c", "e", pl.Utf8).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: ["c", "e", "e", "e"], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl + .col("b") + .replaceStrict(["a", "b"], ["c", "d"], "e", pl.Utf8) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: ["c", "d", "e", "e"], + }); + expect(actual).toFrameEqual(expected); + } const mapping = { 2: 100, 3: 200 }; - actual = df.withColumns( - pl - .col("a") - .replaceStrict({ old: mapping, default_: -1, returnDtype: pl.Int64 }) - .alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [-1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); - actual = df.withColumns( - pl.col("a").replace({ old: mapping }).alias("replaced"), - ); - expected = pl.DataFrame({ - a: [1, 2, 2, 3], - b: ["a", "b", "c", "d"], - replaced: [1, 100, 100, 200], - }); - expect(actual).toFrameEqual(expected); + { + const actual = df.withColumns( + pl + 
.col("a") + .replaceStrict({ old: mapping, default_: -1, returnDtype: pl.Int64 }) + .alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [-1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = df.withColumns( + pl.col("a").replace({ old: mapping }).alias("replaced"), + ); + const expected = pl.DataFrame({ + a: [1, 2, 2, 3], + b: ["a", "b", "c", "d"], + replaced: [1, 100, 100, 200], + }); + expect(actual).toFrameEqual(expected); + } }); test("slice", () => { const df = pl.DataFrame({ @@ -2196,69 +2216,76 @@ describe("Round", () => { describe("EWM", () => { test("ewmMean", () => { - let s = pl.Series("s", [2, 5, 3]); - let df = pl.DataFrame([s]); + const s = pl.Series("s", [2, 5, 3]); + const df = pl.DataFrame([s]); let expected = pl.DataFrame({ s, ewmMean: [2.0, 4.0, 3.4285714285714284] }); - - let seriesActual = df.getColumn("s").ewmMean().rename("ewmMean"); - let actual = df.withColumn(col("s").ewmMean().as("ewmMean")); - - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); - - seriesActual = df - .getColumn("s") - .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) - .rename("ewmMean"); - actual = df.withColumn( - col("s") + { + const seriesActual = df.getColumn("s").ewmMean().rename("ewmMean"); + const actual = df.withColumn(col("s").ewmMean().as("ewmMean")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) - .as("ewmMean"), - ); - - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + .rename("ewmMean"); + const actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .as("ewmMean"), + ); - seriesActual = df - .getColumn("s") - 
.ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) - .rename("ewmMean"); - actual = df.withColumn( - col("s") + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) - .as("ewmMean"), - ); - - expected = pl.DataFrame({ s, ewmMean: [2.0, 3.5, 3.25] }); - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); - - seriesActual = df - .getColumn("s") - .ewmMean(0.5, false, 1, true) - .rename("ewmMean"); - actual = df.withColumn(col("s").ewmMean(0.5, false, 1, true).as("ewmMean")); + .rename("ewmMean"); + const actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) + .as("ewmMean"), + ); - expect(actual).toFrameEqual(expected); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + expected = pl.DataFrame({ s, ewmMean: [2.0, 3.5, 3.25] }); + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const seriesActual = df + .getColumn("s") + .ewmMean(0.5, false, 1, true) + .rename("ewmMean"); + const actual = df.withColumn( + col("s").ewmMean(0.5, false, 1, true).as("ewmMean"), + ); - s = pl.Series("a", [2, 3, 5, 7, 4]); - df = pl.DataFrame([s]); + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } + { + const s = pl.Series("a", [2, 3, 5, 7, 4]); + const df = pl.DataFrame([s]); - seriesActual = df - .getColumn("a") - .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) - .round(5) - .rename("ewmMean"); - actual = df.withColumn( - col("a") + const seriesActual = df + .getColumn("a") .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) .round(5) - .as("ewmMean"), - ); + .rename("ewmMean"); + const actual = df.withColumn( + col("a") + .ewmMean({ adjust: true, 
minPeriods: 2, ignoreNulls: true }) + .round(5) + .as("ewmMean"), + ); - expected = pl.DataFrame({ ewmMean: [null, 2.66667, 4, 5.6, 4.77419], s }); - expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + expected = pl.DataFrame({ ewmMean: [null, 2.66667, 4, 5.6, 4.77419], s }); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + } }); test("ewmStd", () => { diff --git a/__tests__/series.test.ts b/__tests__/series.test.ts index 4db2ab2d..3c7b368d 100644 --- a/__tests__/series.test.ts +++ b/__tests__/series.test.ts @@ -573,50 +573,62 @@ describe("series", () => { expect(() => pl.Series("dt", [null], pl.Date).describe()).toThrow( "Invalid operation: describe is not supported for DataType(Date)", ); - let actual = pl.Series([true, false, true]).describe(); - let expected = pl.DataFrame({ - statistic: ["sum", "null_count", "count"], - value: [false, null, null], - }); - - expect(actual).toFrameEqual(expected); - actual = pl.Series(["a", "b", "c", null]).describe(); - expected = pl.DataFrame({ - statistic: ["unique", "null_count", "count"], - value: [4, 1, 4], - }); - expect(actual).toFrameEqual(expected); + { + const actual = pl.Series([true, false, true]).describe(); + const expected = pl.DataFrame({ + statistic: ["sum", "null_count", "count"], + value: [false, null, null], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl.Series(["a", "b", "c", null]).describe(); + const expected = pl.DataFrame({ + statistic: ["unique", "null_count", "count"], + value: [4, 1, 4], + }); + expect(actual).toFrameEqual(expected); + } }); it("series:valueCounts", () => { - let actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true); - let expected = pl.DataFrame({ - a: [2, 1, 3], - count: [2, 1, 1], - }); - expect(actual).toFrameEqual(expected); - - actual = pl - .Series("a", [1, 2, 2, 3]) - .valueCounts(true, true, undefined, true); - expected = pl.DataFrame({ - a: [2, 1, 3], - proportion: [0.5, 0.25, 0.25], - }); - 
expect(actual).toFrameEqual(expected); - - actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true, true, "foo", false); - expected = pl.DataFrame({ - a: [2, 1, 3], - foo: [2, 1, 1], - }); - expect(actual).toFrameEqual(expected); - - actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true, true, "foo", true); - expected = pl.DataFrame({ - a: [2, 1, 3], - foo: [0.5, 0.25, 0.25], - }); - expect(actual).toFrameEqual(expected); + { + const actual = pl.Series("a", [1, 2, 2, 3]).valueCounts(true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + count: [2, 1, 1], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, undefined, true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + proportion: [0.5, 0.25, 0.25], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, "foo", false); + const expected = pl.DataFrame({ + a: [2, 1, 3], + foo: [2, 1, 1], + }); + expect(actual).toFrameEqual(expected); + } + { + const actual = pl + .Series("a", [1, 2, 2, 3]) + .valueCounts(true, true, "foo", true); + const expected = pl.DataFrame({ + a: [2, 1, 3], + foo: [0.5, 0.25, 0.25], + }); + expect(actual).toFrameEqual(expected); + } }); it("set: expected matches actual", () => { const expected = pl.Series([99, 2, 3]); @@ -702,18 +714,22 @@ describe("series", () => { }); test("toDummies", () => { const s = pl.Series("a", [1, 2, 3]); - let actual = s.toDummies(); - let expected = pl.DataFrame( - { "a_1.0": [1, 0, 0], "a_2.0": [0, 1, 0], "a_3.0": [0, 0, 1] }, - { schema: { "a_1.0": pl.UInt8, "a_2.0": pl.UInt8, "a_3.0": pl.UInt8 } }, - ); - expect(actual).toFrameEqual(expected); - actual = s.toDummies(":", true); - expected = pl.DataFrame( - { "a:2.0": [0, 1, 0], "a:3.0": [0, 0, 1] }, - { schema: { "a:2.0": pl.UInt8, "a:3.0": pl.UInt8 } }, - ); - expect(actual).toFrameEqual(expected); + { + const actual = s.toDummies(); + const expected = 
pl.DataFrame( + { "a_1.0": [1, 0, 0], "a_2.0": [0, 1, 0], "a_3.0": [0, 0, 1] }, + { schema: { "a_1.0": pl.UInt8, "a_2.0": pl.UInt8, "a_3.0": pl.UInt8 } }, + ); + expect(actual).toFrameEqual(expected); + } + { + const actual = s.toDummies(":", true); + const expected = pl.DataFrame( + { "a:2.0": [0, 1, 0], "a:3.0": [0, 0, 1] }, + { schema: { "a:2.0": pl.UInt8, "a:3.0": pl.UInt8 } }, + ); + expect(actual).toFrameEqual(expected); + } }); }); describe("comparators & math", () => { diff --git a/polars/dataframe.ts b/polars/dataframe.ts index df4d9ed6..59ae5f0e 100644 --- a/polars/dataframe.ts +++ b/polars/dataframe.ts @@ -1191,12 +1191,15 @@ export interface DataFrame = any> rename(mapping: Record): DataFrame; /** * Replace a column at an index location. + * + * @warning TypeScript cannot encode type mutation, + * so the type of the DataFrame will be incorrect. Cast the type of the DataFrame manually. * ___ * @param index - Column index * @param newColumn - New column to insert * @example * ``` - * > const df = pl.DataFrame({ + * > const df: pl.DataFrame = pl.DataFrame({ * ... "foo": [1, 2, 3], * ... "bar": [6, 7, 8], * ... "ham": ['a', 'b', 'c']