diff --git a/__tests__/expr.test.ts b/__tests__/expr.test.ts index bda8303a6..ab13483a4 100644 --- a/__tests__/expr.test.ts +++ b/__tests__/expr.test.ts @@ -1988,3 +1988,149 @@ describe("Round", () => { expect(exprFn).toThrow(); }); }); + +describe("EWM", () => { + test("ewmMean", () => { + let s = pl.Series("s", [2, 5, 3]); + let df = pl.DataFrame([s]); + let expected = pl.DataFrame({ s, ewmMean: [2.0, 4.0, 3.4285714285714284] }); + + let seriesActual = df.getColumn("s").ewmMean().rename("ewmMean"); + let actual = df.withColumn(col("s").ewmMean().as("ewmMean")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + + seriesActual = df + .getColumn("s") + .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .rename("ewmMean"); + actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .as("ewmMean"), + ); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + + seriesActual = df + .getColumn("s") + .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) + .rename("ewmMean"); + actual = df.withColumn( + col("s") + .ewmMean({ alpha: 0.5, adjust: false, ignoreNulls: true }) + .as("ewmMean"), + ); + + expected = pl.DataFrame({ s, ewmMean: [2.0, 3.5, 3.25] }); + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + + seriesActual = df + .getColumn("s") + .ewmMean(0.5, false, 1, true) + .rename("ewmMean"); + actual = df.withColumn(col("s").ewmMean(0.5, false, 1, true).as("ewmMean")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + + s = pl.Series("a", [2, 3, 5, 7, 4]); + df = pl.DataFrame([s]); + + seriesActual = df + .getColumn("a") + .ewmMean({ adjust: true, minPeriods: 2, ignoreNulls: true }) + .round(5) + .rename("ewmMean"); + actual = df.withColumn( + col("a") + .ewmMean({ adjust: 
true, minPeriods: 2, ignoreNulls: true }) + .round(5) + .as("ewmMean"), + ); + + expected = pl.DataFrame({ ewmMean: [null, 2.66667, 4, 5.6, 4.77419], s }); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmMean")); + }); + + test("ewmStd", () => { + const s = pl.Series("s", [2, 5, 3]); + const df = pl.DataFrame([s]); + const expected = pl.DataFrame({ s, ewmStd: [0, 2.12132, 1.38873] }); + + let seriesActual = df.getColumn("s").ewmStd().round(5).rename("ewmStd"); + let actual = df.withColumn(col("s").ewmStd().round(5).as("ewmStd")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmStd")); + + seriesActual = df + .getColumn("s") + .ewmStd({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .round(5) + .rename("ewmStd"); + actual = df.withColumn( + col("s") + .ewmStd({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .round(5) + .as("ewmStd"), + ); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmStd")); + + seriesActual = df + .getColumn("s") + .ewmStd(0.5, true, 1, false) + .round(5) + .rename("ewmStd"); + actual = df.withColumn( + col("s").ewmStd(0.5, true, 1, false).round(5).as("ewmStd"), + ); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmStd")); + }); + + test("ewmVar", () => { + const s = pl.Series("s", [2, 5, 3]); + const df = pl.DataFrame([s]); + const expected = pl.DataFrame({ s, ewmVar: [0, 4.5, 1.92857] }); + + let seriesActual = df.getColumn("s").ewmVar().round(5).rename("ewmVar"); + let actual = df.withColumn(col("s").ewmVar().round(5).as("ewmVar")); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmVar")); + + seriesActual = df + .getColumn("s") + .ewmVar({ alpha: 0.5, adjust: true, ignoreNulls: true }) + .round(5) + .rename("ewmVar"); + actual = df.withColumn( + col("s") + .ewmVar({ alpha: 0.5, adjust: true, ignoreNulls: true }) + 
.round(5) + .as("ewmVar"), + ); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmVar")); + + seriesActual = df + .getColumn("s") + .ewmVar(0.5, true, 1, false) + .round(5) + .rename("ewmVar"); + actual = df.withColumn( + col("s").ewmVar(0.5, true, 1, false).round(5).as("ewmVar"), + ); + + expect(actual).toFrameEqual(expected); + expect(seriesActual).toSeriesEqual(expected.getColumn("ewmVar")); + }); +}); diff --git a/polars/lazy/expr/index.ts b/polars/lazy/expr/index.ts index 6cc361d92..9649258fb 100644 --- a/polars/lazy/expr/index.ts +++ b/polars/lazy/expr/index.ts @@ -20,6 +20,7 @@ import { Round, Sample, Serialize, + EwmOps, } from "../../shared_traits"; import { InterpolationMethod, FillNullStrategy, RankMethod } from "../../types"; /** @@ -32,6 +33,7 @@ export interface Expr Cumulative, Sample, Round, + EwmOps, Serialize { /** @ignore */ _expr: any; @@ -140,6 +142,7 @@ export interface Expr * @param other Expression to compute dot product with */ dot(other: any): Expr; + /** * Exclude certain columns from a wildcard/regex selection. * @@ -727,6 +730,117 @@ export const _Expr = (_expr: any): Expr => { return _Expr(_expr.dot(expr)); }, + ewmMean( + opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ) { + if (opts) { + if (typeof opts === "number") { + return wrap( + "ewmMean", + opts, + adjust ?? true, + minPeriods ?? 1, + bias ?? false, + ignoreNulls ?? true, + ); + } else { + return wrap( + "ewmMean", + opts.alpha ?? 0.5, + opts.adjust ?? true, + opts.minPeriods ?? 1, + opts.bias ?? false, + opts.ignoreNulls ?? 
true, + ); + } + } else { + return wrap("ewmMean", 0.5, true, 1, false, true); + } + }, + ewmStd( + opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ) { + if (opts) { + if (typeof opts === "number") { + return wrap( + "ewmStd", + opts, + adjust ?? true, + minPeriods ?? 1, + bias ?? false, + ignoreNulls ?? true, + ); + } else { + return wrap( + "ewmStd", + opts.alpha ?? 0.5, + opts.adjust ?? true, + opts.minPeriods ?? 1, + opts.bias ?? false, + opts.ignoreNulls ?? true, + ); + } + } else { + return wrap("ewmStd", 0.5, true, 1, false, true); + } + }, + ewmVar( + opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ) { + if (opts) { + if (typeof opts === "number") { + return wrap( + "ewmVar", + opts, + adjust ?? true, + minPeriods ?? 1, + bias ?? false, + ignoreNulls ?? true, + ); + } else { + return wrap( + "ewmVar", + opts.alpha ?? 0.5, + opts.adjust ?? true, + opts.minPeriods ?? 1, + opts.bias ?? false, + opts.ignoreNulls ?? 
true, + ); + } + } else { + return wrap("ewmVar", 0.5, true, 1, false, true); + } + }, exclude(...columns) { return _Expr(_expr.exclude(columns.flat(2))); }, diff --git a/polars/series/index.ts b/polars/series/index.ts index dd7e01a42..c3cc77678 100644 --- a/polars/series/index.ts +++ b/polars/series/index.ts @@ -16,6 +16,7 @@ import { Round, Sample, Serialize, + EwmOps, } from "../shared_traits"; import { col } from "../lazy/functions"; import { InterpolationMethod, RankMethod } from "../types"; @@ -32,6 +33,7 @@ export interface Series Cumulative, Round, Sample, + EwmOps, Serialize { inner(): any; name: string; @@ -1265,6 +1267,15 @@ export function _Series(_s: any): Series { equals(field: Series) { return this.eq(field); }, + ewmMean(...args) { + return expr_op("ewmMean", ...args); + }, + ewmStd(...args) { + return expr_op("ewmStd", ...args); + }, + ewmVar(...args) { + return expr_op("ewmVar", ...args); + }, explode() { return wrap("explode"); }, diff --git a/polars/shared_traits.ts b/polars/shared_traits.ts index 132d9f4eb..192d87244 100644 --- a/polars/shared_traits.ts +++ b/polars/shared_traits.ts @@ -1189,3 +1189,142 @@ export interface GroupByOps { check_sorted?: boolean; }): T; } + +/** + * Exponentially-weighted operations that can be applied to a Series and Expr + */ +export interface EwmOps { + /** + * Exponentially-weighted moving average. + * + * @param alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. Defaults to ``0.5``. + * @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. Defaults to ``true``. + * - When ``adjust: true`` the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` + * - When ``adjust: false`` the EW function is calculated recursively + * @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null). Defaults to ``1``. 
+ * @param bias When ``bias: false`` (default), apply a correction to make the estimate statistically unbiased. 
+ * @param ignoreNulls Ignore missing values when calculating weights. Defaults to ``true``. + * - When ``ignoreNulls: false``, weights are based on absolute positions. + * - When ``ignoreNulls: true`` (default), weights are based on relative positions. + * @returns Expr that evaluates to a float 64 Series. + * @example + * ``` + * > const df = pl.DataFrame({a: [1, 2, 3]}); + * > df.select(pl.col("a").ewmMean()) + * shape: (3, 1) + * ┌──────────┐ + * │ a │ + * │ --- │ + * │ f64 │ + * ╞══════════╡ + * │ 1.0 │ + * │ 1.666667 │ + * │ 2.428571 │ + * └──────────┘ + * ``` + */ + ewmMean(): T; + ewmMean( + alpha?: number, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ): T; + ewmMean(opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }): T; + /** + * Exponentially-weighted standard deviation. + * + * @param alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. Defaults to ``0.5``. + * @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. Defaults to ``true``. + * - When ``adjust: true`` the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` + * - When ``adjust: false`` the EW function is calculated recursively + * @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null). Defaults to ``1``. + * @param bias When ``bias: false`` (default), apply a correction to make the estimate statistically unbiased. + * @param ignoreNulls Ignore missing values when calculating weights. Defaults to ``true``. + * - When ``ignoreNulls: false``, weights are based on absolute positions. + * For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of ``[x_0, null, x_2]`` are :math:`(1-\alpha)^2` and :math:`1` if ``adjust: true``, and :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust: false``. + * - When ``ignoreNulls: true`` (default), weights are based on relative positions. + * @returns Expr that evaluates to a float 64 Series. 
+ * @example + * ``` + * > const df = pl.DataFrame({a: [1, 2, 3]}); + * > df.select(pl.col("a").ewmStd()) + * shape: (3, 1) + * ┌──────────┐ + * │ a │ + * │ --- │ + * │ f64 │ + * ╞══════════╡ + * │ 0.0 │ + * │ 0.707107 │ + * │ 0.963624 │ + * └──────────┘ + * ``` + */ + ewmStd(): T; + ewmStd( + alpha?: number, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ): T; + ewmStd(opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }): T; + /** + * Exponentially-weighted variance. + * + * @param alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. Defaults to ``0.5``. + * @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. Defaults to ``true``. + * - When ``adjust: true`` the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` + * - When ``adjust: false`` the EW function is calculated recursively + * @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null). Defaults to ``1``. + * @param bias When ``bias: false`` (default), apply a correction to make the estimate statistically unbiased. + * @param ignoreNulls Ignore missing values when calculating weights. Defaults to ``true``. + * - When ``ignoreNulls: false``, weights are based on absolute positions. + * - When ``ignoreNulls: true`` (default), weights are based on relative positions. + * @returns Expr that evaluates to a float 64 Series. 
+ * @example + * ``` + * > const df = pl.DataFrame({a: [1, 2, 3]}); + * > df.select(pl.col("a").ewmVar()) + * shape: (3, 1) + * ┌──────────┐ + * │ a │ + * │ --- │ + * │ f64 │ + * ╞══════════╡ + * │ 0.0 │ + * │ 0.5 │ + * │ 0.928571 │ + * └──────────┘ + * ``` + */ + ewmVar(): T; + ewmVar( + alpha?: number, + adjust?: boolean, + minPeriods?: number, + bias?: boolean, + ignoreNulls?: boolean, + ): T; + ewmVar(opts: { + alpha?: number; + adjust?: boolean; + minPeriods?: number; + bias?: boolean; + ignoreNulls?: boolean; + }): T; +}