From 1f7a1f68057ccd2e11e8cc36583e45bb31f3d7e6 Mon Sep 17 00:00:00 2001 From: Thomas McKenna Date: Mon, 25 Nov 2024 11:19:13 -0600 Subject: [PATCH] handle empty arrays when inferring schema. fixes #296 --- __tests__/dataframe.test.ts | 10 ++++++++++ src/dataframe.rs | 33 +++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/__tests__/dataframe.test.ts b/__tests__/dataframe.test.ts index 9539fa07..ab653e8d 100644 --- a/__tests__/dataframe.test.ts +++ b/__tests__/dataframe.test.ts @@ -2008,6 +2008,16 @@ describe("create", () => { string: pl.String, }); }); + test("from row objects, inferred schema, empty array", () => { + const df = pl.readRecords([ + { a: [], b: 0 }, + { a: [""], b: 0 }, + ]); + expect(df.schema).toStrictEqual({ + a: pl.List(pl.String), + b: pl.Float64, + }); + }); test("from row objects, with schema", () => { const rows = [ { num: 1, date: "foo", string: "foo1" }, diff --git a/src/dataframe.rs b/src/dataframe.rs index 174eba4e..b9fee43e 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -1657,21 +1657,26 @@ fn obj_to_pairs(rows: &Array, len: usize) -> impl '_ + Iterator = - Vec::with_capacity(len as usize); - - for idx in 0..max_take { - let item: napi::JsUnknown = - arr.get_element(idx as u32).unwrap(); - let ty = item.get_type().unwrap(); - let dt: Wrap = ty.into(); - dtypes.push(dt.0) - } - let dtype = coerce_data_type(&dtypes); - DataType::List(dtype.into()) + if len == 0 { + DataType::List(DataType::Null.into()) + } else { + // dont compare too many items, as it could be expensive + let max_take = std::cmp::min(len as usize, 10); + let mut dtypes: Vec = + Vec::with_capacity(len as usize); + + for idx in 0..max_take { + let item: napi::JsUnknown = + arr.get_element(idx as u32).unwrap(); + let ty = item.get_type().unwrap(); + let dt: Wrap = ty.into(); + dtypes.push(dt.0) + } + let dtype = coerce_data_type(&dtypes); + + DataType::List(dtype.into()) + } } else if val.is_date().unwrap() { DataType::Datetime(TimeUnit::Milliseconds, None) } else {