fix(rust): Increase precision when constructing float Series (#25323)

camriddell · web-flow · commit 5056a2b79f40 · 2025-12-04T12:46:58.000+01:00
diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs
@@ -2,6 +2,7 @@ use std::fmt::Write;
 
 use arrow::bitmap::MutableBitmap;
 use num_traits::AsPrimitive;
+use polars_compute::cast::SerPrimitive;
 
 #[cfg(feature = "dtype-categorical")]
 use crate::chunked_array::builder::CategoricalChunkedBuilder;
@@ -308,17 +309,119 @@ fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<Strin
         Ok(builder.finish())
     }
     fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
+        /// Append the textual form of `av` to `buffer`, recursing into nested values.
+        ///
+        /// * Floats are written with `SerPrimitive::write`, using `float_buf` as scratch
+        ///   space, instead of going through `Display`.
+        /// * Structs render as `{a,b}` and lists/arrays as `[a,b]`: comma-separated, no
+        ///   spaces, with each child formatted by a recursive call.
+        /// * `AnyValue::String` is copied verbatim; every other value uses `Display`.
+        fn _write_any_value(av: &AnyValue<'_>, buffer: &mut String, float_buf: &mut Vec<u8>) {
+            // Serialize one float into the scratch bytes and copy the text to `out`.
+            fn push_float(out: &mut String, scratch: &mut Vec<u8>, value: f64) {
+                // Reset here so callers never have to clear the scratch buffer.
+                scratch.clear();
+                SerPrimitive::write(scratch, value);
+                // Panics only if the serializer produced non-UTF-8 bytes.
+                out.push_str(std::str::from_utf8(scratch).unwrap());
+            }
+
+            match av {
+                AnyValue::String(text) => buffer.push_str(text),
+                AnyValue::Float64(x) => push_float(buffer, float_buf, *x),
+                AnyValue::Float32(x) => push_float(buffer, float_buf, f64::from(*x)),
+                #[cfg(feature = "dtype-f16")]
+                AnyValue::Float16(x) => push_float(buffer, float_buf, f64::from(*x)),
+                #[cfg(feature = "dtype-struct")]
+                AnyValue::StructOwned(payload) => {
+                    buffer.push('{');
+                    for (idx, field) in payload.0.iter().enumerate() {
+                        // Separator goes before every element except the first.
+                        if idx > 0 {
+                            buffer.push(',');
+                        }
+                        _write_any_value(field, buffer, float_buf);
+                    }
+                    buffer.push('}');
+                },
+                #[cfg(feature = "dtype-struct")]
+                AnyValue::Struct(_, _, flds) => {
+                    // Gather the field values into a temporary Vec before writing them.
+                    let mut fields = Vec::with_capacity(flds.len());
+                    av._materialize_struct_av(&mut fields);
+
+                    buffer.push('{');
+                    for (idx, field) in fields.iter().enumerate() {
+                        if idx > 0 {
+                            buffer.push(',');
+                        }
+                        _write_any_value(field, buffer, float_buf);
+                    }
+                    buffer.push('}');
+                },
+                #[cfg(feature = "dtype-array")]
+                AnyValue::Array(items, _) => {
+                    buffer.push('[');
+                    for (idx, item) in items.iter().enumerate() {
+                        if idx > 0 {
+                            buffer.push(',');
+                        }
+                        _write_any_value(&item, buffer, float_buf);
+                    }
+                    buffer.push(']');
+                },
+                AnyValue::List(items) => {
+                    buffer.push('[');
+                    for (idx, item) in items.iter().enumerate() {
+                        if idx > 0 {
+                            buffer.push(',');
+                        }
+                        _write_any_value(&item, buffer, float_buf);
+                    }
+                    buffer.push(']');
+                },
+                // No dedicated handling: defer to the value's `Display` implementation.
+                other => {
+                    write!(buffer, "{other}").unwrap();
+                },
+            }
+        }
+
         let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
         let mut owned = String::new(); // Amortize allocations.
+        let mut float_buf = vec![];
         for av in values {
+            owned.clear();
+            float_buf.clear();
+
             match av {
                 AnyValue::String(s) => builder.append_value(s),
                 AnyValue::StringOwned(s) => builder.append_value(s),
                 AnyValue::Null => builder.append_null(),
                 AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
+
+                // Explicitly convert and dump floating-point values to strings
+                // to preserve as much precision as possible.
+                // Using write!(..., "{av}") steps through Display formatting
+                // which rounds to an arbitrary precision thus losing information.
+                AnyValue::Float64(f) => {
+                    SerPrimitive::write(&mut float_buf, *f);
+                    let s = std::str::from_utf8(&float_buf).unwrap();
+                    builder.append_value(s);
+                },
+                AnyValue::Float32(f) => {
+                    SerPrimitive::write(&mut float_buf, *f as f64); // promote to f64 for serialization
+                    let s = std::str::from_utf8(&float_buf).unwrap();
+                    builder.append_value(s);
+                },
+                #[cfg(feature = "dtype-f16")]
+                AnyValue::Float16(f) => {
+                    SerPrimitive::write(&mut float_buf, f64::from(*f));
+                    let s = std::str::from_utf8(&float_buf).unwrap();
+                    builder.append_value(s);
+                },
                 av => {
-                    owned.clear();
-                    write!(owned, "{av}").unwrap();
+                    _write_any_value(av, &mut owned, &mut float_buf);
                     builder.append_value(&owned);
                 },
             }
diff --git a/py-polars/tests/unit/constructors/test_any_value_fallbacks.py b/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
@@ -7,6 +7,7 @@
 from typing import TYPE_CHECKING, Any
 
 import pytest
+from numpy import array
 
 import polars as pl
 from polars._plr import PySeries
@@ -408,3 +409,44 @@ def test_categorical_lit_18874() -> None:
             ]
         ),
     )
+
+
+@pytest.mark.parametrize(
+    ("values", "expected"),
+    [
+        # Float64 should have ~17; Float32 ~6 digits of precision preserved
+        ([0.123, 0.123456789], ["0.123", "0.123456789"]),
+        ([[0.123, 0.123456789]], ["[0.123,0.123456789]"]),
+        ([array([0.123, 0.123456789])], ["[0.123,0.123456789]"]),
+        ([{"a": 0.123, "b": 0.123456789}], ["{0.123,0.123456789}"]),
+        ([[{"a": 0.123, "b": 0.123456789}]], ["[{0.123,0.123456789}]"]),
+        ([{"x": [0.1, 0.2]}, [{"y": 0.3}]], ["{[0.1,0.2]}", "[{0.3}]"]),
+        (
+            [None, {"a": None, "b": 1.0}, [None, 2.0]],
+            [None, "{null,1.0}", "[null,2.0]"],
+        ),
+        ([[], {}], ["[]", "{}"]),
+        ([[0.5]], ["[0.5]"]),
+        ([{"a": 0.5}], ["{0.5}"]),
+    ],
+    ids=[
+        "basic_floats",
+        "nested_list",
+        "nested_array",
+        "basic_struct",
+        "list_of_structs",
+        "nested_mixed",
+        "mixed_nulls",
+        "empty_containers",
+        "single_element_list",
+        "single_element_struct",
+    ],
+)
+def test_float_to_string_precision_25257(
+    values: list[Any], expected: list[Any]
+) -> None:
+    # verify the conversion is decoupled from Display formatting
+    with pl.Config(float_precision=1):
+        s = pl.Series(values, strict=False, dtype=pl.String)
+
+    assert (s == pl.Series(expected)).all()