diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index e471372bc..cddf0585e 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -33,6 +33,7 @@ mod st_issimple;
 mod st_isvalid;
 mod st_isvalidreason;
 mod st_length;
+mod st_line_merge;
 mod st_makevalid;
 mod st_minimumclearance;
 mod st_minimumclearance_line;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index 1a2479dda..82553bc29 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -31,6 +31,7 @@ use crate::{
     st_isvalid::st_is_valid_impl,
     st_isvalidreason::st_is_valid_reason_impl,
     st_length::st_length_impl,
+    st_line_merge::st_line_merge_impl,
     st_makevalid::st_make_valid_impl,
     st_minimumclearance::st_minimum_clearance_impl,
     st_minimumclearance_line::st_minimum_clearance_line_impl,
@@ -81,6 +82,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
         ("st_isvalid", st_is_valid_impl()),
         ("st_isvalidreason", st_is_valid_reason_impl()),
         ("st_length", st_length_impl()),
+        ("st_linemerge", st_line_merge_impl()),
         ("st_numinteriorrings", st_num_interior_rings_impl()),
         ("st_numpoints", st_num_points_impl()),
         ("st_nrings", st_nrings_impl()),
diff --git a/c/sedona-geos/src/st_line_merge.rs b/c/sedona-geos/src/st_line_merge.rs
new file mode 100644
index 000000000..db7146c86
--- /dev/null
+++ b/c/sedona-geos/src/st_line_merge.rs
@@ -0,0 +1,208 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::{error::Result, DataFusionError, ScalarValue};
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
+
+use crate::executor::GeosExecutor;
+use crate::geos_to_wkb::write_geos_geometry;
+
+/// ST_LineMerge() implementation using the geos crate
+pub fn st_line_merge_impl() -> ScalarKernelRef {
+    Arc::new(STLineMerge {})
+}
+
+/// GEOS-backed kernel for `ST_LineMerge(geom [, directed])`.
+#[derive(Debug)]
+struct STLineMerge {}
+
+impl SedonaScalarKernel for STLineMerge {
+    /// Matches a geometry followed by an optional boolean; the result is WKB geometry.
+    fn return_type(
+        &self,
+        args: &[sedona_schema::datatypes::SedonaType],
+    ) -> datafusion_common::Result<Option<sedona_schema::datatypes::SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![
+                ArgMatcher::is_geometry(),
+                ArgMatcher::optional(ArgMatcher::is_boolean()),
+            ],
+            WKB_GEOMETRY,
+        );
+        matcher.match_args(args)
+    }
+
+    /// Merges the lines of each input geometry, appending null for null input.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `directed` is supplied as anything other than a boolean scalar,
+    /// or if merging or serializing any geometry fails.
+    fn invoke_batch(
+        &self,
+        arg_types: &[sedona_schema::datatypes::SedonaType],
+        args: &[datafusion_expr::ColumnarValue],
+    ) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+
+        // The flag is read once for the whole batch, so it must be a scalar.
+        // A missing or NULL flag means undirected; any other value (for example
+        // a boolean column) is rejected instead of being silently read as false.
+        let directed = match args.get(1) {
+            None | Some(ColumnarValue::Scalar(ScalarValue::Null)) => false,
+            Some(ColumnarValue::Scalar(ScalarValue::Boolean(flag))) => flag.unwrap_or(false),
+            Some(_) => {
+                return Err(DataFusionError::Execution(
+                    "ST_LineMerge: directed must be a boolean scalar".to_string(),
+                ));
+            }
+        };
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    invoke_scalar(&wkb, &mut builder, directed)?;
+                    // Close the value that invoke_scalar wrote into the builder
+                    builder.append_value([]);
+                }
+                None => builder.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Writes the line-merged form of `geos_geom` to `writer`.
+///
+/// Empty geometries are written back unchanged. Otherwise `line_merge_directed`
+/// is used when `directed` is true and `line_merge` when it is false.
+///
+/// # Errors
+///
+/// Returns an execution error if the emptiness check or the merge fails, and
+/// propagates any error from `write_geos_geometry`.
+fn invoke_scalar(
+    geos_geom: &geos::Geometry,
+    writer: &mut impl std::io::Write,
+    directed: bool,
+) -> Result<()> {
+    // PostGIS seems to return the original geometry if it is empty
+    let is_empty = geos_geom.is_empty().map_err(|e| {
+        DataFusionError::Execution(format!("Failed to check if the geometry is empty: {e}"))
+    })?;
+    if is_empty {
+        write_geos_geometry(geos_geom, writer)?;
+        return Ok(());
+    }
+
+    let result = if directed {
+        geos_geom.line_merge_directed()
+    } else {
+        geos_geom.line_merge()
+    };
+
+    let geom =
+        result.map_err(|e| DataFusionError::Execution(format!("Failed to merge lines: {e}")))?;
+
+    write_geos_geometry(&geom, writer)?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::ArrayRef;
+    use datafusion_common::ScalarValue;
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::create::create_array;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    use super::*;
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+        use arrow_schema::DataType;
+
+        let udf = SedonaScalarUDF::from_impl("st_linemerge", st_line_merge_impl());
+        let tester = ScalarUdfTester::new(
+            udf.into(),
+            vec![sedona_type, SedonaType::Arrow(DataType::Boolean)],
+        );
+        tester.assert_return_type(WKB_GEOMETRY);
+
+        let input = vec![
+            Some("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))"),
+            Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"), // opposite direction
+            Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), // doesn't touch
+        ];
+
+        let expected: ArrayRef = create_array(
+            &[
+                Some("LINESTRING (0 0, 1 0, 1 1)"),
+                Some("LINESTRING (0 0, 1 0, 1 1)"),
+                Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
+            ],
+            &WKB_GEOMETRY,
+        );
+
+        assert_eq!(
+            &tester
+                .invoke_wkb_array_scalar(input.clone(), false)
+                .unwrap(),
+            &expected
+        );
+
+        // If directed is true, lines with opposite directions won't be merged
+
+        let expected_directed: ArrayRef = create_array(
+            &[
+                Some("LINESTRING (0 0, 1 0, 1 1)"),
+                Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"),
+                Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
+            ],
+            &WKB_GEOMETRY,
+        );
+
+        assert_eq!(
+            &tester.invoke_wkb_array_scalar(input, true).unwrap(),
+            &expected_directed
+        );
+
+        // handle NULL
+
+        let result = tester
+            .invoke_scalar_scalar(ScalarValue::Null, false)
+            .unwrap();
+        assert!(result.is_null());
+    }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index f6597298f..ff566cccf 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1797,6 +1797,68 @@ def test_st_isring_non_linestring_error(eng, geom):
     eng.assert_query_result(f"SELECT ST_IsRing(ST_GeomFromText('{geom}'))", None)
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        (None, None),
+        ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
+        # opposite direction
+        (
+            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
+            "LINESTRING (0 0, 1 0, 1 1)",
+        ),
+        # non-touching
+        (
+            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
+            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
+        ),
+        # empty cases
+        ("POINT EMPTY", "POINT (nan nan)"),
+        ("LINESTRING EMPTY", "LINESTRING EMPTY"),
+        ("POLYGON EMPTY", "POLYGON EMPTY"),
+        ("MULTIPOINT EMPTY", "MULTIPOINT EMPTY"),
+        ("MULTILINESTRING EMPTY", "MULTILINESTRING EMPTY"),
+        ("MULTIPOLYGON EMPTY", "MULTIPOLYGON EMPTY"),
+        ("GEOMETRYCOLLECTION EMPTY", "GEOMETRYCOLLECTION EMPTY"),
+        # Note that the behaviour on non-multilinestring geometry is not documented.
+        # But, we test such cases here as well to detect if there's any difference.
+        ("POINT (0 0)", "GEOMETRYCOLLECTION EMPTY"),
+        ("LINESTRING (0 0, 1 0)", "LINESTRING (0 0, 1 0)"),
+        ("POLYGON ((0 0, 0 1, 1 0, 0 0))", "LINESTRING (0 0, 0 1, 1 0, 0 0)"),
+    ],
+)
+def test_st_linemerge(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_LineMerge({geom_or_null(geom)})",
+        expected,
+    )
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
+        (
+            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
+            "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
+        ),
+        (
+            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
+            "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
+        ),
+    ],
+)
+def test_st_linemerge_directed(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_LineMerge({geom_or_null(geom)}, true)",
+        expected,
+    )
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 4fe633c84..bef42fff1 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -50,6 +50,7 @@ pub mod st_isclosed;
 mod st_iscollection;
 pub mod st_isempty;
 mod st_length;
+mod st_line_merge;
 mod st_makeline;
 mod st_numgeometries;
 mod st_perimeter;
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index 2b0130bb9..32549b675 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -58,6 +58,7 @@ pub fn default_function_set() -> FunctionSet {
         crate::predicates::st_knn_udf,
         crate::predicates::st_touches_udf,
         crate::predicates::st_within_udf,
+        crate::st_line_merge::st_line_merge_udf,
         crate::referencing::st_line_interpolate_point_udf,
         crate::referencing::st_line_locate_point_udf,
         crate::sd_format::sd_format_udf,
diff --git a/rust/sedona-functions/src/st_line_merge.rs b/rust/sedona-functions/src/st_line_merge.rs
new file mode 100644
index 000000000..2c409a11e
--- /dev/null
+++ b/rust/sedona-functions/src/st_line_merge.rs
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
+use sedona_expr::scalar_udf::SedonaScalarUDF;
+use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
+
+/// ST_LineMerge() scalar UDF implementation
+///
+/// Stub function for line merging. The stub matches a geometry plus an
+/// optional boolean `directed` flag, the same arguments its documentation lists.
+pub fn st_line_merge_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new_stub(
+        "st_linemerge",
+        ArgMatcher::new(
+            vec![
+                ArgMatcher::is_geometry(),
+                ArgMatcher::optional(ArgMatcher::is_boolean()),
+            ],
+            WKB_GEOMETRY,
+        ),
+        Volatility::Immutable,
+        Some(st_line_merge_doc()),
+    )
+}
+
+/// Builds the user-facing documentation for ST_LineMerge.
+fn st_line_merge_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Merge the line segments in a geometry",
+        "ST_LineMerge (Geom: Geometry [, directed: Boolean])",
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_argument(
+        "directed",
+        "boolean: If true, lines with opposite directions will not be merged",
+    )
+    .with_sql_example(
+        "SELECT ST_LineMerge(ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))'))",
+    )
+    .build()
+}
+
+#[cfg(test)]
+mod tests {
+    use datafusion_expr::ScalarUDF;
+
+    use super::*;
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = st_line_merge_udf().into();
+        assert_eq!(udf.name(), "st_linemerge");
+        assert!(udf.documentation().is_some())
+    }
+}