-
Notifications
You must be signed in to change notification settings - Fork 42
feat(rust/sedona-geos): Implement ST_LineMerge() #503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
4dc2d95
67f23ff
288948d
6cfa146
144a6b3
6058662
6f34f57
9ab729c
f0add40
a0bacd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use std::sync::Arc; | ||
|
|
||
| use arrow_array::builder::BinaryBuilder; | ||
| use datafusion_common::{error::Result, DataFusionError, ScalarValue}; | ||
| use datafusion_expr::ColumnarValue; | ||
| use geos::Geom; | ||
| use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; | ||
| use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; | ||
| use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; | ||
|
|
||
| use crate::executor::GeosExecutor; | ||
| use crate::geos_to_wkb::write_geos_geometry; | ||
|
|
||
| pub fn st_line_merge_impl() -> ScalarKernelRef { | ||
| Arc::new(STLineMerge {}) | ||
| } | ||
|
|
||
| #[derive(Debug)] | ||
| struct STLineMerge {} | ||
|
|
||
| impl SedonaScalarKernel for STLineMerge { | ||
| fn return_type( | ||
| &self, | ||
| args: &[sedona_schema::datatypes::SedonaType], | ||
| ) -> datafusion_common::Result<Option<sedona_schema::datatypes::SedonaType>> { | ||
| let matcher = ArgMatcher::new( | ||
| vec![ | ||
| ArgMatcher::is_geometry(), | ||
| ArgMatcher::optional(ArgMatcher::is_boolean()), | ||
| ], | ||
| WKB_GEOMETRY, | ||
| ); | ||
| matcher.match_args(args) | ||
| } | ||
|
|
||
| fn invoke_batch( | ||
| &self, | ||
| arg_types: &[sedona_schema::datatypes::SedonaType], | ||
| args: &[datafusion_expr::ColumnarValue], | ||
| ) -> datafusion_common::Result<datafusion_expr::ColumnarValue> { | ||
| let executor = GeosExecutor::new(arg_types, args); | ||
| let mut builder = BinaryBuilder::with_capacity( | ||
| executor.num_iterations(), | ||
| WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), | ||
| ); | ||
|
|
||
| let directed = match args.get(1) { | ||
| Some(ColumnarValue::Scalar(ScalarValue::Boolean(Some(opt_bool)))) => *opt_bool, | ||
| _ => false, | ||
| }; | ||
|
|
||
| executor.execute_wkb_void(|maybe_wkb| { | ||
| match maybe_wkb { | ||
| Some(wkb) => { | ||
| invoke_scalar(&wkb, &mut builder, directed)?; | ||
| builder.append_value([]); | ||
| } | ||
| None => builder.append_null(), | ||
| } | ||
|
|
||
| Ok(()) | ||
| })?; | ||
|
|
||
| executor.finish(Arc::new(builder.finish())) | ||
| } | ||
| } | ||
|
|
||
| fn invoke_scalar( | ||
| geos_geom: &geos::Geometry, | ||
| writer: &mut impl std::io::Write, | ||
| directed: bool, | ||
| ) -> Result<()> { | ||
| // PostGIS seems to return the original geometry if it is empty | ||
| let is_empty = geos_geom.is_empty().map_err(|e| { | ||
| DataFusionError::Execution(format!("Failed to check if the geometry is empty: {e}")) | ||
| })?; | ||
| if is_empty { | ||
| write_geos_geometry(geos_geom, writer)?; | ||
| return Ok(()); | ||
| } | ||
|
|
||
| let result = if directed { | ||
| geos_geom.line_merge_directed() | ||
| } else { | ||
| geos_geom.line_merge() | ||
| }; | ||
|
|
||
| let geom = | ||
| result.map_err(|e| DataFusionError::Execution(format!("Failed to merge lines: {e}")))?; | ||
|
|
||
| write_geos_geometry(&geom, writer)?; | ||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use arrow_array::ArrayRef; | ||
| use datafusion_common::ScalarValue; | ||
| use rstest::rstest; | ||
| use sedona_expr::scalar_udf::SedonaScalarUDF; | ||
| use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; | ||
| use sedona_testing::create::create_array; | ||
| use sedona_testing::testers::ScalarUdfTester; | ||
|
|
||
| use super::*; | ||
|
|
||
| #[rstest] | ||
| fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { | ||
| use arrow_schema::DataType; | ||
|
|
||
| let udf = SedonaScalarUDF::from_impl("st_linemerge", st_line_merge_impl()); | ||
| let tester = ScalarUdfTester::new( | ||
| udf.into(), | ||
| vec![sedona_type, SedonaType::Arrow(DataType::Boolean)], | ||
| ); | ||
| tester.assert_return_type(WKB_GEOMETRY); | ||
|
|
||
| let input = vec![ | ||
| Some("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))"), | ||
| Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"), // opposite direction | ||
| Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), // doesn't touch | ||
| ]; | ||
|
|
||
| let expected: ArrayRef = create_array( | ||
| &[ | ||
| Some("LINESTRING (0 0, 1 0, 1 1)"), | ||
| Some("LINESTRING (0 0, 1 0, 1 1)"), | ||
| Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), | ||
| ], | ||
| &WKB_GEOMETRY, | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| &tester | ||
| .invoke_wkb_array_scalar(input.clone(), false) | ||
| .unwrap(), | ||
| &expected | ||
| ); | ||
|
|
||
| // If directed is true, lines with opposite directions won't be merged | ||
|
|
||
| let expected_directed: ArrayRef = create_array( | ||
| &[ | ||
| Some("LINESTRING (0 0, 1 0, 1 1)"), | ||
| Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"), | ||
| Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), | ||
| ], | ||
| &WKB_GEOMETRY, | ||
| ); | ||
|
|
||
| assert_eq!( | ||
| &tester.invoke_wkb_array_scalar(input, true).unwrap(), | ||
| &expected_directed | ||
| ); | ||
|
|
||
| // handle NULL | ||
|
|
||
| let result = tester | ||
| .invoke_scalar_scalar(ScalarValue::Null, false) | ||
| .unwrap(); | ||
| assert!(result.is_null()); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1797,6 +1797,68 @@ def test_st_isring_non_linestring_error(eng, geom): | |
| eng.assert_query_result(f"SELECT ST_IsRing(ST_GeomFromText('{geom}'))", None) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) | ||
| @pytest.mark.parametrize( | ||
| ("geom", "expected"), | ||
| [ | ||
| (None, None), | ||
| ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"), | ||
| # opposite direction | ||
| ( | ||
| "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))", | ||
| "LINESTRING (0 0, 1 0, 1 1)", | ||
| ), | ||
| # non-touching | ||
| ( | ||
| "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))", | ||
| "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))", | ||
| ), | ||
| # empty cases | ||
| # ("POINT EMPTY", "POINT EMPTY"), # PostGIS returns POINT (nan, nan) | ||
yutannihilation marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ("LINESTRING EMPTY", "LINESTRING EMPTY"), | ||
| ("POLYGON EMPTY", "POLYGON EMPTY"), | ||
| ("MULTIPOINT EMPTY", "MULTIPOINT EMPTY"), | ||
| ("MULTILINESTRING EMPTY", "MULTILINESTRING EMPTY"), | ||
| ("MULTIPOLYGON EMPTY", "MULTIPOLYGON EMPTY"), | ||
| ("GEOMETRYCOLLECTION EMPTY", "GEOMETRYCOLLECTION EMPTY"), | ||
| # Note that the behaviour on non-multilinestring geometry is not documented. | ||
| # But, we test such cases here as well to detect if there's any difference. | ||
| ("POINT (0 0)", "GEOMETRYCOLLECTION EMPTY"), | ||
| ("LINESTRING (0 0, 1 0)", "LINESTRING (0 0, 1 0)"), | ||
| ("POLYGON ((0 0, 0 1, 1 0, 0 0))", "LINESTRING (0 0, 0 1, 1 0, 0 0)"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interesting. The docs say "Other geometry types return an empty GeometryCollection" in the note. Though that doesn't seem to be the case for POLYGON or LINESTRING input 🤷. Could we test some empty geometries, for both of these tests? Those can often catch weird edge cases.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for catching. I simply didn't notice the note... I'll add empty cases. |
||
| ], | ||
| ) | ||
| def test_st_linemerge(eng, geom, expected): | ||
| eng = eng.create_or_skip() | ||
| eng.assert_query_result( | ||
| f"SELECT ST_LineMerge({geom_or_null(geom)})", | ||
| expected, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) | ||
| @pytest.mark.parametrize( | ||
| ("geom", "expected"), | ||
| [ | ||
| ("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"), | ||
| ( | ||
| "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))", | ||
| "MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))", | ||
| ), | ||
| ( | ||
| "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))", | ||
| "MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))", | ||
| ), | ||
| ], | ||
| ) | ||
| def test_st_linemerge_directed(eng, geom, expected): | ||
| eng = eng.create_or_skip() | ||
| eng.assert_query_result( | ||
| f"SELECT ST_LineMerge({geom_or_null(geom)}, true)", | ||
| expected, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) | ||
| @pytest.mark.parametrize( | ||
| ("geom", "expected"), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
| use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; | ||
| use sedona_expr::scalar_udf::SedonaScalarUDF; | ||
| use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher}; | ||
|
|
||
| /// ST_LineMerge() scalar UDF implementation | ||
| /// | ||
| /// Stub function for line merging. | ||
| pub fn st_line_merge_udf() -> SedonaScalarUDF { | ||
| SedonaScalarUDF::new_stub( | ||
| "st_linemerge", | ||
| ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY), | ||
| Volatility::Immutable, | ||
| Some(st_line_merge_doc()), | ||
| ) | ||
| } | ||
|
|
||
| fn st_line_merge_doc() -> Documentation { | ||
| Documentation::builder( | ||
| DOC_SECTION_OTHER, | ||
| "Merge the line segments in a geometry", | ||
| "ST_LineMerge (Geom: Geometry)", | ||
| ) | ||
| .with_argument("geom", "geometry: Input geometry") | ||
| .with_argument( | ||
| "directed", | ||
| "If true, lines with opposite directions will not be merged", | ||
| ) | ||
| .with_sql_example( | ||
| "SELECT ST_LineMerge(ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))'))", | ||
| ) | ||
| .build() | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use datafusion_expr::ScalarUDF; | ||
|
|
||
| use super::*; | ||
|
|
||
| #[test] | ||
| fn udf_metadata() { | ||
| let udf: ScalarUDF = st_line_merge_udf().into(); | ||
| assert_eq!(udf.name(), "st_linemerge"); | ||
| assert!(udf.documentation().is_some()) | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.