Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions c/sedona-geos/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod st_issimple;
mod st_isvalid;
mod st_isvalidreason;
mod st_length;
mod st_line_merge;
mod st_makevalid;
mod st_minimumclearance;
mod st_minimumclearance_line;
Expand Down
2 changes: 2 additions & 0 deletions c/sedona-geos/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use crate::{
st_isvalid::st_is_valid_impl,
st_isvalidreason::st_is_valid_reason_impl,
st_length::st_length_impl,
st_line_merge::st_line_merge_impl,
st_makevalid::st_make_valid_impl,
st_minimumclearance::st_minimum_clearance_impl,
st_minimumclearance_line::st_minimum_clearance_line_impl,
Expand Down Expand Up @@ -81,6 +82,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
("st_isvalid", st_is_valid_impl()),
("st_isvalidreason", st_is_valid_reason_impl()),
("st_length", st_length_impl()),
("st_linemerge", st_line_merge_impl()),
("st_numinteriorrings", st_num_interior_rings_impl()),
("st_numpoints", st_num_points_impl()),
("st_nrings", st_nrings_impl()),
Expand Down
181 changes: 181 additions & 0 deletions c/sedona-geos/src/st_line_merge.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use arrow_array::builder::BinaryBuilder;
use datafusion_common::{error::Result, DataFusionError, ScalarValue};
use datafusion_expr::ColumnarValue;
use geos::Geom;
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};

use crate::executor::GeosExecutor;
use crate::geos_to_wkb::write_geos_geometry;

pub fn st_line_merge_impl() -> ScalarKernelRef {
Arc::new(STLineMerge {})
}

#[derive(Debug)]
struct STLineMerge {}

impl SedonaScalarKernel for STLineMerge {
fn return_type(
&self,
args: &[sedona_schema::datatypes::SedonaType],
) -> datafusion_common::Result<Option<sedona_schema::datatypes::SedonaType>> {
let matcher = ArgMatcher::new(
vec![
ArgMatcher::is_geometry(),
ArgMatcher::optional(ArgMatcher::is_boolean()),
],
WKB_GEOMETRY,
);
matcher.match_args(args)
}

fn invoke_batch(
&self,
arg_types: &[sedona_schema::datatypes::SedonaType],
args: &[datafusion_expr::ColumnarValue],
) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
let executor = GeosExecutor::new(arg_types, args);
let mut builder = BinaryBuilder::with_capacity(
executor.num_iterations(),
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
);

let directed = match args.get(1) {
Some(ColumnarValue::Scalar(ScalarValue::Boolean(Some(opt_bool)))) => *opt_bool,
_ => false,
};

executor.execute_wkb_void(|maybe_wkb| {
match maybe_wkb {
Some(wkb) => {
invoke_scalar(&wkb, &mut builder, directed)?;
builder.append_value([]);
}
None => builder.append_null(),
}

Ok(())
})?;

executor.finish(Arc::new(builder.finish()))
}
}

fn invoke_scalar(
geos_geom: &geos::Geometry,
writer: &mut impl std::io::Write,
directed: bool,
) -> Result<()> {
// PostGIS seems to return the original geometry if it is empty
let is_empty = geos_geom.is_empty().map_err(|e| {
DataFusionError::Execution(format!("Failed to check if the geometry is empty: {e}"))
})?;
if is_empty {
write_geos_geometry(geos_geom, writer)?;
return Ok(());
}

let result = if directed {
geos_geom.line_merge_directed()
} else {
geos_geom.line_merge()
};

let geom =
result.map_err(|e| DataFusionError::Execution(format!("Failed to merge lines: {e}")))?;

write_geos_geometry(&geom, writer)?;

Ok(())
}

#[cfg(test)]
mod tests {
use arrow_array::ArrayRef;
use datafusion_common::ScalarValue;
use rstest::rstest;
use sedona_expr::scalar_udf::SedonaScalarUDF;
use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
use sedona_testing::create::create_array;
use sedona_testing::testers::ScalarUdfTester;

use super::*;

#[rstest]
fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
use arrow_schema::DataType;

let udf = SedonaScalarUDF::from_impl("st_linemerge", st_line_merge_impl());
let tester = ScalarUdfTester::new(
udf.into(),
vec![sedona_type, SedonaType::Arrow(DataType::Boolean)],
);
tester.assert_return_type(WKB_GEOMETRY);

let input = vec![
Some("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))"),
Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"), // opposite direction
Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"), // doesn't touch
];

let expected: ArrayRef = create_array(
&[
Some("LINESTRING (0 0, 1 0, 1 1)"),
Some("LINESTRING (0 0, 1 0, 1 1)"),
Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
],
&WKB_GEOMETRY,
);

assert_eq!(
&tester
.invoke_wkb_array_scalar(input.clone(), false)
.unwrap(),
&expected
);

// If directed is true, lines with opposite directions won't be merged

let expected_directed: ArrayRef = create_array(
&[
Some("LINESTRING (0 0, 1 0, 1 1)"),
Some("MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))"),
Some("MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))"),
],
&WKB_GEOMETRY,
);

assert_eq!(
&tester.invoke_wkb_array_scalar(input, true).unwrap(),
&expected_directed
);

// handle NULL

let result = tester
.invoke_scalar_scalar(ScalarValue::Null, false)
.unwrap();
assert!(result.is_null());
}
}
62 changes: 62 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,68 @@ def test_st_isring_non_linestring_error(eng, geom):
eng.assert_query_result(f"SELECT ST_IsRing(ST_GeomFromText('{geom}'))", None)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
[
(None, None),
("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
# opposite direction
(
"MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
"LINESTRING (0 0, 1 0, 1 1)",
),
# non-touching
(
"MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
"MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
),
# empty cases
("POINT EMPTY", "POINT (nan nan)"),
("LINESTRING EMPTY", "LINESTRING EMPTY"),
("POLYGON EMPTY", "POLYGON EMPTY"),
("MULTIPOINT EMPTY", "MULTIPOINT EMPTY"),
("MULTILINESTRING EMPTY", "MULTILINESTRING EMPTY"),
("MULTIPOLYGON EMPTY", "MULTIPOLYGON EMPTY"),
("GEOMETRYCOLLECTION EMPTY", "GEOMETRYCOLLECTION EMPTY"),
# Note that the behaviour on non-multilinestring geometry is not documented.
# But, we test such cases here as well to detect if there's any difference.
("POINT (0 0)", "GEOMETRYCOLLECTION EMPTY"),
("LINESTRING (0 0, 1 0)", "LINESTRING (0 0, 1 0)"),
("POLYGON ((0 0, 0 1, 1 0, 0 0))", "LINESTRING (0 0, 0 1, 1 0, 0 0)"),
],
)
def test_st_linemerge(eng, geom, expected):
eng = eng.create_or_skip()
eng.assert_query_result(
f"SELECT ST_LineMerge({geom_or_null(geom)})",
expected,
)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
[
("MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))", "LINESTRING (0 0, 1 0, 1 1)"),
(
"MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
"MULTILINESTRING ((0 0, 1 0), (1 1, 1 0))",
),
(
"MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
"MULTILINESTRING ((0 0, 1 0), (8 8, 9 9))",
),
],
)
def test_st_linemerge_directed(eng, geom, expected):
eng = eng.create_or_skip()
eng.assert_query_result(
f"SELECT ST_LineMerge({geom_or_null(geom)}, true)",
expected,
)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub mod st_isclosed;
mod st_iscollection;
pub mod st_isempty;
mod st_length;
mod st_line_merge;
mod st_makeline;
mod st_numgeometries;
mod st_perimeter;
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub fn default_function_set() -> FunctionSet {
crate::predicates::st_knn_udf,
crate::predicates::st_touches_udf,
crate::predicates::st_within_udf,
crate::st_line_merge::st_line_merge_udf,
crate::referencing::st_line_interpolate_point_udf,
crate::referencing::st_line_locate_point_udf,
crate::sd_format::sd_format_udf,
Expand Down
62 changes: 62 additions & 0 deletions rust/sedona-functions/src/st_line_merge.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
use sedona_expr::scalar_udf::SedonaScalarUDF;
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};

/// ST_LineMerge() scalar UDF implementation
///
/// Stub function for line merging.
pub fn st_line_merge_udf() -> SedonaScalarUDF {
SedonaScalarUDF::new_stub(
"st_linemerge",
ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY),
Volatility::Immutable,
Some(st_line_merge_doc()),
)
}

fn st_line_merge_doc() -> Documentation {
Documentation::builder(
DOC_SECTION_OTHER,
"Merge the line segments in a geometry",
"ST_LineMerge (Geom: Geometry)",
)
.with_argument("geom", "geometry: Input geometry")
.with_argument(
"directed",
"If true, lines with opposite directions will not be merged",
)
.with_sql_example(
"SELECT ST_LineMerge(ST_GeomFromWKT('MULTILINESTRING ((0 0, 1 0), (1 0, 1 1))'))",
)
.build()
}

#[cfg(test)]
mod tests {
use datafusion_expr::ScalarUDF;

use super::*;

#[test]
fn udf_metadata() {
let udf: ScalarUDF = st_line_merge_udf().into();
assert_eq!(udf.name(), "st_linemerge");
assert!(udf.documentation().is_some())
}
}