Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
77c084b
first pass
paleolimbot Sep 16, 2025
7cd67e4
try to reorganize
paleolimbot Jan 9, 2026
46ac6ab
revert changes
paleolimbot Jan 9, 2026
2df8381
make circle generation more resistant to large sizes
paleolimbot Jan 9, 2026
c4a077d
remove python arg checks
paleolimbot Jan 9, 2026
c81ffb9
add context to fallible uniform distribution constructors
paleolimbot Jan 9, 2026
f49908e
slight improvements
paleolimbot Jan 9, 2026
0b3a77c
move more validation lower
paleolimbot Jan 9, 2026
b073c79
move range/scalar parsing to Rust
paleolimbot Jan 9, 2026
a9c7e1b
better arg names
paleolimbot Jan 9, 2026
44f95b6
fix name updates
paleolimbot Jan 9, 2026
c47df1e
use purely rust defaults
paleolimbot Jan 9, 2026
dc6fa04
fix tests
paleolimbot Jan 9, 2026
ca82e52
new seed for geography
paleolimbot Jan 9, 2026
2cb076d
docs
paleolimbot Jan 9, 2026
cc7971e
make the table provider always have exact output size
paleolimbot Jan 10, 2026
dc537dd
geom type then size
paleolimbot Jan 10, 2026
0372ab8
adjust test
paleolimbot Jan 10, 2026
5e6f2e2
target_rows -> num_rows
paleolimbot Jan 11, 2026
990b975
update SELECT * from sd_random_geometry()
paleolimbot Jan 11, 2026
299280b
update sjoin tests
paleolimbot Jan 11, 2026
8bc0b55
add some debug context to the random geometry provider
paleolimbot Jan 11, 2026
6a1c845
handle empty streams
paleolimbot Jan 11, 2026
9666171
don't use .to_arrow_table() when not needed
paleolimbot Jan 11, 2026
04c1514
fmt
paleolimbot Jan 11, 2026
4e286a2
more relevant distance
paleolimbot Jan 11, 2026
bab3e2c
add to context
paleolimbot Jan 11, 2026
4bd96e6
Update rust/sedona-testing/src/datagen.rs
paleolimbot Jan 12, 2026
03196ff
Update rust/sedona-testing/src/datagen.rs
paleolimbot Jan 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions benchmarks/test_bench_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,47 +43,47 @@ def setup_class(self):
"points_simple",
{
"geom_type": "Point",
"target_rows": num_geoms,
"num_rows": num_geoms,
},
),
(
"segments_large",
{
"geom_type": "LineString",
"target_rows": num_geoms,
"vertices_per_linestring_range": [2, 10],
"num_rows": num_geoms,
"num_vertices": [2, 10],
},
),
(
"polygons_simple",
{
"geom_type": "Polygon",
"target_rows": num_geoms,
"vertices_per_linestring_range": [10, 10],
"num_rows": num_geoms,
"num_vertices": [10, 10],
},
),
(
"polygons_complex",
{
"geom_type": "Polygon",
"target_rows": num_geoms,
"vertices_per_linestring_range": [500, 500],
"num_rows": num_geoms,
"num_vertices": [500, 500],
},
),
(
"collections_simple",
{
"geom_type": "GeometryCollection",
"target_rows": num_geoms,
"vertices_per_linestring_range": [10, 10],
"num_rows": num_geoms,
"num_vertices": [10, 10],
},
),
(
"collections_complex",
{
"geom_type": "GeometryCollection",
"target_rows": num_geoms,
"vertices_per_linestring_range": [500, 500],
"num_rows": num_geoms,
"num_vertices": [500, 500],
},
),
]:
Expand All @@ -97,7 +97,7 @@ def setup_class(self):
{
"seed": 42,
"bounds": [0.0, 0.0, 80.0, 100.0], # Slightly left-leaning
"size_range": [
"size": [
1.0,
15.0,
], # Medium-sized geometries for good intersection chance
Expand All @@ -110,7 +110,7 @@ def setup_class(self):
{
"seed": 43,
"bounds": [20.0, 0.0, 100.0, 100.0], # Slightly right-leaning
"size_range": [1.0, 15.0], # Same size range for fair comparison
"size": [1.0, 15.0], # Same size range for fair comparison
}
)

Expand Down
8 changes: 4 additions & 4 deletions benchmarks/test_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def setup_class(self):
# Create building-like polygons (index side - fewer, larger geometries)
building_options = {
"geom_type": "Polygon",
"target_rows": 2_000,
"vertices_per_linestring_range": [4, 8],
"size_range": [0.001, 0.01],
"num_rows": 2_000,
"num_vertices": [4, 8],
"size": [0.001, 0.01],
"seed": 42,
}

Expand All @@ -51,7 +51,7 @@ def setup_class(self):
# Create trip pickup points (probe side)
trip_options = {
"geom_type": "Point",
"target_rows": 10_000,
"num_rows": 10_000,
"seed": 43,
}

Expand Down
10 changes: 9 additions & 1 deletion python/sedonadb/python/sedonadb/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
from functools import cached_property
from pathlib import Path
from typing import Any, Dict, Iterable, Literal, Optional, Union

from sedonadb._lib import InternalContext, configure_proj_shared
from sedonadb._options import Options
from sedonadb.dataframe import DataFrame, _create_data_frame
from sedonadb.functions import Functions
from sedonadb.utility import sedona # noqa: F401
from sedonadb._options import Options


class SedonaContext:
Expand Down Expand Up @@ -272,6 +275,11 @@ def register_udf(self, udf: Any):
"""
self._impl.register_udf(udf)

@cached_property
def funcs(self) -> Functions:
    """Python wrappers for SedonaDB functions, bound to this context

    The accessor is created on first access; ``cached_property`` then
    stores it on the instance so later accesses reuse the same object.
    """
    accessor = Functions(self)
    return accessor


def connect() -> SedonaContext:
"""Create a new [SedonaContext][sedonadb.context.SedonaContext]"""
Expand Down
40 changes: 40 additions & 0 deletions python/sedonadb/python/sedonadb/functions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from functools import cached_property
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from sedonadb.functions.table import TableFunctions


class Functions:
    """Entry point for calling SedonaDB functions from Python

    An instance is tied to one SedonaDB context and hands out Pythonic
    wrappers that call SedonaDB functions through that context.
    """

    def __init__(self, ctx):
        # Context handed to every function group created by this accessor
        self._ctx = ctx

    @cached_property
    def table(self) -> "TableFunctions":
        """Wrappers for SedonaDB table functions"""
        # Imported lazily, on first access, rather than at module import time
        from sedonadb.functions.table import TableFunctions

        table_functions = TableFunctions(self._ctx)
        return table_functions
114 changes: 114 additions & 0 deletions python/sedonadb/python/sedonadb/functions/table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import json
from typing import Optional, Literal, Union, Tuple, Iterable

from sedonadb.dataframe import DataFrame
from sedonadb.utility import sedona # noqa: F401


class TableFunctions:
    """Python wrappers for SedonaDB table functions

    Each wrapper builds a SQL query that calls the corresponding table
    function and runs it through the context supplied at construction.
    """

    def __init__(self, ctx):
        # Context whose .sql() method executes the generated queries
        self._ctx = ctx

    def sd_random_geometry(
        self,
        geom_type: Optional[
            Literal[
                "Geometry",
                "Point",
                "LineString",
                "Polygon",
                "MultiPoint",
                "MultiLineString",
                "MultiPolygon",
                "GeometryCollection",
            ]
        ] = None,
        num_rows: Optional[int] = None,
        *,
        num_vertices: Union[int, Tuple[int, int], None] = None,
        num_parts: Union[int, Tuple[int, int], None] = None,
        size: Union[float, Tuple[float, float], None] = None,
        bounds: Optional[Iterable[float]] = None,
        hole_rate: Optional[float] = None,
        empty_rate: Optional[float] = None,
        null_rate: Optional[float] = None,
        seed: Optional[int] = None,
    ) -> "DataFrame":
        """
        Generate a DataFrame with random geometries for testing purposes.
        This function creates a DataFrame containing randomly generated geometries with
        configurable parameters for geometry type, size, complexity, and spatial distribution.
        Returns a DataFrame with columns 'id', 'dist', and 'geometry' containing randomly
        generated geometries and distances.

        Arguments left as ``None`` are omitted from the options passed to
        ``sd_random_geometry()``, so the function's own defaults apply.

        Parameters
        ----------
        geom_type : str, default "Point"
            The type of geometry to generate. One of "Geometry",
            "Point", "LineString", "Polygon", "MultiPoint", "MultiLineString",
            "MultiPolygon", or "GeometryCollection".
        num_rows : int, default 1024
            Number of rows to generate.
        num_vertices : int or tuple of (int, int), default 4
            Number of vertices per geometry. If a tuple, specifies (min, max) range.
        num_parts : int or tuple of (int, int), default (1, 3)
            Number of parts for multi-geometries. If a tuple, specifies (min, max) range.
        size : float or tuple of (float, float), default (1.0, 10.0)
            Spatial size of geometries. If a tuple, specifies (min, max) range.
        bounds : iterable of float, default [0.0, 0.0, 100.0, 100.0]
            Spatial bounds as [xmin, ymin, xmax, ymax] to constrain generated geometries.
            Any iterable is accepted; it is materialized into a list before
            being serialized.
        hole_rate : float, default 0.0
            Rate of polygons with holes, between 0.0 and 1.0.
        empty_rate : float, default 0.0
            Rate of empty geometries, between 0.0 and 1.0.
        null_rate : float, default 0.0
            Rate of null geometries, between 0.0 and 1.0.
        seed : int, optional
            Random seed for reproducible geometry generation. If omitted, the result is
            non-deterministic.

        Returns
        -------
        DataFrame
            The result of running ``SELECT * FROM sd_random_geometry(...)`` on the
            context this object was created with.

        Examples
        --------
        >>> sd = sedona.db.connect()
        >>> sd.funcs.table.sd_random_geometry("Point", 1, seed=938).show()
        ┌───────┬───────────────────┬────────────────────────────────────────────┐
        │ id ┆ dist ┆ geometry │
        │ int32 ┆ float64 ┆ geometry │
        ╞═══════╪═══════════════════╪════════════════════════════════════════════╡
        │ 0 ┆ 58.86528701627309 ┆ POINT(94.77686827801787 17.65107885959438) │
        └───────┴───────────────────┴────────────────────────────────────────────┘
        """
        if bounds is not None:
            # json.dumps() only serializes lists/tuples, so materialize
            # generators and other iterables up front.
            bounds = list(bounds)

        args = {
            "bounds": bounds,
            "empty_rate": empty_rate,
            "geom_type": geom_type,
            "null_rate": null_rate,
            "num_parts": num_parts,
            "hole_rate": hole_rate,
            "seed": seed,
            "size": size,
            "num_rows": num_rows,
            "num_vertices": num_vertices,
        }

        # Only forward options the caller actually set
        args = {k: v for k, v in args.items() if v is not None}

        # The JSON is embedded in a single-quoted SQL string literal, so any
        # single quote inside it must be doubled to keep it one literal.
        options_literal = json.dumps(args).replace("'", "''")

        return self._ctx.sql(f"SELECT * FROM sd_random_geometry('{options_literal}')")
13 changes: 12 additions & 1 deletion python/sedonadb/python/sedonadb/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import math
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, List, Tuple
Expand All @@ -29,6 +29,17 @@
import sedonadb


def random_geometry(*args, **kwargs) -> "sedonadb.dataframe.DataFrame":
    """
    Build a DataFrame of random geometries for use in tests.

    A new SedonaDB session is opened with ``sedonadb.connect()`` on every call and
    all positional and keyword arguments are forwarded unchanged to that session's
    ``funcs.table.sd_random_geometry()``.
    """
    import sedonadb

    session = sedonadb.connect()
    table_functions = session.funcs.table
    return table_functions.sd_random_geometry(*args, **kwargs)


def skip_if_not_exists(path: Path):
"""Skip a test using pytest.skip() if path does not exist

Expand Down
26 changes: 26 additions & 0 deletions python/sedonadb/tests/test_funcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


def test_random_geometry(con):
    """sd_random_geometry() yields the requested row count and a stable result"""
    generated = con.funcs.table.sd_random_geometry("Point", 5, seed=99873)

    # The requested number of rows must be produced exactly
    row_count = generated.count()
    assert row_count == 5

    # Materializing the same seeded query twice must give equal tables
    first_table = generated.to_arrow_table()
    second_table = generated.to_arrow_table()
    assert first_table == second_table
Loading