Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement geometry traits on geos objects and simplify geos conversions #318

Merged
merged 4 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/algorithm/geos/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ impl Buffer for PointArray {
fn buffer(&self, width: f64, quadsegs: i32) -> Result<Self::Output> {
// NOTE: the bumpalo allocator didn't appear to make any perf difference with geos :shrug:
// Presumably GEOS is allocating on its own before we can put the geometry in the Bump?
let bump = bumpalo::Bump::new();
// let bump = bumpalo::Bump::new();

let mut geos_geoms = bumpalo::collections::Vec::with_capacity_in(self.len(), &bump);
let mut geos_geoms = Vec::with_capacity(self.len());

for maybe_g in self.iter_geos() {
if let Some(g) = maybe_g {
Expand Down
73 changes: 1 addition & 72 deletions src/io/geos/array/linestring.rs
Original file line number Diff line number Diff line change
@@ -1,90 +1,19 @@
use arrow_array::OffsetSizeTrait;

use crate::array::linestring::LineStringCapacity;
use crate::array::{LineStringArray, LineStringBuilder};
use crate::error::{GeoArrowError, Result};
use crate::geo_traits::LineStringTrait;
use crate::io::geos::scalar::GEOSLineString;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> LineStringBuilder<O> {
    /// Append one optional GEOS line string to the end of this builder.
    ///
    /// For a `Some` value the coordinate count is pushed onto `geom_offsets`, every coordinate
    /// is copied into `coords`, and the slot is marked valid. A `None` value is recorded via
    /// `push_null`.
    ///
    /// # Panics
    ///
    /// Panics if the offset push fails or if a coordinate lookup inside `0..num_coords()`
    /// fails, because both results are unwrapped rather than propagated.
    #[allow(dead_code)]
    fn push_geos_line_string(&mut self, value: Option<&GEOSLineString>) -> Result<()> {
        match value {
            None => {
                self.push_null();
            }
            Some(geom) => {
                // Record how many coordinates this geometry contributes.
                let coord_count = geom.num_coords();
                self.geom_offsets.try_push_usize(coord_count).unwrap();

                // Copy the coordinates themselves, in order.
                for idx in 0..coord_count {
                    let position = geom.coord(idx).unwrap();
                    self.coords.push_coord(&position);
                }

                self.validity.append(true);
            }
        }
        Ok(())
    }
}

/// Compute the capacities needed to store `geoms` in a line string builder.
///
/// Returns `(coord_capacity, geom_capacity)`: the summed coordinate count of every non-null
/// line string, and `geoms_length` passed through unchanged.
pub(crate) fn first_pass(geoms: &[Option<GEOSLineString>], geoms_length: usize) -> (usize, usize) {
    // Null entries contribute no coordinates, so they are dropped by `flatten`.
    let total_coords: usize = geoms
        .iter()
        .flatten()
        .map(|geom| geom.num_coords())
        .sum();

    (total_coords, geoms_length)
}

/// Build a `LineStringBuilder` from `geoms`, pre-sized with the given capacities.
///
/// # Panics
///
/// Panics on the first geometry whose push returns an error, since the result is unwrapped.
pub(crate) fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSLineString<'a>>>,
    coord_capacity: usize,
    geom_capacity: usize,
) -> LineStringBuilder<O> {
    let mut builder =
        LineStringBuilder::with_capacity(LineStringCapacity::new(coord_capacity, geom_capacity));

    for maybe_line_string in geoms {
        builder
            .push_line_string(maybe_line_string.as_ref())
            .unwrap();
    }

    builder
}

/// Conversion from a vector of optional GEOS geometries into a `LineStringBuilder`.
///
/// `None` entries stay `None`; `Some` entries are wrapped in `GEOSLineString` with
/// `new_unchecked` (no geometry-type check is visible in this block).
impl<O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'_>>>> for LineStringBuilder<O> {
type Error = GeoArrowError;

/// Wrap every geometry in `value` and build the `LineStringBuilder`.
///
/// As written, every return path shown here yields `Ok`.
fn try_from(value: Vec<Option<geos::Geometry<'_>>>) -> Result<Self> {
// Number of input slots, including nulls; used as the geometry capacity.
let length = value.len();
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSLineString>> = value
.into_iter()
.map(|geom| geom.map(GEOSLineString::new_unchecked))
.collect();

// NOTE(review): this text comes from a rendered diff, so the two-pass
// `first_pass`/`second_pass` return and the `geos_objects.into()` return below appear to be
// the before/after versions of the same statement rather than code meant to coexist —
// confirm against the actual file.
let (coord_capacity, geom_capacity) = first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
64 changes: 2 additions & 62 deletions src/io/geos/array/multipoint.rs
Original file line number Diff line number Diff line change
@@ -1,79 +1,19 @@
use arrow_array::OffsetSizeTrait;
use geos::Geom;

use crate::array::multipoint::MultiPointCapacity;
use crate::array::{MultiPointArray, MultiPointBuilder};
use crate::error::{GeoArrowError, Result};
use crate::error::GeoArrowError;
use crate::io::geos::scalar::GEOSMultiPoint;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> MultiPointBuilder<O> {
    /// Append one optional GEOS multi point to the end of this builder.
    ///
    /// For a `Some` value the x/y of every member point is pushed first, then the point
    /// count is recorded with `try_push_length`. A `None` value is recorded via `push_null`.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading a point's x or y, from `push_xy`, or from
    /// `try_push_length`.
    ///
    /// # Panics
    ///
    /// Panics if fetching a member geometry inside `0..num_points()` fails, because that
    /// result is unwrapped.
    fn push_geos_multi_point(&mut self, value: Option<&GEOSMultiPoint>) -> Result<()> {
        match value {
            Some(geom) => {
                let point_count = geom.num_points();
                for idx in 0..point_count {
                    let member = geom.0.get_geometry_n(idx).unwrap();
                    let (x, y) = (member.get_x()?, member.get_y()?);
                    // NOTE(review): `push_xy` is invoked inside `unsafe`; its safety contract
                    // is not visible here — confirm it is upheld by the `try_push_length`
                    // call that follows the loop.
                    unsafe {
                        self.push_xy(x, y)?;
                    }
                }
                self.try_push_length(point_count)?;
            }
            None => {
                self.push_null();
            }
        }
        Ok(())
    }
}

/// Compute the capacities needed to store `geoms` in a multi point builder.
///
/// Returns `(coord_capacity, geom_capacity)`: the summed point count of every non-null
/// multi point, and `geoms_length` passed through unchanged.
fn first_pass(geoms: &[Option<GEOSMultiPoint>], geoms_length: usize) -> (usize, usize) {
    // Null entries contribute no points, so they are dropped by `flatten`.
    let total_points: usize = geoms
        .iter()
        .flatten()
        .map(|geom| geom.num_points())
        .sum();

    (total_points, geoms_length)
}

/// Build a `MultiPointBuilder` from `geoms`, pre-sized with the given capacities.
///
/// # Panics
///
/// Panics on the first geometry whose push returns an error, since the result is unwrapped.
fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSMultiPoint<'a>>>,
    coord_capacity: usize,
    geom_capacity: usize,
) -> MultiPointBuilder<O> {
    let mut builder =
        MultiPointBuilder::with_capacity(MultiPointCapacity::new(coord_capacity, geom_capacity));

    for maybe_multi_point in geoms {
        builder
            .push_geos_multi_point(maybe_multi_point.as_ref())
            .unwrap();
    }

    builder
}

/// Conversion from a vector of optional GEOS geometries into a `MultiPointBuilder`.
///
/// `None` entries stay `None`; `Some` entries are wrapped in `GEOSMultiPoint` with
/// `new_unchecked` (no geometry-type check is visible in this block).
impl<'a, O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'a>>>> for MultiPointBuilder<O> {
type Error = GeoArrowError;

/// Wrap every geometry in `value` and build the `MultiPointBuilder`.
///
/// As written, every return path shown here yields `Ok`.
fn try_from(value: Vec<Option<geos::Geometry<'a>>>) -> std::result::Result<Self, Self::Error> {
// Number of input slots, including nulls; used as the geometry capacity.
let length = value.len();
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPoint>> = value
.into_iter()
.map(|geom| geom.map(GEOSMultiPoint::new_unchecked))
.collect();
// NOTE(review): this text comes from a rendered diff, so the two-pass
// `first_pass`/`second_pass` return and the `geos_objects.into()` return below appear to be
// the before/after versions of the same statement rather than code meant to coexist —
// confirm against the actual file.
let (coord_capacity, geom_capacity) = first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
195 changes: 2 additions & 193 deletions src/io/geos/array/multipolygon.rs
Original file line number Diff line number Diff line change
@@ -1,210 +1,19 @@
use arrow_array::OffsetSizeTrait;

use crate::array::multipolygon::MultiPolygonCapacity;
use crate::array::{MultiPolygonArray, MultiPolygonBuilder};
use crate::error::{GeoArrowError, Result};
use crate::io::geos::scalar::{GEOSConstPolygon, GEOSMultiPolygon, GEOSPolygon};
use geos::Geom;

// NOTE: this, `first_pass`, and `second_pass` are copied from their main implementations, because
// implementing geometry access traits on GEOS geometries that yield ConstGeometry objects with two
// lifetimes seemed really, really hard. Ideally one day we can unify the two branches!

impl<O: OffsetSizeTrait> MultiPolygonBuilder<O> {
    /// Add a new GEOS Polygon to the end of this array, stored as a one-member MultiPolygon.
    ///
    /// A `None` value is recorded via `push_null`. A polygon without an exterior ring is
    /// recorded via `push_empty`. Otherwise the exterior ring's coordinates are pushed first,
    /// followed by each interior ring's coordinates, with the matching geometry, polygon and
    /// ring offsets.
    ///
    /// # Errors
    ///
    /// Propagates any GEOS error raised while reading a ring's coordinate sequence or an
    /// individual x/y ordinate.
    ///
    /// # Panics
    ///
    /// Panics if any offset push fails (for example when the new offset does not fit the
    /// offset type) or if an interior ring lookup inside `0..num_interiors()` fails, because
    /// those results are unwrapped.
    #[allow(dead_code)]
    fn push_geos_polygon(&mut self, value: Option<&GEOSPolygon>) -> Result<()> {
        if let Some(polygon) = value {
            // A polygon with no exterior ring is treated as an empty geometry.
            let Some(ext_ring) = polygon.exterior() else {
                self.push_empty();
                return Ok(());
            };

            // A lone polygon is stored as a MultiPolygon containing exactly one polygon.
            let num_polygons = 1;
            self.geom_offsets.try_push_usize(num_polygons).unwrap();

            let coord_seq = ext_ring.0.get_coord_seq()?;
            for coord_idx in 0..ext_ring.num_coords() {
                self.coords
                    .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
            }

            // Number of rings in this polygon: the exterior plus every interior.
            self.polygon_offsets
                .try_push_usize(polygon.num_interiors() + 1)
                .unwrap();

            // Number of coords in the exterior ring
            self.ring_offsets
                .try_push_usize(ext_ring.num_coords())
                .unwrap();

            for int_ring_idx in 0..polygon.num_interiors() {
                let int_ring = polygon.interior(int_ring_idx).unwrap();
                // Number of coords in this interior ring
                self.ring_offsets
                    .try_push_usize(int_ring.num_coords())
                    .unwrap();
                let coord_seq = int_ring.0.get_coord_seq()?;

                for coord_idx in 0..int_ring.num_coords() {
                    self.coords
                        .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                }
            }
        } else {
            self.push_null();
        };
        Ok(())
    }

    /// Add a new GEOS MultiPolygon to the end of this array.
    ///
    /// A `None` value is recorded via `push_null`. Otherwise the polygon count is pushed onto
    /// `geom_offsets`, and for every member polygon the exterior and interior ring
    /// coordinates are pushed together with the matching polygon and ring offsets.
    ///
    /// # Errors
    ///
    /// Propagates any GEOS error raised while reading a ring's coordinate sequence or an
    /// individual x/y ordinate.
    ///
    /// # Panics
    ///
    /// Panics if any offset push fails, if a member polygon or interior ring lookup fails, or
    /// if a member polygon has no exterior ring, because those results are unwrapped.
    fn push_geos_multi_polygon(&mut self, value: Option<&GEOSMultiPolygon>) -> Result<()> {
        if let Some(multi_polygon) = value {
            // Total number of polygons in this MultiPolygon
            let num_polygons = multi_polygon.num_polygons();
            self.geom_offsets.try_push_usize(num_polygons).unwrap();

            // Iterate over polygons
            for polygon_idx in 0..num_polygons {
                let polygon = multi_polygon.polygon(polygon_idx).unwrap();

                // Here we unwrap the exterior ring because a polygon inside a multi polygon
                // should never be empty.
                let ext_ring = polygon.exterior().unwrap();
                let coord_seq = ext_ring.0.get_coord_seq()?;
                for coord_idx in 0..ext_ring.num_coords() {
                    self.coords
                        .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                }

                // Number of rings in this polygon: the exterior plus every interior.
                self.polygon_offsets
                    .try_push_usize(polygon.num_interiors() + 1)
                    .unwrap();

                // Number of coords in the exterior ring
                self.ring_offsets
                    .try_push_usize(ext_ring.num_coords())
                    .unwrap();

                for int_ring_idx in 0..polygon.num_interiors() {
                    let int_ring = polygon.interior(int_ring_idx).unwrap();
                    // Number of coords in this interior ring
                    self.ring_offsets
                        .try_push_usize(int_ring.num_coords())
                        .unwrap();
                    let coord_seq = int_ring.0.get_coord_seq()?;

                    for coord_idx in 0..int_ring.num_coords() {
                        self.coords
                            .push_xy(coord_seq.get_x(coord_idx)?, coord_seq.get_y(coord_idx)?);
                    }
                }
            }
        } else {
            self.push_null();
        };
        Ok(())
    }
}

/// Compute the capacities needed to store `geoms` in a multi polygon builder.
///
/// Returns `(coord_capacity, ring_capacity, polygon_capacity, geom_capacity)`, where the
/// first three are totals over every non-null multi polygon and the last is `geoms_length`
/// passed through unchanged.
///
/// # Panics
///
/// Panics if fetching a member polygon or an interior ring fails, because those results are
/// unwrapped.
fn first_pass(
    geoms: &[Option<GEOSMultiPolygon>],
    geoms_length: usize,
) -> (usize, usize, usize, usize) {
    let mut total_coords = 0;
    let mut total_rings = 0;
    let mut total_polygons = 0;

    for multi_polygon in geoms.iter().flatten() {
        let polygon_count = multi_polygon.num_polygons();
        total_polygons += polygon_count;

        for polygon_idx in 0..polygon_count {
            let member = multi_polygon.0.get_geometry_n(polygon_idx).unwrap();
            let polygon = GEOSConstPolygon::new_unchecked(member);

            // Every polygon is counted as one exterior ring plus its interiors.
            let interior_count = polygon.num_interiors();
            total_rings += interior_count + 1;

            // A missing exterior ring contributes no coordinates.
            total_coords += polygon.exterior().map_or(0, |ring| ring.num_coords());

            for interior_idx in 0..interior_count {
                total_coords += polygon.interior(interior_idx).unwrap().num_coords();
            }
        }
    }

    (total_coords, total_rings, total_polygons, geoms_length)
}

/// Build a `MultiPolygonBuilder` from `geoms`, pre-sized with the given capacities.
///
/// # Panics
///
/// Panics on the first geometry whose push returns an error, since the result is unwrapped.
fn second_pass<'a, O: OffsetSizeTrait>(
    geoms: impl Iterator<Item = Option<GEOSMultiPolygon<'a>>>,
    coord_capacity: usize,
    ring_capacity: usize,
    polygon_capacity: usize,
    geom_capacity: usize,
) -> MultiPolygonBuilder<O> {
    let mut builder = MultiPolygonBuilder::with_capacity(MultiPolygonCapacity::new(
        coord_capacity,
        ring_capacity,
        polygon_capacity,
        geom_capacity,
    ));

    for maybe_multi_polygon in geoms {
        builder
            .push_geos_multi_polygon(maybe_multi_polygon.as_ref())
            .unwrap();
    }

    builder
}
use crate::io::geos::scalar::GEOSMultiPolygon;

/// Conversion from a vector of optional GEOS geometries into a `MultiPolygonBuilder`.
///
/// `None` entries stay `None`; `Some` entries are wrapped in `GEOSMultiPolygon` with
/// `new_unchecked` (no geometry-type check is visible in this block).
impl<O: OffsetSizeTrait> TryFrom<Vec<Option<geos::Geometry<'_>>>> for MultiPolygonBuilder<O> {
type Error = GeoArrowError;

/// Wrap every geometry in `value` and build the `MultiPolygonBuilder`.
///
/// As written, every return path shown here yields `Ok`.
fn try_from(value: Vec<Option<geos::Geometry<'_>>>) -> Result<Self> {
// Number of input slots, including nulls; used as the geometry capacity.
let length = value.len();
// TODO: don't use new_unchecked
let geos_objects: Vec<Option<GEOSMultiPolygon>> = value
.into_iter()
.map(|geom| geom.map(GEOSMultiPolygon::new_unchecked))
.collect();

// NOTE(review): this text comes from a rendered diff, so the two-pass
// `first_pass`/`second_pass` return and the `geos_objects.into()` return below appear to be
// the before/after versions of the same statement rather than code meant to coexist —
// confirm against the actual file.
let (coord_capacity, ring_capacity, polygon_capacity, geom_capacity) =
first_pass(&geos_objects, length);
Ok(second_pass(
geos_objects.into_iter(),
coord_capacity,
ring_capacity,
polygon_capacity,
geom_capacity,
))
Ok(geos_objects.into())
}
}

Expand Down
Loading