Skip to content

Commit

Permalink
Disallow GeoSeries from accepting a column in favor of _from_column (#1434)
Browse files Browse the repository at this point in the history

closes #1433

In the upstream cudf changes in rapidsai/cudf#16454, `cudf.Series` no longer accepts a `ColumnBase` directly, in favor of the `cudf.Series._from_column` constructor. This PR mirrors that change for `GeoSeries` and also fixes the breakages caused by the upstream cudf change.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Bradley Dice (https://github.com/bdice)

URL: #1434
  • Loading branch information
mroeschke committed Aug 9, 2024
1 parent 6865f7c commit 1b60fb7
Show file tree
Hide file tree
Showing 19 changed files with 159 additions and 108 deletions.
34 changes: 17 additions & 17 deletions python/cuspatial/cuspatial/core/_column/geocolumn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023 NVIDIA CORPORATION
# Copyright (c) 2021-2024, NVIDIA CORPORATION

from enum import Enum
from functools import cached_property
Expand Down Expand Up @@ -153,14 +153,14 @@ def _from_points_xy(cls, points_xy: ColumnBase):
coord_dtype = points_xy.dtype
return cls(
(
cudf.Series(point_col),
cudf.Series(
cudf.Series._from_column(point_col),
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)
),
),
Expand Down Expand Up @@ -205,14 +205,14 @@ def _from_multipoints_xy(

return cls(
(
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POINT, coord_dtype)
),
cudf.Series(multipoint_col),
cudf.Series(
cudf.Series._from_column(multipoint_col),
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)
),
),
Expand Down Expand Up @@ -265,14 +265,14 @@ def _from_linestrings_xy(

return cls(
(
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POINT, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
),
cudf.Series(linestrings_col),
cudf.Series(
cudf.Series._from_column(linestrings_col),
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)
),
),
Expand Down Expand Up @@ -331,16 +331,16 @@ def _from_polygons_xy(

return cls(
(
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POINT, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
),
cudf.Series(
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.LINESTRING, coord_dtype)
),
cudf.Series(polygons_col),
cudf.Series._from_column(polygons_col),
),
meta,
)
Expand Down
36 changes: 29 additions & 7 deletions python/cuspatial/cuspatial/core/_column/geometa.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Copyright (c) 2021-2022 NVIDIA CORPORATION
# Copyright (c) 2021-2024, NVIDIA CORPORATION

# This allows GeoMeta as its own init type
from __future__ import annotations

from enum import Enum
from typing import Union
from typing import Literal, Union

import cudf
import cudf.core.column


# This causes arrow to encode NONE as =255, which I'll accept now
Expand All @@ -26,12 +27,33 @@ class GeoMeta:
GeoSeries if necessary.
"""

def __init__(self, meta: Union[GeoMeta, dict]):
def __init__(
self,
meta: Union[
GeoMeta,
dict[
Literal["input_types", "union_offsets"],
cudf.core.column.ColumnBase,
],
],
):
if isinstance(meta, dict):
self.input_types = cudf.Series(meta["input_types"], dtype="int8")
self.union_offsets = cudf.Series(
meta["union_offsets"], dtype="int32"
)
meta_it = meta["input_types"]
if isinstance(meta_it, cudf.core.column.ColumnBase):
self.input_types = cudf.Series._from_column(meta_it).astype(
"int8"
)
else:
# Could be Series from GeoSeries.__getitem__
self.input_types = cudf.Series(meta_it, dtype="int8")
meta_uo = meta["union_offsets"]
if isinstance(meta_uo, cudf.core.column.ColumnBase):
self.union_offsets = cudf.Series._from_column(meta_uo).astype(
"int32"
)
else:
# Could be Series from GeoSeries.__getitem__
self.union_offsets = cudf.Series(meta_uo, dtype="int32")
else:
self.input_types = cudf.Series(meta.input_types, dtype="int8")
self.union_offsets = cudf.Series(meta.union_offsets, dtype="int32")
Expand Down
4 changes: 3 additions & 1 deletion python/cuspatial/cuspatial/core/binops/distance_dispatch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Copyright (c) 2024, NVIDIA CORPORATION

import cudf
from cudf.core.column import as_column

Expand Down Expand Up @@ -200,4 +202,4 @@ def __call__(self):
# If `align==False`, geopandas preserves lhs index.
index = None if self._align else self._res_index

return cudf.Series(result, index=index, nan_as_null=False)
return cudf.Series._from_column(result, index=index)
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/binops/equals_count.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import cudf

Expand Down Expand Up @@ -76,4 +76,4 @@ def pairwise_multipoint_equals_count(lhs, rhs):
rhs_column = rhs._column.mpoints._column
result = c_pairwise_multipoint_equals_count(lhs_column, rhs_column)

return cudf.Series(result)
return cudf.Series._from_column(result)
12 changes: 6 additions & 6 deletions python/cuspatial/cuspatial/core/binops/intersection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from typing import TYPE_CHECKING

Expand Down Expand Up @@ -109,15 +109,15 @@ def pairwise_linestring_intersection(
)
from cuspatial.core.geoseries import GeoSeries

geometries = GeoSeries(
geometries = GeoSeries._from_column(
GeoColumn(
(
cudf.Series(points),
cudf.Series(
cudf.Series._from_column(points),
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.MULTIPOINT, coord_dtype)
),
cudf.Series(linestring_column),
cudf.Series(
cudf.Series._from_column(linestring_column),
cudf.Series._from_column(
empty_geometry_column(Feature_Enum.POLYGON, coord_dtype)
),
),
Expand Down
2 changes: 1 addition & 1 deletion python/cuspatial/cuspatial/core/binpreds/contains.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _pairwise_contains_properly(points, polygons):
# point) pair where the point is contained properly by the polygon. We can
# use this to create a dataframe with only (polygon, point) pairs that
# satisfy the relationship.
pip_result = cudf.Series(result_column, dtype="bool")
pip_result = cudf.Series._from_column(result_column).astype("bool")
trues = pip_result[pip_result].index
true_pairs = cudf.DataFrame(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def _preprocess_multipoint_rhs(self, lhs, rhs):
point_indices = geom.point_indices()
from cuspatial.core.geoseries import GeoSeries

final_rhs = GeoSeries(GeoColumn._from_points_xy(xy_points._column))
final_rhs = GeoSeries._from_column(
GeoColumn._from_points_xy(xy_points._column)
)
preprocess_result = PreprocessorResult(
lhs, rhs, final_rhs, point_indices
)
Expand Down
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/binpreds/feature_contains.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from typing import TypeVar

Expand Down Expand Up @@ -71,7 +71,7 @@ def _intersection_results_for_contains_polygon(self, lhs, rhs):
if len(pli_features) == 0:
return _zero_series(len(lhs))

pli_offsets = cudf.Series(pli[0])
pli_offsets = cudf.Series._from_column(pli[0])

# Convert the pli to multipoints for equality checking
multipoints = _points_and_lines_to_multipoints(
Expand Down
6 changes: 3 additions & 3 deletions python/cuspatial/cuspatial/core/binpreds/feature_equals.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from __future__ import annotations

Expand Down Expand Up @@ -236,7 +236,7 @@ def _preprocess(self, lhs: "GeoSeries", rhs: "GeoSeries"):
lhs, rhs, PreprocessorResult(None, rhs.point_indices)
)

def _vertices_equals(self, lhs: Series, rhs: Series):
def _vertices_equals(self, lhs: Series, rhs: Series) -> Series:
"""Compute the equals relationship between interleaved xy
coordinate buffers."""
if not isinstance(lhs, Series):
Expand All @@ -246,7 +246,7 @@ def _vertices_equals(self, lhs: Series, rhs: Series):
length = min(len(lhs), len(rhs))
a = lhs[:length:2]._column == rhs[:length:2]._column
b = rhs[1:length:2]._column == lhs[1:length:2]._column
return a & b
return Series._from_column(a & b)

def _compute_predicate(self, lhs, rhs, preprocessor_result):
"""Perform the binary predicate operation on the input GeoSeries.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.


import cupy as cp
Expand Down Expand Up @@ -70,7 +70,7 @@ def _get_intersecting_geometry_indices(self, lhs, op_result):
a set of lengths from the returned offsets buffer, then
returns an integer index for all of the offset sizes that
are larger than 0."""
is_offsets = cudf.Series(op_result.result[0])
is_offsets = cudf.Series._from_column(op_result.result[0])
is_sizes = is_offsets[1:].reset_index(drop=True) - is_offsets[
:-1
].reset_index(drop=True)
Expand Down
4 changes: 2 additions & 2 deletions python/cuspatial/cuspatial/core/binpreds/feature_touches.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import cupy as cp

Expand Down Expand Up @@ -66,7 +66,7 @@ def _preprocess(self, lhs, rhs):
# First compute pli which will contain points for line crossings and
# linestrings for overlapping segments.
pli = _basic_intersects_pli(lhs, rhs)
offsets = cudf.Series(pli[0])
offsets = cudf.Series._from_column(pli[0])
pli_geometry_count = offsets[1:].reset_index(drop=True) - offsets[
:-1
].reset_index(drop=True)
Expand Down
9 changes: 6 additions & 3 deletions python/cuspatial/cuspatial/core/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T:
data = data_columns._apply_boolean_mask(mask, keep_index)

geo = GeoDataFrame(
{name: geo_columns[name][mask.column] for name in geo_columns}
{
name: geo_columns[name][cudf.Index._from_column(mask.column)]
for name in geo_columns
}
)

res = self._from_data(self._recombine_columns(geo, data))
Expand Down Expand Up @@ -319,9 +322,9 @@ class _GeoSeriesUtility:
def _from_data(cls, new_data, name=None, index=None):
new_column = new_data.columns[0]
if is_geometry_type(new_column):
return GeoSeries(new_column, name=name, index=index)
return GeoSeries._from_column(new_column, name=name, index=index)
else:
return cudf.Series(new_column, name=name, index=index)
return cudf.Series._from_column(new_column, name=name, index=index)


def is_geometry_type(obj):
Expand Down
Loading

0 comments on commit 1b60fb7

Please sign in to comment.