Skip to content

Commit

Permalink
Add conversion between cf and shapely for lines (#460)
Browse files Browse the repository at this point in the history
* Add conversion between cf and shapely for lines

* Make polygons raise

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Switch to using `linestrings` and `multilinestrings`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add shapely to CF for lines

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Use `from_ragged_array`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix mypy

* Full test coverage for patch

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored Dec 11, 2023
1 parent f573ed7 commit 02b1d74
Show file tree
Hide file tree
Showing 3 changed files with 298 additions and 13 deletions.
155 changes: 149 additions & 6 deletions cf_xarray/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None
}
if types.issubset({"Point", "MultiPoint"}):
ds = points_to_cf(geometries)
elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset(
{"LineString", "MultiLineString"}
):
raise NotImplementedError("Only point geometries conversion is implemented.")
elif types.issubset({"LineString", "MultiLineString"}):
ds = lines_to_cf(geometries)
elif types.issubset({"Polygon", "MultiPolygon"}):
raise NotImplementedError("Polygon geometry conversion is not implemented.")
else:
raise ValueError(
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
Expand Down Expand Up @@ -165,8 +165,10 @@ def cf_to_shapely(ds: xr.Dataset):
geom_type = ds.geometry_container.attrs["geometry_type"]
if geom_type == "point":
geometries = cf_to_points(ds)
elif geom_type in ["line", "polygon"]:
raise NotImplementedError("Only point geometries conversion is implemented.")
elif geom_type == "line":
geometries = cf_to_lines(ds)
elif geom_type == "polygon":
raise NotImplementedError("Polygon geometry conversion is not implemented.")
else:
raise ValueError(
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
Expand Down Expand Up @@ -295,3 +297,144 @@ def cf_to_points(ds: xr.Dataset):
j += n

return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)


def lines_to_cf(lines: xr.DataArray | Sequence):
"""Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset.
Parameters
----------
lines : sequence of shapely.geometry.Line or MultiLine
The sequence of [multi]lines to translate to a CF dataset.
Returns
-------
xr.Dataset
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
and optionally 'part_node_count'.
"""
from shapely import to_ragged_array

if isinstance(lines, xr.DataArray):
dim = lines.dims[0]
coord = lines[dim] if dim in lines.coords else None
lines_ = lines.values
else:
dim = "index"
coord = None
lines_ = np.array(lines)

_, arr, offsets = to_ragged_array(lines_)
x = arr[:, 0]
y = arr[:, 1]

node_count = np.diff(offsets[0])
if len(offsets) == 1:
indices = offsets[0]
part_node_count = node_count
else:
indices = np.take(offsets[0], offsets[1])
part_node_count = np.diff(indices)

geom_coords = arr.take(indices[:-1], 0)
crdX = geom_coords[:, 0]
crdY = geom_coords[:, 1]

ds = xr.Dataset(
data_vars={
"node_count": xr.DataArray(node_count, dims=("segment",)),
"part_node_count": xr.DataArray(part_node_count, dims=(dim,)),
"geometry_container": xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"part_node_count": "part_node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
},
coords={
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),
},
)

if coord is not None:
ds = ds.assign_coords({dim: coord})

# Special case when we have no MultiLines
if len(ds.part_node_count) == len(ds.node_count):
ds = ds.drop_vars("part_node_count")
del ds.geometry_container.attrs["part_node_count"]
return ds


def cf_to_lines(ds: xr.Dataset):
"""Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable.
Parameters
----------
ds : xr.Dataset
A dataset with CF-compliant line geometries.
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
Must also have the two 1D variables listed by this attribute.
Returns
-------
geometry : xr.DataArray
A 1D array of shapely.geometry.[Multi]Line objects.
It has the same dimension as the ``part_node_count`` or the coordinates variables, or
``'features'`` if those were not present in ``ds``.
"""
from shapely import GeometryType, from_ragged_array

# Shorthand for convenience
geo = ds.geometry_container.attrs

# The features dimension name, defaults to the one of 'part_node_count'
# or the dimension of the coordinates, if present.
feat_dim = None
if "coordinates" in geo:
xcoord_name, _ = geo["coordinates"].split(" ")
(feat_dim,) = ds[xcoord_name].dims

x_name, y_name = geo["node_coordinates"].split(" ")
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

node_count_name = geo.get("node_count")
part_node_count_name = geo.get("part_node_count")
if node_count_name is None:
raise ValueError("'node_count' must be provided for line geometries")
else:
node_count = ds[node_count_name]

offset1 = np.insert(np.cumsum(node_count.values), 0, 0)
lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,))

if part_node_count_name is None:
# No part_node_count means there are no multilines
# And if we had no coordinates, then the dimension defaults to "index"
feat_dim = feat_dim or "index"
part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,))
if feat_dim in ds.coords:
part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]})

geoms = lines
else:
part_node_count = ds[part_node_count_name]

# get index of offset1 values that are edges for part_node_count
offset2 = np.nonzero(
np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0))
)[0]
multilines = from_ragged_array(
GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2)
)

# get items from lines or multilines depending on number of segments
geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines)

return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords)
155 changes: 148 additions & 7 deletions cf_xarray/tests/test_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,83 @@ def geometry_ds():
return cf_ds, shp_ds


@pytest.fixture
def geometry_line_ds():
from shapely.geometry import LineString, MultiLineString

# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
geoms = np.empty(3, dtype=object)
geoms[:] = [
MultiLineString([[[0, 0], [1, 2]], [[4, 4], [5, 6]]]),
LineString([[0, 0], [1, 0], [1, 1]]),
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
]

ds = xr.Dataset()
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")

cf_ds = ds.assign(
x=xr.DataArray(
[0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}
),
y=xr.DataArray(
[0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}
),
part_node_count=xr.DataArray([4, 3, 3], dims=("index",)),
node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)),
crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
geometry_container=xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"part_node_count": "part_node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
)

cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])

return cf_ds, shp_da


@pytest.fixture
def geometry_line_without_multilines_ds():
from shapely.geometry import LineString

# empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries.
geoms = np.empty(2, dtype=object)
geoms[:] = [
LineString([[0, 0], [1, 0], [1, 1]]),
LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]),
]

ds = xr.Dataset()
shp_da = xr.DataArray(geoms, dims=("index",), name="geometry")

cf_ds = ds.assign(
x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}),
y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}),
node_count=xr.DataArray([3, 3], dims=("segment",)),
crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}),
crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}),
geometry_container=xr.DataArray(
attrs={
"geometry_type": "line",
"node_count": "node_count",
"node_coordinates": "x y",
"coordinates": "crd_x crd_y",
}
),
)

cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"])

return cf_ds, shp_da


@requires_shapely
def test_shapely_to_cf(geometry_ds):
from shapely.geometry import Point
Expand Down Expand Up @@ -87,12 +164,45 @@ def test_shapely_to_cf(geometry_ds):
assert set(out.dims) == {"features", "node"}


@requires_shapely
def test_shapely_to_cf_for_lines_as_da(geometry_line_ds):
expected, in_da = geometry_line_ds

actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected)

in_da = in_da.assign_coords(index=["a", "b", "c"])
actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b", "c"]))


@requires_shapely
def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds):
expected, in_da = geometry_line_ds
actual = cfxr.shapely_to_cf(in_da.values)
xr.testing.assert_identical(actual, expected)


@requires_shapely
def test_shapely_to_cf_for_lines_without_multilines(
geometry_line_without_multilines_ds,
):
expected, in_da = geometry_line_without_multilines_ds
actual = cfxr.shapely_to_cf(in_da)
xr.testing.assert_identical(actual, expected)


@requires_shapely
def test_shapely_to_cf_errors():
from shapely.geometry import LineString, Point
from shapely.geometry import Point, Polygon

geoms = [LineString([[1, 2], [2, 3]]), LineString([[2, 3, 4], [4, 3, 2]])]
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
geoms = [
Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]),
Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]),
]
with pytest.raises(
NotImplementedError, match="Polygon geometry conversion is not implemented"
):
cfxr.shapely_to_cf(geoms)

geoms.append(Point(1, 2))
Expand Down Expand Up @@ -122,16 +232,47 @@ def test_cf_to_shapely(geometry_ds):


@requires_shapely
def test_cf_to_shapely_errors(geometry_ds):
in_ds, expected = geometry_ds
in_ds.geometry_container.attrs["geometry_type"] = "line"
with pytest.raises(NotImplementedError, match="Only point geometries conversion"):
def test_cf_to_shapely_for_lines(geometry_line_ds):
in_ds, expected = geometry_line_ds

actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected)


@requires_shapely
def test_cf_to_shapely_for_lines_without_multilines(
geometry_line_without_multilines_ds,
):
in_ds, expected = geometry_line_without_multilines_ds
actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(actual, expected)

in_ds = in_ds.assign_coords(index=["b", "c"])
actual = cfxr.cf_to_shapely(in_ds)
assert actual.dims == ("index",)
xr.testing.assert_identical(
actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"])
)


@requires_shapely
def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds):
in_ds, _ = geometry_ds
in_ds.geometry_container.attrs["geometry_type"] = "polygon"
with pytest.raises(NotImplementedError, match="Polygon geometry"):
cfxr.cf_to_shapely(in_ds)

in_ds.geometry_container.attrs["geometry_type"] = "punkt"
with pytest.raises(ValueError, match="Valid CF geometry types are "):
cfxr.cf_to_shapely(in_ds)

in_ds, _ = geometry_line_ds
del in_ds.geometry_container.attrs["node_count"]
with pytest.raises(ValueError, match="'node_count' must be provided"):
cfxr.cf_to_shapely(in_ds)


@requires_shapely
def test_reshape_unique_geometries(geometry_ds):
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ module=[
"pint",
"matplotlib",
"pytest",
"shapely",
"shapely.geometry",
"xarray.core.pycompat",
]
Expand Down

0 comments on commit 02b1d74

Please sign in to comment.