Skip to content

Commit fd996b9

Browse files
committed
Update to smart_repair with some minor algorithmic changes to improve robustness, and updated Pandas syntax to remove FutureWarnings
Update to smart_repair with some minor algorithmic changes to improve robustness, and updated Pandas syntax to remove FutureWarnings
1 parent eb5c2a5 commit fd996b9

File tree

3 files changed

+716
-1477
lines changed

3 files changed

+716
-1477
lines changed

maup/intersections.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
@require_same_crs
1010
def intersections(sources, targets, output_type="geoseries", area_cutoff=None):
1111
"""Computes all of the nonempty intersections between two sets of geometries.
12-
By default, the returned :meth:`~geopandas.GeoSeries` will have a MultiIndex, where the
12+
By default, the returned `~geopandas.GeoSeries` will have a MultiIndex, where the
1313
geometry at index *(i, j)* is the intersection of ``sources[i]`` and ``targets[j]``
1414
(if it is not empty).
1515
If output_type == "geodataframe", the return type is a range-indexed GeoDataFrame
@@ -20,8 +20,8 @@ def intersections(sources, targets, output_type="geoseries", area_cutoff=None):
2020
:param targets: geometries
2121
:type targets: :class:`~geopandas.GeoSeries` or :class:`~geopandas.GeoDataFrame`
2222
:rtype: :class:`~geopandas.GeoSeries`
23-
:param area_cutoff: (optional) if provided, only return intersections with area
24-
greater than ``area_cutoff``
23+
:param area_cutoff: (optional) if provided, only return intersections with
24+
area greater than ``area_cutoff``
2525
:type area_cutoff: Number or None
2626
"""
2727

maup/repair.py

Lines changed: 41 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,8 @@
44
import pandas
55

66
from geopandas import GeoSeries
7-
from shapely.geometry import (
8-
Polygon,
9-
MultiPolygon,
10-
LineString,
11-
MultiLineString,
12-
GeometryCollection,
13-
)
14-
from shapely import union_all
7+
from shapely.geometry import Polygon, MultiPolygon, LineString, MultiLineString, GeometryCollection
8+
from shapely.ops import unary_union
159

1610
from .adjacencies import adjacencies
1711
from .assign import assign_to_max
@@ -63,9 +57,8 @@ def trim_valid(value):
6357
"""
6458
if isinstance(value, GeometryCollection):
6559
# List comprehension excluding non-Polygons
66-
value = [
67-
item for item in value.geoms if isinstance(item, (Polygon, MultiPolygon))
68-
]
60+
value = [item for item in value.geoms
61+
if isinstance(item, (Polygon, MultiPolygon))]
6962
# Re-aggregegating multiple Polygons into single MultiPolygon object.
7063
value = value[0] if len(value) == 1 else MultiPolygon(value)
7164
return value
@@ -77,11 +70,9 @@ def holes_of_union(geometries):
7770
if not all(
7871
isinstance(geometry, (Polygon, MultiPolygon)) for geometry in geometries
7972
):
80-
raise TypeError(
81-
f"Must be a Polygon or MultiPolygon (got types {set([x.geom_type for x in geometries])})!"
82-
)
73+
raise TypeError(f"Must be a Polygon or MultiPolygon (got types {set([x.geom_type for x in geometries])})!")
8374

84-
union = union_all(geometries)
75+
union = unary_union(geometries)
8576
series = holes(union)
8677
series.crs = geometries.crs
8778
return series
@@ -120,10 +111,7 @@ def close_gaps(geometries, relative_threshold=0.1, force_polygons=False):
120111
geometries = get_geometries(geometries)
121112
gaps = holes_of_union(geometries)
122113
return absorb_by_shared_perimeter(
123-
gaps,
124-
geometries,
125-
relative_threshold=relative_threshold,
126-
force_polygons=force_polygons,
114+
gaps, geometries, relative_threshold=relative_threshold, force_polygons=force_polygons
127115
)
128116

129117

@@ -163,15 +151,10 @@ def resolve_overlaps(geometries, relative_threshold=0.1, force_polygons=False):
163151
to_remove = GeoSeries(
164152
pandas.concat([overlaps.droplevel(1), overlaps.droplevel(0)]), crs=overlaps.crs
165153
)
166-
with_overlaps_removed = geometries.apply(
167-
lambda x: x.difference(union_all(to_remove))
168-
)
154+
with_overlaps_removed = geometries.apply(lambda x: x.difference(unary_union(to_remove)))
169155

170156
return absorb_by_shared_perimeter(
171-
overlaps,
172-
with_overlaps_removed,
173-
relative_threshold=None,
174-
force_polygons=force_polygons,
157+
overlaps, with_overlaps_removed, relative_threshold=None, force_polygons=force_polygons
175158
)
176159

177160

@@ -189,9 +172,7 @@ def quick_repair(geometries, relative_threshold=0.1, force_polygons=False):
189172
For a more careful repair that takes adjacencies and higher-order overlaps
190173
between geometries into account, consider using smart_repair instead.
191174
"""
192-
return autorepair(
193-
geometries, relative_threshold=relative_threshold, force_polygons=force_polygons
194-
)
175+
return autorepair(geometries, relative_threshold=relative_threshold, force_polygons=force_polygons)
195176

196177

197178
def autorepair(geometries, relative_threshold=0.1, force_polygons=False):
@@ -213,28 +194,16 @@ def autorepair(geometries, relative_threshold=0.1, force_polygons=False):
213194

214195
if force_polygons:
215196
geometries = make_valid_polygons(remove_repeated_vertices(geometries))
216-
geometries = make_valid_polygons(
217-
resolve_overlaps(
218-
geometries,
219-
relative_threshold=relative_threshold,
220-
force_polygons=force_polygons,
221-
)
222-
)
223-
geometries = make_valid_polygons(
224-
close_gaps(
225-
geometries,
226-
relative_threshold=relative_threshold,
227-
force_polygons=force_polygons,
228-
)
229-
)
197+
geometries = make_valid_polygons(resolve_overlaps(geometries,
198+
relative_threshold=relative_threshold,
199+
force_polygons=force_polygons))
200+
geometries = make_valid_polygons(close_gaps(geometries,
201+
relative_threshold=relative_threshold,
202+
force_polygons=force_polygons))
230203
else:
231204
geometries = remove_repeated_vertices(geometries).make_valid()
232-
geometries = resolve_overlaps(
233-
geometries, relative_threshold=relative_threshold
234-
).make_valid()
235-
geometries = close_gaps(
236-
geometries, relative_threshold=relative_threshold
237-
).make_valid()
205+
geometries = resolve_overlaps(geometries, relative_threshold=relative_threshold).make_valid()
206+
geometries = close_gaps(geometries, relative_threshold=relative_threshold).make_valid()
238207

239208
return geometries
240209

@@ -244,9 +213,7 @@ def remove_repeated_vertices(geometries):
244213
Removes repeated vertices. Vertices are considered to be repeated if they
245214
appear consecutively, excluding the start and end points.
246215
"""
247-
return geometries.geometry.apply(
248-
lambda x: apply_func_to_polygon_parts(x, dedup_vertices)
249-
)
216+
return geometries.geometry.apply(lambda x: apply_func_to_polygon_parts(x, dedup_vertices))
250217

251218

252219
def snap_to_grid(geometries, n=-7):
@@ -263,19 +230,16 @@ def crop_to(source, target):
263230
"""
264231
Crops the source geometries to the target geometries.
265232
"""
266-
target_union = union_all(get_geometries(target))
267-
cropped_geometries = get_geometries(source).apply(
268-
lambda x: x.intersection(target_union)
269-
)
233+
target_union = unary_union(get_geometries(target))
234+
cropped_geometries = get_geometries(source).apply(lambda x: x.intersection(target_union))
270235

271236
if (cropped_geometries.area == 0).any():
272-
warnings.warn(
273-
"Some cropped geometries have zero area, likely due to\n"
274-
+ "large differences in the union of the geometries in your\n"
275-
+ "source and target shapefiles. This may become an issue\n"
276-
+ "when maupping.\n",
277-
AreaCroppingWarning,
278-
)
237+
warnings.warn("Some cropped geometries have zero area, likely due to\n" +
238+
"large differences in the union of the geometries in your\n" +
239+
"source and target shapefiles. This may become an issue\n" +
240+
"when maupping.\n",
241+
AreaCroppingWarning
242+
)
279243

280244
return cropped_geometries
281245

@@ -291,18 +255,13 @@ def expand_to(source, target, force_polygons=False):
291255
else:
292256
geometries = get_geometries(source).make_valid()
293257

294-
source_union = union_all(geometries)
258+
source_union = unary_union(geometries)
295259

296260
leftover_geometries = get_geometries(target).apply(lambda x: x - source_union)
297-
leftover_geometries = leftover_geometries[~leftover_geometries.is_empty].explode(
298-
index_parts=False
299-
)
261+
leftover_geometries = leftover_geometries[~leftover_geometries.is_empty].explode(index_parts=False)
300262

301263
geometries = absorb_by_shared_perimeter(
302-
leftover_geometries,
303-
get_geometries(source),
304-
relative_threshold=None,
305-
force_polygons=force_polygons,
264+
leftover_geometries, get_geometries(source), relative_threshold=None, force_polygons=force_polygons
306265
)
307266

308267
return geometries
@@ -322,14 +281,14 @@ def doctor(source, target=None, silent=False, accept_holes=False):
322281
False. (Default is accept_holes = False.)
323282
"""
324283
shapefiles = [source]
325-
source_union = union_all(get_geometries(source))
284+
source_union = unary_union(get_geometries(source))
326285

327286
health_check = True
328287

329288
if target is not None:
330289
shapefiles.append(target)
331290

332-
target_union = union_all(get_geometries(target))
291+
target_union = unary_union(get_geometries(target))
333292
sym_area = target_union.symmetric_difference(source_union).area
334293

335294
if sym_area != 0:
@@ -338,9 +297,7 @@ def doctor(source, target=None, silent=False, accept_holes=False):
338297
health_check = False
339298

340299
for shp in shapefiles:
341-
if not shp.geometry.apply(
342-
lambda x: isinstance(x, (Polygon, MultiPolygon))
343-
).all():
300+
if not shp.geometry.apply(lambda x: isinstance(x, (Polygon, MultiPolygon))).all():
344301
if silent is False:
345302
print("Some rows do not have geometries.")
346303
health_check = False
@@ -389,9 +346,7 @@ def apply_func_to_polygon_parts(shape, func):
389346
elif isinstance(shape, MultiPolygon):
390347
return MultiPolygon([func(poly) for poly in shape.geoms])
391348
else:
392-
raise TypeError(
393-
f"Can only apply {func} to a Polygon or MultiPolygon (got {shape} with type {type(shape)})!"
394-
)
349+
raise TypeError(f"Can only apply {func} to a Polygon or MultiPolygon (got {shape} with type {type(shape)})!")
395350

396351

397352
def dedup_vertices(polygon):
@@ -426,31 +381,16 @@ def dedup_vertices(polygon):
426381

427382
def snap_polygon_to_grid(polygon, n=-7):
428383
if len(polygon.interiors) == 0:
429-
return Polygon(
430-
[(round(x, -n), round(y, -n)) for x, y in polygon.exterior.coords]
431-
)
384+
return Polygon([(round(x, -n), round(y, -n)) for x, y in polygon.exterior.coords])
432385
else:
433-
return Polygon(
434-
[(round(x, -n), round(y, -n)) for x, y in polygon.exterior.coords],
435-
holes=[
436-
[(round(x, -n), round(y, -n)) for x, y in interior_ring.coords]
437-
for interior_ring in polygon.interiors
438-
],
439-
)
386+
return Polygon([(round(x, -n), round(y, -n)) for x, y in polygon.exterior.coords], holes=[[(round(x, -n), round(y, -n)) for x, y in interior_ring.coords] for interior_ring in polygon.interiors])
440387

441388

442389
def snap_multilinestring_to_grid(multilinestring, n=-7):
443390
if multilinestring.geom_type == "LineString":
444-
return LineString(
445-
[(round(x, -n), round(y, -n)) for x, y in multilinestring.coords]
446-
)
391+
return LineString([(round(x, -n), round(y, -n)) for x, y in multilinestring.coords])
447392
elif multilinestring.geom_type == "MultiLineString":
448-
return MultiLineString(
449-
[
450-
LineString([(round(x, -n), round(y, -n)) for x, y in linestring.coords])
451-
for linestring in multilinestring.geoms
452-
]
453-
)
393+
return MultiLineString([LineString([(round(x, -n), round(y, -n)) for x, y in linestring.coords]) for linestring in multilinestring.geoms])
454394

455395

456396
def split_by_level(series, multiindex):
@@ -461,9 +401,7 @@ def split_by_level(series, multiindex):
461401

462402

463403
@require_same_crs
464-
def absorb_by_shared_perimeter(
465-
sources, targets, relative_threshold=None, force_polygons=False
466-
):
404+
def absorb_by_shared_perimeter(sources, targets, relative_threshold=None, force_polygons=False):
467405
if len(sources) == 0:
468406
return targets
469407

@@ -484,8 +422,7 @@ def absorb_by_shared_perimeter(
484422
assignment = assignment[under_threshold]
485423

486424
sources_to_absorb = GeoSeries(
487-
sources.groupby(assignment).apply(union_all),
488-
crs=sources.crs,
425+
sources.groupby(assignment).apply(unary_union), crs=sources.crs,
489426
)
490427

491428
# Note that the following line produces a warning message when sources_to_absorb
@@ -497,7 +434,7 @@ def absorb_by_shared_perimeter(
497434

498435
# This difference in indices is expected since not all target geometries may have sources
499436
# to absorb, so it would be nice to remove this warning.
500-
result = targets.union(sources_to_absorb, align=True)
437+
result = targets.union(sources_to_absorb)
501438

502439
# The .union call only returns the targets who had a corresponding
503440
# source to absorb. Now we fill in all of the unchanged targets.

0 commit comments

Comments
 (0)