Skip to content

Commit d017342

Browse files
author
Fred Thomas
committed
fix indexing error in drop_duplicate_geometries
1 parent f67043f commit d017342

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

src/snkit/network.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -484,11 +484,13 @@ def node_connectivity_degree(node, network):
484484

485485
def drop_duplicate_geometries(gdf, keep="first"):
    """Drop rows with duplicate geometries from a dataframe.

    Parameters
    ----------
    gdf
        Dataframe with a ``geometry`` column whose values expose a ``wkb``
        attribute.
    keep
        Which occurrence of each duplicated geometry to retain; passed
        straight through to pandas ``Series.duplicated``: ``"first"``,
        ``"last"`` or ``False`` (drop every occurrence).

    Returns
    -------
    The rows of ``gdf`` whose geometry is retained, in their original order
    and with their original index labels.
    """
    # Compare geometries by their WKB bytes so pandas can detect duplicates;
    # discussed in https://github.com/geopandas/geopandas/issues/521
    wkb = gdf.geometry.apply(lambda geom: geom.wkb)

    # Select rows with a positional boolean mask rather than by index label
    # or integer position: this is correct for any index, including ones that
    # do not start at 0 or that contain repeated labels.
    is_duplicate = wkb.duplicated(keep=keep).to_numpy()
    return gdf.loc[~is_duplicate]
492494

493495

494496
def nearest_point_on_edges(point, edges):

tests/test_init.py

+19
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
warnings.filterwarnings("ignore", category=DeprecationWarning)
88
from geopandas import GeoDataFrame
99

10+
import pandas as pd
1011
from pandas.testing import assert_frame_equal
1112
from pytest import fixture, mark
1213
from shapely.geometry import Point, LineString, MultiPoint, MultiLineString
@@ -406,6 +407,24 @@ def test_passing_slice():
406407
assert_frame_equal(actual, expected)
407408

408409

410+
def test_drop_duplicate_geometries():
    """Check that duplicate geometries are dropped by label, not by position."""
    a = Point((0, 0))
    b = Point((0, 2))
    c = Point((0, 1))
    ac = LineString([a, c])
    cb = LineString([c, b])
    # use an index that doesn't start from 0 to check our indexing hygiene
    index = pd.Index([2, 3, 5, 7, 11, 13])
    gdf_with_dupes = GeoDataFrame(
        index=index,
        data=[a, a, b, ac, ac, cb],
        columns=["geometry"],
    )

    deduped = snkit.network.drop_duplicate_geometries(gdf_with_dupes)
    # we should have just the first of each duplicate item; comparing plain
    # lists gives a readable assertion failure even when the lengths differ
    assert deduped.index.tolist() == [2, 5, 7, 13]

    # with keep="last" the final occurrence of each duplicate survives
    deduped_last = snkit.network.drop_duplicate_geometries(
        gdf_with_dupes, keep="last"
    )
    assert deduped_last.index.tolist() == [3, 5, 11, 13]
426+
427+
409428
@mark.skipif(not USE_NX, reason="networkx not available")
410429
def test_to_networkx(connected):
411430
"""Test conversion to networkx"""

0 commit comments

Comments
 (0)