Skip to content

Commit

Permalink
Merge pull request #55 from tomalrussell/fix/drop_duplicate_geometries
Browse files Browse the repository at this point in the history
fix indexing error in drop_duplicate_geometries
  • Loading branch information
tomalrussell authored Jul 25, 2022
2 parents f67043f + d017342 commit f03b0f4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/snkit/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,13 @@ def node_connectivity_degree(node, network):

def drop_duplicate_geometries(gdf, keep="first"):
    """Drop rows with duplicate geometries from a dataframe.

    Parameters
    ----------
    gdf
        Dataframe exposing a ``geometry`` column whose values provide a
        ``wkb`` attribute (e.g. a GeoDataFrame of shapely geometries).
    keep : {"first", "last", False}, default "first"
        Which occurrence of each duplicated geometry to retain; passed
        straight through to ``pandas.Series.duplicated``. ``False`` drops
        every row whose geometry occurs more than once.

    Returns
    -------
    The rows of ``gdf`` that survive de-duplication, in their original
    order and with their original index labels.
    """
    # convert to wkb so duplicate detection will work
    # discussed in https://github.com/geopandas/geopandas/issues/521
    wkb = gdf.geometry.apply(lambda geom: geom.wkb)

    # Select by a positional boolean mask rather than by index labels:
    # - `iloc[labels]` is wrong whenever the index is not 0..n-1
    # - `loc[labels]` returns extra rows when index labels are repeated
    # A plain boolean array sidesteps both problems and any index alignment.
    is_retained = ~wkb.duplicated(keep=keep)
    return gdf.loc[is_retained.to_numpy()]


def nearest_point_on_edges(point, edges):
Expand Down
19 changes: 19 additions & 0 deletions tests/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
warnings.filterwarnings("ignore", category=DeprecationWarning)
from geopandas import GeoDataFrame

import pandas as pd
from pandas.testing import assert_frame_equal
from pytest import fixture, mark
from shapely.geometry import Point, LineString, MultiPoint, MultiLineString
Expand Down Expand Up @@ -406,6 +407,24 @@ def test_passing_slice():
assert_frame_equal(actual, expected)


def test_drop_duplicate_geometries():
    """Check that only the first row of each repeated geometry is kept."""
    start = Point((0, 0))
    end = Point((0, 2))
    middle = Point((0, 1))
    first_half = LineString([start, middle])
    second_half = LineString([middle, end])

    # Deliberately not a 0..n-1 range, so that mixing up positional and
    # label-based indexing would show up in the result.
    labels = pd.Index([2, 3, 5, 7, 11, 13])
    geometries = [start, start, end, first_half, first_half, second_half]
    gdf_with_dupes = GeoDataFrame(
        index=labels,
        data=geometries,
        columns=["geometry"],
    )

    deduped = snkit.network.drop_duplicate_geometries(gdf_with_dupes)

    # Rows labelled 3 and 11 repeat the geometries at 2 and 7 respectively.
    expected_labels = pd.Index([2, 5, 7, 13])
    assert (deduped.index == expected_labels).all()


@mark.skipif(not USE_NX, reason="networkx not available")
def test_to_networkx(connected):
"""Test conversion to networkx"""
Expand Down

0 comments on commit f03b0f4

Please sign in to comment.