diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..63add1b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[tool.pytest.ini_options] +pythonpath = [ + "src", +] diff --git a/src/snkit/network.py b/src/snkit/network.py index a1b88f8..a8b2a32 100644 --- a/src/snkit/network.py +++ b/src/snkit/network.py @@ -27,7 +27,9 @@ shape, mapping, ) -from shapely.ops import split, linemerge +from shapely.ops import split, linemerge, unary_union + +from collections import Counter # optional progress bars if "SNKIT_PROGRESS" in os.environ and os.environ["SNKIT_PROGRESS"] in ("1", "TRUE"): @@ -267,12 +269,48 @@ def split_edges_at_nodes(network, tolerance=1e-9): # combine dfs edges = pandas.concat(split_edges, axis=0) - # reset index and drop edges = edges.reset_index().drop("index", axis=1) - # return new network with split edges + return Network(nodes=network.nodes, edges=edges) +def split_edges_at_intersections(network, tolerance=1e-9): + """Split network edges where they intersect line geometries""" + split_edges = [] + split_points = [] + for edge in tqdm( + network.edges.itertuples(index=False), desc="split", total=len(network.edges) + ): + # note: the symmetry of intersection is not exploited here. + # (If A intersects B, then B intersects A) + # since edges are not modified within the loop, this has just + # potential performance consequences. + + hits_points = edges_intersecting_points(edge.geometry, network.edges, tolerance) + + # store the split edges and intersection points + split_points.extend(hits_points) + hits_points = MultiPoint(hits_points) + edges = split_edge_at_points(edge, hits_points, tolerance) + split_edges.append(edges) + + # add the (potentially) split edges + edges = pandas.concat(split_edges, axis=0) + edges = edges.reset_index().drop("index", axis=1) + + # combine the original nodes with the new intersection nodes + # dropping the duplicates. + # note: there are at least duplicates from above since intersections + # are checked twice + # note: intersection nodes are appended, and if any duplicates, the + # original counterparts are kept. + nodes = GeoDataFrame(geometry=split_points) + nodes = pandas.concat([network.nodes, nodes], axis=0).drop_duplicates() + nodes = nodes.reset_index().drop("index", axis=1) + + return Network(nodes=nodes, edges=edges) + + def link_nodes_to_edges_within(network, distance, condition=None, tolerance=1e-9): """Link nodes to all edges within some distance""" new_node_geoms = [] @@ -521,6 +559,43 @@ def edges_within(point, edges, distance): return d_within(point, edges, distance) +def edges_intersecting_points(line, edges, tolerance=1e-9): + """Return intersection points of intersecting edges""" + hits = edges_intersecting(line, edges, tolerance) + + hits_points = [] + for hit in hits.geometry: + # first extract the actual intersections from the hits + # for being new geometrical objects, they are not in the sindex + intersection = line.intersection(hit) + # if the line is not simple, there is a self-crossing point + # (note that it will always interact with itself) + # note that __eq__ is used on purpose instead of equals() + # this is stricter: for geometries constructed in the same way + # it makes sense since the sindex is used here + if line == hit and not line.is_simple: + # there is not built-in way to find self-crossing points + # duplicated points after unary_union are the intersections + intersection = unary_union(line) + segments_coordinates = [] + for seg in intersection.geoms: + segments_coordinates.extend(list(seg.coords)) + intersection = [ + Point(p) for p, c in Counter(segments_coordinates).items() if c > 1 + ] + intersection = MultiPoint(intersection) + + # then extract the intersection points + hits_points = intersection_endpoints(intersection, hits_points) + + return hits_points + + +def edges_intersecting(line, edges, tolerance=1e-9): + """Find edges intersecting line""" + return intersects(line, edges, tolerance) + + def nodes_intersecting(line, nodes, tolerance=1e-9): """Find nodes intersecting line""" return intersects(line, nodes, tolerance) @@ -569,12 +644,45 @@ def line_endpoints(line): return start, end +def intersection_endpoints(geom, output=None): + """Return the points from an intersection geometry + + It extracts the starting and ending points of intersection + geometries recursively and appends them to `output`. + This doesn't handle polygons or collections of polygons. + """ + if output is None: + output = [] + + geom_type = geom.geom_type + if geom.is_empty: + pass + elif geom_type == "Point": + output.append(geom) + elif geom_type == "LineString": + start = Point(geom.coords[0]) + end = Point(geom.coords[-1]) + output.append(start) + output.append(end) + # recursively for collections of geometries + # note that there is no shared inheritance relationship + elif ( + geom_type == "MultiPoint" + or geom_type == "MultiLineString" + or geom_type == "GeometryCollection" + ): + for geom_ in geom.geoms: + output = intersection_endpoints(geom_, output) + + return output + + def split_edge_at_points(edge, points, tolerance=1e-9): """Split edge at point/multipoint""" try: segments = split_line(edge.geometry, points, tolerance) except ValueError: - # if splitting fails, e.g. becuase points is empty GeometryCollection + # if splitting fails, e.g. because points is empty GeometryCollection segments = [edge.geometry] edges = GeoDataFrame([edge] * len(segments)) edges.geometry = segments @@ -584,6 +692,13 @@ def split_edge_at_points(edge, points, tolerance=1e-9): def split_line(line, points, tolerance=1e-9): """Split line at point or multipoint, within some tolerance""" to_split = snap_line(line, points, tolerance) + # when the splitter is a self-intersection point, shapely splits in + # two parts only in a semi-arbitrary way, see the related question: + # https://gis.stackexchange.com/questions/435879/python-shapely-split-a-complex-line-at-self-intersections?noredirect=1#comment711214_435879 + # checking only that the line is complex might not be enough + # but the difference operation is useless in the worst case + if not to_split.is_simple: + to_split = to_split.difference(points) return list(split(to_split, points).geoms) @@ -675,14 +790,20 @@ def to_networkx(network, directed=False, weight_col=None): G.add_nodes_from(network.nodes.id.to_list()) # add nodal positions from geom - for node_id, x, y in zip(network.nodes.id, network.nodes.geometry.x, network.nodes.geometry.y): + for node_id, x, y in zip( + network.nodes.id, network.nodes.geometry.x, network.nodes.geometry.y + ): G.nodes[node_id]["pos"] = (x, y) # get edges from network data if weight_col is None: # default to geometry length edges_as_list = list( - zip(network.edges.from_id, network.edges.to_id, network.edges.geometry.length) + zip( + network.edges.from_id, + network.edges.to_id, + network.edges.geometry.length, + ) ) else: edges_as_list = list( diff --git a/tests/test_init.py b/tests/test_init.py index 7b345b0..8d18a15 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -141,6 +141,250 @@ def split_with_ids(): return snkit.Network(edges=edges, nodes=nodes) +@fixture +def unsplit_intersection(): + """Edges intersection, both edges not split + b + | + c--|--d + | + a + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((0, 1)) + d = Point((2, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d]}) + ab = LineString([a, b]) + cd = LineString([c, d]) + edges = GeoDataFrame(data={"geometry": [ab, cd]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def split_intersection(): + """Edges intersection, both edges split + b + | + c--x--d + | + a + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((0, 1)) + d = Point((2, 1)) + x = Point((1, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, x]}) + ax = LineString([a, x]) + xb = LineString([x, b]) + cx = LineString([c, x]) + xd = LineString([x, d]) + edges = GeoDataFrame(data={"geometry": [ax, xb, cx, xd]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def unsplit_multiple_intersections(): + """Multiple edges intersections, all edges unsplit + b f + | | + c--|---|--d + | | + a e + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((0, 1)) + d = Point((3, 1)) + e = Point((2, 0)) + f = Point((2, 2)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, e, f]}) + ab = LineString([a, b]) + cd = LineString([c, d]) + ef = LineString([e, f]) + edges = GeoDataFrame(data={"geometry": [ab, cd, ef]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def split_multiple_intersections(): + """Multiple edges intersections, all edges split + b f + | | + c--x---y--d + | | + a e + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((0, 1)) + d = Point((3, 1)) + e = Point((2, 0)) + f = Point((2, 2)) + x = Point((1, 1)) + y = Point((2, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, e, f, x, y]}) + ax = LineString([a, x]) + xb = LineString([x, b]) + cx = LineString([c, x]) + xy = LineString([x, y]) + yd = LineString([y, d]) + ey = LineString([e, y]) + yf = LineString([y, f]) + edges = GeoDataFrame(data={"geometry": [ax, xb, cx, xy, yd, ey, yf]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def unsplit_overlapping_lines(): + """Overlapping lines for a section + c--d + || + b--| + | + a + """ + a = Point((1, 0)) + b = Point((0, 1)) + c = Point((1, 2)) + d = Point((2, 2)) + # x is just a construction point + x = Point((1, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d]}) + ac = LineString([a, c]) + bd = LineString([b, x, c, d]) + edges = GeoDataFrame(data={"geometry": [ac, bd]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def split_overlapping_lines(): + """Split of overlapping lines for a section + c--d + || + b--x + | + a + """ + a = Point((1, 0)) + b = Point((0, 1)) + c = Point((1, 2)) + d = Point((2, 2)) + x = Point((1, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, x]}) + ax = LineString([a, x]) + bx = LineString([b, x]) + xc = LineString([x, c]) + cd = LineString([c, d]) + # note that there are two edges 'xc' + edges = GeoDataFrame(data={"geometry": [ax, xc, bx, xc, cd]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def unsplit_heterogeneous_intersection(): + """Crossing point and overlapping line + b--c + | | + e--|--------f + | --d + a + + * --d overlaps with e-f + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((2, 2)) + # y is just a construction point + y = Point((2, 1)) + d = Point((3, 1)) + e = Point((0, 1)) + f = Point((4, 1)) + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, e, f]}) + ad = LineString([a, b, c, y, d]) + ef = LineString([e, f]) + edges = GeoDataFrame(data={"geometry": [ad, ef]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def split_heterogeneous_intersection(): + """Crossing point and overlapping line, all edges split + b--c + | | + e--x--y--d--f + | -- + a + """ + a = Point((1, 0)) + b = Point((1, 2)) + c = Point((2, 2)) + d = Point((3, 1)) + e = Point((0, 1)) + f = Point((4, 1)) + x = Point((1, 1)) + y = Point((2, 1)) + # note: this is order sensitive although it shouldn't matter + nodes = GeoDataFrame(data={"geometry": [a, b, c, d, e, f, y, x]}) + ax = LineString([a, x]) + xb = LineString([x, b]) + bc = LineString([b, c]) + cy = LineString([c, y]) + yd = LineString([y, d]) + ex = LineString([e, x]) + xy = LineString([x, y]) + df = LineString([d, f]) + # note that there are two edges 'yd' + edges = GeoDataFrame(data={"geometry": [ax, xb, bc, cy, yd, ex, xy, yd, df]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def unsplit_self_intersection(): + """A line crossing with itself + b--c + | | + e--|--d + | + a + """ + a = Point((1, 0)) + e = Point((0, 1)) + # 'b', 'c' and 'd' are construction points, not nodes + b = Point((1, 2)) + c = Point((2, 2)) + d = Point((2, 1)) + nodes = GeoDataFrame(data={"geometry": [a, e]}) + ae = LineString([a, b, c, d, e]) + edges = GeoDataFrame(data={"geometry": [ae]}) + return snkit.Network(edges=edges, nodes=nodes) + + +@fixture +def split_self_intersection(): + """A line crossing with itself, edge split + b--c + | | + e--x--d + | + a + """ + a = Point((1, 0)) + e = Point((0, 1)) + x = Point((1, 1)) + # 'b', 'c' and 'd' are construction points, not nodes + b = Point((1, 2)) + c = Point((2, 2)) + d = Point((2, 1)) + nodes = GeoDataFrame(data={"geometry": [a, e, x]}) + ax = LineString([a, x]) + xx = LineString([x, b, c, d, x]) + xe = LineString([x, e]) + edges = GeoDataFrame(data={"geometry": [ax, xx, xe]}) + return snkit.Network(edges=edges, nodes=nodes) + + @fixture def gap(): """T-junction with nodes, edges not quite intersecting: @@ -230,50 +474,70 @@ def test_split_at_nodes(unsplit, split): assert_frame_equal(split.edges, actual.edges) -def test_split_multilinestrings(): - """Explode multilinestrings into linestrings""" +def test_split_at_intersections(unsplit_intersection, split_intersection): + """Should split edges at edges intersections""" + actual = snkit.network.split_edges_at_intersections(unsplit_intersection) + assert_frame_equal(split_intersection.edges, actual.edges) + assert_frame_equal(split_intersection.nodes, actual.nodes) - # point coordinates comprising three linestrings - mls_coords = [ - ( - (0, 0), - (0, 1), - ), - ( - (1, 1), - (2, 2), - (2, 1), - ), - ( - (0, 1), - (-1, 1), - (-1, 2), - (-1, 0), - ), - ] - # point coordsinate comprising a single linestring - ls_coords = [(4, 0), (4, 1), (4, 2)] - # make input network edges - edges = GeoDataFrame( - { - "data": ["MLS", "LS"], - "geometry": [MultiLineString(mls_coords), LineString(ls_coords)], - } +def test_split_at_intersection_already_split(split_intersection): + """Shouldn't do anything""" + actual = snkit.network.split_edges_at_intersections(split_intersection) + assert_frame_equal(split_intersection.edges, actual.edges) + assert_frame_equal(split_intersection.nodes, actual.nodes) + + +def test_split_at_intersection_endnode(unsplit, split): + """Should split the edge at the endnode intersection + + There shouldn't be any duplicate (no additional node). + """ + actual = snkit.network.split_edges_at_intersections(unsplit) + assert_frame_equal(split.edges, actual.edges) + assert_frame_equal(split.nodes, actual.nodes) + + +def test_split_at_intersection_multiple( + unsplit_multiple_intersections, split_multiple_intersections +): + """Should split the edges at each intersection""" + actual = snkit.network.split_edges_at_intersections(unsplit_multiple_intersections) + assert_frame_equal(split_multiple_intersections.edges, actual.edges) + assert_frame_equal(split_multiple_intersections.nodes, actual.nodes) + + +def test_split_intersection_overlapping_edges( + unsplit_overlapping_lines, split_overlapping_lines +): + """Should split at the start and end of the intersecting sector + + The intersecting sector should be duplicated. + """ + actual = snkit.network.split_edges_at_intersections(unsplit_overlapping_lines) + assert_frame_equal(split_overlapping_lines.edges, actual.edges) + assert_frame_equal(split_overlapping_lines.nodes, actual.nodes) + + +def test_split_intersection_heterogeneous( + unsplit_heterogeneous_intersection, split_heterogeneous_intersection +): + """Should split at intersection points and sectors (endpoints)""" + actual = snkit.network.split_edges_at_intersections( + unsplit_heterogeneous_intersection ) - multi_network = snkit.network.Network(edges=edges) - # check we have two rows (multilinestring and linestring) in the input network - assert len(multi_network.edges) == 2 + assert_frame_equal(split_heterogeneous_intersection.edges, actual.edges) + assert_frame_equal(split_heterogeneous_intersection.nodes, actual.nodes) - # split and check we have four rows (the resulting linestrings) in the output network - split_network = snkit.network.split_multilinestrings(multi_network) - assert len(split_network.edges) == 4 - # check data is replicated from multilinestring to child linestrings - assert list(split_network.edges["data"].values) == ["MLS"] * 3 + ["LS"] +def test_split_intersection_self(unsplit_self_intersection, split_self_intersection): + """Should split at the intersection point - # check everything is reindexed - assert list(split_network.edges.index.values) == list(range(4)) + This will give 3 edges: two 'normal' edges, and a self-loop + """ + actual = snkit.network.split_edges_at_intersections(unsplit_self_intersection) + assert_frame_equal(split_self_intersection.edges, actual.edges) + assert_frame_equal(split_self_intersection.nodes, actual.nodes) def test_split_line(): @@ -343,6 +607,52 @@ def test_split_line(): # consider points which will be considered duplicate within tolerance +def test_split_multilinestrings(): + """Explode multilinestrings into linestrings""" + + # point coordinates comprising three linestrings + mls_coords = [ + ( + (0, 0), + (0, 1), + ), + ( + (1, 1), + (2, 2), + (2, 1), + ), + ( + (0, 1), + (-1, 1), + (-1, 2), + (-1, 0), + ), + ] + # point coordsinate comprising a single linestring + ls_coords = [(4, 0), (4, 1), (4, 2)] + + # make input network edges + edges = GeoDataFrame( + { + "data": ["MLS", "LS"], + "geometry": [MultiLineString(mls_coords), LineString(ls_coords)], + } + ) + multi_network = snkit.network.Network(edges=edges) + # check we have two rows (multilinestring and linestring) in the input network + assert len(multi_network.edges) == 2 + + # split and check we have four rows (the resulting linestrings) in the output network + split_network = snkit.network.split_multilinestrings(multi_network) + assert len(split_network.edges) == 4 + + # check data is replicated from multilinestring to child linestrings + assert list(split_network.edges["data"].values) == ["MLS"] * 3 + ["LS"] + + # check everything is reindexed + assert list(split_network.edges.index.values) == list(range(4)) + + def test_link_nodes_to_edges_within(gap, bridged): """Nodes should link to edges within a distance, splitting the node at a new point if necessary.