Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add local transmission capacity analysis function #279

Draft
wants to merge 11 commits into
base: hifld
Choose a base branch
from
32 changes: 32 additions & 0 deletions prereise/gather/griddata/hifld/data_process/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

from prereise.gather.griddata.hifld import const


Expand All @@ -19,3 +21,33 @@ def map_state_and_county_to_interconnect(state_abv, county):
return region
return const.state_county_splits[state_upper]["default"]
raise ValueError(f"Got an unexpected state: {state_abv}")


def distribute_demand_from_zones_to_buses(zone_demand, bus):
    """Split per-zone demand across buses, proportionally to each bus's 'Pd'.

    :param pandas.DataFrame zone_demand: demand by zone. Index is timestamp, columns
        are zone IDs, values are zone demand (MW).
    :param pandas.DataFrame bus: table of bus data, containing at least 'zone_id' and
        'Pd' columns.
    :return: (*pandas.DataFrame*) -- data frame of demand. Index is timestamp, columns
        are bus IDs, values are bus demand (MW).
    :raises ValueError: if the columns of ``zone_demand`` don't match the set of zone
        IDs within the 'zone_id' column of ``bus``.
    """
    if set(zone_demand.columns) != set(bus["zone_id"].unique()):
        raise ValueError("zones don't match between zone_demand and bus dataframes")
    # Each bus's share of its zone's demand is its Pd divided by the zone's total Pd
    zone_total_pd = bus.groupby("zone_id")["Pd"].transform("sum")
    shares = pd.DataFrame(
        {"zone_share": bus["Pd"] / zone_total_pd, "zone_id": bus["zone_id"]}
    )
    # Build a (bus x zone) share matrix; buses outside a zone get a share of 0
    share_matrix = shares.pivot_table(
        index="bus_id",
        columns="zone_id",
        values="zone_share",
        dropna=False,
        fill_value=0,
    )
    # (time x zone) @ (zone x bus) -> (time x bus)
    return zone_demand.dot(share_matrix.T)
105 changes: 105 additions & 0 deletions prereise/gather/griddata/hifld/data_process/tests/test_topology.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np
import pandas as pd
import pytest
from powersimdata.utility.distance import haversine, ll2uv

from prereise.gather.griddata.hifld.data_process.topology import (
connect_islands_with_minimum_cost,
find_adj_state_of_2_conn_comp,
identify_bottlenecks,
min_dist_of_2_conn_comp,
min_dist_of_2_conn_comp_kdtree,
)
Expand Down Expand Up @@ -225,3 +228,105 @@ def test_connect_islands_with_minimum_cost():
for e in all_edges + mst_edges:
e[2].pop("weight")
assert (all_edges, mst_edges) == expected_return_kdtree


def test_identify_bottlenecks():
    """Check ``identify_bottlenecks`` on a small hand-built network.

    The fixture graph has three 'peripheral' areas hanging off a central
    biconnected block {7, 8, 9, 10, 11} (used as the root): nodes {12..15} behind
    articulation point 8, nodes {16} and {17, 18} behind articulation point 10,
    and nodes {1..6} behind articulation point 7 (with 2 as a further
    articulation point separating {1} and {3, 4}).
    """
    branch = pd.DataFrame(
        [
            {"from_bus_id": 1, "to_bus_id": 2},
            {"from_bus_id": 2, "to_bus_id": 3},
            {"from_bus_id": 2, "to_bus_id": 4},
            {"from_bus_id": 2, "to_bus_id": 5},
            {"from_bus_id": 2, "to_bus_id": 6},
            {"from_bus_id": 3, "to_bus_id": 4},
            {"from_bus_id": 5, "to_bus_id": 6},
            {"from_bus_id": 5, "to_bus_id": 7},
            {"from_bus_id": 6, "to_bus_id": 7},
            {"from_bus_id": 7, "to_bus_id": 8},
            {"from_bus_id": 7, "to_bus_id": 11},
            {"from_bus_id": 8, "to_bus_id": 9},
            {"from_bus_id": 8, "to_bus_id": 11},
            {"from_bus_id": 8, "to_bus_id": 12},
            {"from_bus_id": 8, "to_bus_id": 14},
            {"from_bus_id": 8, "to_bus_id": 15},
            {"from_bus_id": 15, "to_bus_id": 8},  # Same nodes, different orientation
            {"from_bus_id": 9, "to_bus_id": 10},
            {"from_bus_id": 9, "to_bus_id": 11},
            {"from_bus_id": 10, "to_bus_id": 11},
            {"from_bus_id": 10, "to_bus_id": 16},
            {"from_bus_id": 10, "to_bus_id": 17},
            {"from_bus_id": 10, "to_bus_id": 18},
            {"from_bus_id": 12, "to_bus_id": 13},
            {"from_bus_id": 13, "to_bus_id": 14},
            {"from_bus_id": 13, "to_bus_id": 15},
            {"from_bus_id": 17, "to_bus_id": 18},
        ]
    )
    # Synthetic capacities/demands: capacity grows with the product of endpoint IDs,
    # demand grows linearly with bus ID (bus 1 -> 0.1 MW, ..., bus 18 -> 1.8 MW)
    branch["capacity"] = branch["from_bus_id"] * branch["to_bus_id"] / 50
    demand = pd.Series(np.arange(0.1, 1.9, 0.1), index=range(1, 19))
    bottlenecks = identify_bottlenecks(
        branch, demand, root=frozenset({7, 8, 9, 10, 11})
    )
    # Keys are (articulation point, block) or (block, articulation point) pairs;
    # blocks are frozensets of their member buses (including articulation points)
    expected_all_keys = {
        (8, frozenset({8, 12, 13, 14, 15})),
        (frozenset({7, 8, 9, 10, 11}), 8),
        (10, frozenset({10, 16})),
        (10, frozenset({10, 17, 18})),
        (frozenset({7, 8, 9, 10, 11}), 10),
        (2, frozenset({2, 3, 4})),
        (2, frozenset({1, 2})),
        (frozenset({2, 5, 6, 7}), 2),
        (7, frozenset({2, 5, 6, 7})),
        (frozenset({7, 8, 9, 10, 11}), 7),
    }
    # 'constrained' pairs are the subset of 'all' where demand exceeds capacity
    expected_constrained_keys = {
        (frozenset({7, 8, 9, 10, 11}), 8),
        (frozenset({7, 8, 9, 10, 11}), 10),
        (2, frozenset({2, 3, 4})),
        (2, frozenset({1, 2})),
        (frozenset({2, 5, 6, 7}), 2),
        (7, frozenset({2, 5, 6, 7})),
        (frozenset({7, 8, 9, 10, 11}), 7),
    }
    assert set(bottlenecks["all"]) == expected_all_keys
    assert set(bottlenecks["constrained"]) == expected_constrained_keys
    # Every entry exposes the same three fields, with consistent types
    for k, v in bottlenecks["all"].items():
        assert set(v.keys()) == {"capacity", "demand", "descendants"}
        assert isinstance(v["capacity"], float)
        assert isinstance(v["demand"], float)
        assert isinstance(v["descendants"], set)
    # Constrained entries are the same objects/values as their 'all' counterparts
    for k, v in bottlenecks["constrained"].items():
        assert v == bottlenecks["all"][k]
    # Spot-check exact numbers for a representative subset of pairs
    results_to_check = {
        (8, frozenset({8, 12, 13, 14, 15})): {
            "descendants": set(),
            "demand": 5.4,
            "capacity": 8.96,
        },
        (2, frozenset({2, 3, 4})): {
            "descendants": set(),
            "demand": 0.7,
            "capacity": 0.28,
        },
        (frozenset({2, 5, 6, 7}), 2): {
            "descendants": {1, 3, 4},
            "demand": 1.0,
            "capacity": 0.44,
        },
        (7, frozenset({2, 5, 6, 7})): {
            "descendants": {1, 2, 3, 4},
            "demand": 2.1,
            "capacity": 1.54,
        },
        (frozenset({7, 8, 9, 10, 11}), 7): {
            "descendants": {1, 2, 3, 4, 5, 6},
            "demand": 2.8,
            "capacity": 2.66,
        },
    }
    for key, result in results_to_check.items():
        for subkey, subresult in result.items():
            if subkey == "descendants":
                # Sets compare exactly; floats need approximate comparison
                assert bottlenecks["all"][key][subkey] == result[subkey]
            else:
                assert bottlenecks["all"][key][subkey] == pytest.approx(result[subkey])
149 changes: 149 additions & 0 deletions prereise/gather/griddata/hifld/data_process/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from tqdm import tqdm

from prereise.gather.griddata.hifld.const import abv_state_neighbor
from prereise.gather.griddata.hifld.data_process.helpers import (
distribute_demand_from_zones_to_buses,
)


def min_dist_of_2_conn_comp(
Expand Down Expand Up @@ -315,3 +318,149 @@ def add_interconnects_by_connected_components(
for i, name in enumerate(interconnect_size_rank):
substations.loc[sorted_interconnects[i], "interconnect"] = name
substations.drop(seams_substations, inplace=True)


def find_descendants(graph, bctree, parent, grandparent, result=None):
    """Given a graph and its block-cut tree representation, aggregate demand from leaves
    towards root and identify capacity bottlenecks along the way.

    Recurses depth-first through ``bctree``, filling ``result`` with one entry per
    BC-tree node and one entry per (parent, child) edge visited. NOTE(review): the
    recursion depth equals the BC-tree depth; a very long chain of blocks could in
    principle hit Python's recursion limit — confirm acceptable for real grids.

    :param networkx.Graph graph: original graph, where nodes have at least a 'demand'
        attribute and edges have at least a 'capacity' attribute.
    :param networkx.Graph bctree: block-cut tree representation of ``graph``, where
        articulation point IDs are their original node ID and block ID are a frozenset
        of the nodes within the block (including articulation points).
    :param int/frozenset parent: a node ID within ``bctree`` to aggregate 'downstream'
        demand for and to evaluate capacity constraints towards.
    :param int/frozenset grandparent: a node ID within ``bctree`` which is directly
        'upstream' of ``parent``, used to identify 'downstream' directions. If None is
        passed, then ``parent`` is considered to be the 'root' node, i.e. all directions
        are 'downstream'.
    :param dict result: a dictionary to be used to store results. If None is passed, one
        will be created.
    :return: (*dict*) -- a dictionary of information for the ``parent`` node. Note: this
        dictionary is the value of the ``parent`` key within the higher-level ``result``
        dictionary.
    """
    if result is None:
        result = {}
    # Every BC-tree neighbor except the one we came from is 'downstream'
    children = set(bctree[parent]) - {grandparent}
    if isinstance(parent, int):
        # parent is an articulation point, children are blocks
        # (blocks are frozensets, so their union minus parent = all other block members)
        result[parent] = {
            "descendants": set().union(*children) - {parent},
            "demand": graph.nodes[parent]["demand"],
        }
    else:
        # parent is a block, children are articulation points
        # seed demand with the demand of every bus inside the block itself
        result[parent] = {
            "descendants": children.copy(),
            "demand": sum(graph.nodes[p]["demand"] for p in parent),
        }
    for child in children:
        # Recurse first: we need all of the child's descendants to sum their demands
        child_descendants = find_descendants(graph, bctree, child, parent, result)
        result[(parent, child)] = {
            "descendants": child_descendants["descendants"] - {parent},
        }
        result[parent]["descendants"] |= child_descendants["descendants"]
        if isinstance(child, int):
            # parent is a block, child is an articulation point
            # downstream branches are connected to child and within parent
            result[(parent, child)]["capacity"] = sum(
                graph[child][n]["capacity"] for n in graph[child] if n in parent
            )
            result[(parent, child)]["demand"] = child_descendants["demand"]
            # Avoid double-counting demand from the articulation point, which was
            # already included in the block's own seed demand above
            result[parent]["demand"] += (
                result[(parent, child)]["demand"] - graph.nodes[child]["demand"]
            )
        else:
            # parent is an articulation point, child is a block
            # downstream branches are connected to parent and within child
            result[(parent, child)]["capacity"] = sum(
                graph[parent][n]["capacity"] for n in graph[parent] if n in child
            )
            # We don't want to include the articulation point's demand, since the
            # child block counted it in its own seed demand
            result[(parent, child)]["demand"] = (
                child_descendants["demand"] - graph.nodes[parent]["demand"]
            )
            result[parent]["demand"] += result[(parent, child)]["demand"]

    return result[parent]


def identify_bottlenecks(branch, demand, root=None):
    """Given a table of branch connectivity and capacities, and bus demands, identify
    bottlenecks 'downstream' of the root of the block-cut tree.

    :param pandas.DataFrame branch: branch info, containing at least: 'from_bus_id',
        'to_bus_id', and 'capacity' columns.
    :param dict/pandas.Series demand: mapping of bus IDs to demand.
    :param int/frozenset root: node to use as the root of the constructed BC tree. If
        None, the largest biconnected component will be chosen.
    :return: (*dict*) -- dictionary with keys:
        - "all": a dictionary, keys are tuples of either (articulation point, block) or
          (block, articulation point), values are dictionaries of information on that
          potential constraint (keys of 'descendants', 'capacity', and 'demand').
        - "constrained": a dictionary, keys and values are identical to the 'all'
          dictionary except that only entries where demand is greater than capacity
          are contained.
        - "root": the root node.
    """
    # Build transmission graph
    # Parallel branches (regardless of orientation) are merged, summing capacities
    sorted_buses = branch[["from_bus_id", "to_bus_id"]].apply(sorted, axis=1).map(tuple)
    aggregated_capacity = branch.groupby(sorted_buses)["capacity"].sum()
    aggregated_branch = pd.DataFrame(
        aggregated_capacity.index.tolist(), columns=["bus1", "bus2"]
    ).assign(capacity=aggregated_capacity.tolist())
    graph = nx.convert_matrix.from_pandas_edgelist(
        aggregated_branch, "bus1", "bus2", ["capacity"]
    )
    nx.set_node_attributes(graph, {k: {"demand": v} for k, v in demand.items()})
    # Build block-cut graph of biconnected components
    # (each block is a frozenset of its buses, so it can serve as a hashable node ID)
    biconnected_components = list(
        frozenset(s) for s in nx.algorithms.biconnected_components(graph)
    )
    articulation_points = list(nx.algorithms.articulation_points(graph))
    bctree = nx.Graph()
    bctree.add_nodes_from(articulation_points)
    bctree.add_nodes_from(biconnected_components)
    # Connect each articulation point to every block that contains it
    bctree.add_edges_from(
        (a, b) for a in articulation_points for b in biconnected_components if a in b
    )
    # Using the largest biconnected component as the 'root' (unless one is given),
    # identify 'downstream' bottlenecks
    if root is None:
        root = frozenset(max(biconnected_components, key=len))
    descendants = {}
    find_descendants(graph, bctree, parent=root, grandparent=None, result=descendants)
    # Filter out BC-tree node information, keep block/articulation pairs only
    descendant_pairs = {k: v for k, v in descendants.items() if isinstance(k, tuple)}
    # Identify pairs with capacity constraints
    constrained_pairs = {
        k: v for k, v in descendant_pairs.items() if v["demand"] > v["capacity"]
    }
    return {"all": descendant_pairs, "constrained": constrained_pairs, "root": root}


def report_bottlenecks(branch, bus, zone_demand):
    """Separate the full branch table by interconnect, and report bottlenecks for each.

    :param pandas.DataFrame branch: full branch table, containing at least
        'from_bus_id', 'to_bus_id', 'rateA', and 'interconnect' columns.
    :param pandas.DataFrame bus: full bus table, containing at least 'zone_id' and
        'Pd' columns.
    :param pandas.DataFrame zone_demand: demand by zone. Index is timestamp, columns
        are zone IDs, values are zone demand (MW).
    """
    # Evaluate constraints against each bus's peak demand across all timestamps
    bus_demand = distribute_demand_from_zones_to_buses(zone_demand, bus).max()
    bottlenecks = {
        interconnect: identify_bottlenecks(filtered_branch, bus_demand)
        for interconnect, filtered_branch in branch.rename(
            {"rateA": "capacity"}, axis=1
        ).groupby("interconnect")
    }
    for interconnect, result in bottlenecks.items():
        # Print the interconnect name (previously printed the literal string
        # "interconnect" by mistake)
        print(interconnect)
        root = result["root"]
        constrained = result["constrained"]
        for (k1, k2), info in constrained.items():
            # For readability: mark the root node, and render blocks (frozensets)
            # as plain sets; articulation points (ints) are printed as-is
            k1 = "root" if k1 == root else k1 if isinstance(k1, int) else set(k1)
            k2 = "root" if k2 == root else k2 if isinstance(k2, int) else set(k2)
            print("from", k1, "to", f"{k2}:", info)
6 changes: 6 additions & 0 deletions prereise/gather/griddata/hifld/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
build_solar,
build_wind,
)
from prereise.gather.griddata.hifld.data_process.topology import report_bottlenecks
from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


Expand All @@ -34,6 +35,7 @@ def create_csvs(
hydro_kwargs,
solar_kwargs,
wind_kwargs,
zone_demand=None,
):
"""Process HIFLD source data to CSVs compatible with PowerSimData.

Expand All @@ -45,11 +47,15 @@ def create_csvs(
:func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
:param dict wind_kwargs: keyword arguments to pass to
:func:`prereise.gather.griddata.hifld.data_process.profiles.build_wind`.
:param pandas.DataFrame zone_demand: per-zone demand profiles, used to evaluate
local transmission constraints. If None, this step will be skipped.
"""
_check_profile_kwargs(
{"hydro": hydro_kwargs, "solar": solar_kwargs, "wind": wind_kwargs}
)
full_tables = create_grid(output_folder)
if zone_demand is not None:
report_bottlenecks(full_tables["branch"], full_tables["bus"], zone_demand)
create_profiles(
full_tables["plant"],
profile_year,
Expand Down