Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add local transmission capacity analysis function #279

Draft
wants to merge 11 commits into
base: hifld
Choose a base branch
from
32 changes: 32 additions & 0 deletions prereise/gather/griddata/hifld/data_process/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd

from prereise.gather.griddata.hifld import const


Expand All @@ -19,3 +21,33 @@ def map_state_and_county_to_interconnect(state_abv, county):
return region
return const.state_county_splits[state_upper]["default"]
raise ValueError(f"Got an unexpected state: {state_abv}")


def distribute_demand_from_zones_to_buses(zone_demand, bus):
    """Split per-zone demand across buses, proportionally to each bus's 'Pd'.

    :param pandas.DataFrame zone_demand: demand by zone. Index is timestamp, columns
        are zone IDs, values are zone demand (MW).
    :param pandas.DataFrame bus: table of bus data, containing at least 'zone_id' and
        'Pd' columns.
    :return: (*pandas.DataFrame*) -- data frame of demand. Index is timestamp, columns
        are bus IDs, values are bus demand (MW).
    :raises ValueError: if the columns of ``zone_demand`` don't match the set of zone
        IDs within the 'zone_id' column of ``bus``.
    """
    if set(zone_demand.columns) != set(bus["zone_id"].unique()):
        raise ValueError("zones don't match between zone_demand and bus dataframes")
    # Each bus's share of its zone's demand is its Pd divided by the zone's total Pd
    zone_total_pd = bus.groupby("zone_id")["Pd"].transform("sum")
    shares = pd.DataFrame(
        {"zone_share": bus["Pd"] / zone_total_pd, "zone_id": bus["zone_id"]}
    )
    # Build a (bus x zone) share matrix; buses outside a zone get a share of 0
    share_matrix = shares.pivot_table(
        index="bus_id",
        columns="zone_id",
        values="zone_share",
        dropna=False,
        fill_value=0,
    )
    # (time x zone) @ (zone x bus) -> (time x bus)
    return zone_demand.dot(share_matrix.T)
105 changes: 105 additions & 0 deletions prereise/gather/griddata/hifld/data_process/tests/test_topology.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import numpy as np
import pandas as pd
import pytest
from powersimdata.utility.distance import haversine, ll2uv

from prereise.gather.griddata.hifld.data_process.topology import (
connect_islands_with_minimum_cost,
find_adj_state_of_2_conn_comp,
identify_bottlenecks,
min_dist_of_2_conn_comp,
min_dist_of_2_conn_comp_kdtree,
)
Expand Down Expand Up @@ -225,3 +228,105 @@ def test_connect_islands_with_minimum_cost():
for e in all_edges + mst_edges:
e[2].pop("weight")
assert (all_edges, mst_edges) == expected_return_kdtree


def test_identify_bottlenecks():
    """Check ``identify_bottlenecks`` on a small hand-built network.

    The fixture graph has three 'peripheral' areas hanging off a central
    biconnected block {7, 8, 9, 10, 11} (used as the root): nodes {12..15} behind
    articulation point 8, nodes {16} and {17, 18} behind articulation point 10,
    and nodes {1..6} behind articulation point 7 (with 2 as a further
    articulation point separating {1} and {3, 4}).
    """
    branch = pd.DataFrame(
        [
            {"from_bus_id": 1, "to_bus_id": 2},
            {"from_bus_id": 2, "to_bus_id": 3},
            {"from_bus_id": 2, "to_bus_id": 4},
            {"from_bus_id": 2, "to_bus_id": 5},
            {"from_bus_id": 2, "to_bus_id": 6},
            {"from_bus_id": 3, "to_bus_id": 4},
            {"from_bus_id": 5, "to_bus_id": 6},
            {"from_bus_id": 5, "to_bus_id": 7},
            {"from_bus_id": 6, "to_bus_id": 7},
            {"from_bus_id": 7, "to_bus_id": 8},
            {"from_bus_id": 7, "to_bus_id": 11},
            {"from_bus_id": 8, "to_bus_id": 9},
            {"from_bus_id": 8, "to_bus_id": 11},
            {"from_bus_id": 8, "to_bus_id": 12},
            {"from_bus_id": 8, "to_bus_id": 14},
            {"from_bus_id": 8, "to_bus_id": 15},
            {"from_bus_id": 15, "to_bus_id": 8},  # Same nodes, different orientation
            {"from_bus_id": 9, "to_bus_id": 10},
            {"from_bus_id": 9, "to_bus_id": 11},
            {"from_bus_id": 10, "to_bus_id": 11},
            {"from_bus_id": 10, "to_bus_id": 16},
            {"from_bus_id": 10, "to_bus_id": 17},
            {"from_bus_id": 10, "to_bus_id": 18},
            {"from_bus_id": 12, "to_bus_id": 13},
            {"from_bus_id": 13, "to_bus_id": 14},
            {"from_bus_id": 13, "to_bus_id": 15},
            {"from_bus_id": 17, "to_bus_id": 18},
        ]
    )
    # Synthetic capacities/demands: capacity grows with the product of endpoint IDs,
    # demand grows linearly with bus ID (bus 1 -> 0.1 MW, ..., bus 18 -> 1.8 MW)
    branch["capacity"] = branch["from_bus_id"] * branch["to_bus_id"] / 50
    demand = pd.Series(np.arange(0.1, 1.9, 0.1), index=range(1, 19))
    bottlenecks = identify_bottlenecks(
        branch, demand, root=frozenset({7, 8, 9, 10, 11})
    )
    # Keys are (articulation point, block) or (block, articulation point) pairs;
    # blocks are frozensets of their member buses (including articulation points)
    expected_all_keys = {
        (8, frozenset({8, 12, 13, 14, 15})),
        (frozenset({7, 8, 9, 10, 11}), 8),
        (10, frozenset({10, 16})),
        (10, frozenset({10, 17, 18})),
        (frozenset({7, 8, 9, 10, 11}), 10),
        (2, frozenset({2, 3, 4})),
        (2, frozenset({1, 2})),
        (frozenset({2, 5, 6, 7}), 2),
        (7, frozenset({2, 5, 6, 7})),
        (frozenset({7, 8, 9, 10, 11}), 7),
    }
    # 'constrained' pairs are the subset of 'all' where demand exceeds capacity
    expected_constrained_keys = {
        (frozenset({7, 8, 9, 10, 11}), 8),
        (frozenset({7, 8, 9, 10, 11}), 10),
        (2, frozenset({2, 3, 4})),
        (2, frozenset({1, 2})),
        (frozenset({2, 5, 6, 7}), 2),
        (7, frozenset({2, 5, 6, 7})),
        (frozenset({7, 8, 9, 10, 11}), 7),
    }
    assert set(bottlenecks["all"]) == expected_all_keys
    assert set(bottlenecks["constrained"]) == expected_constrained_keys
    # Every entry exposes the same three fields, with consistent types
    for k, v in bottlenecks["all"].items():
        assert set(v.keys()) == {"capacity", "demand", "descendants"}
        assert isinstance(v["capacity"], float)
        assert isinstance(v["demand"], float)
        assert isinstance(v["descendants"], set)
    # Constrained entries are the same objects/values as their 'all' counterparts
    for k, v in bottlenecks["constrained"].items():
        assert v == bottlenecks["all"][k]
    # Spot-check exact numbers for a representative subset of pairs
    results_to_check = {
        (8, frozenset({8, 12, 13, 14, 15})): {
            "descendants": set(),
            "demand": 5.4,
            "capacity": 8.96,
        },
        (2, frozenset({2, 3, 4})): {
            "descendants": set(),
            "demand": 0.7,
            "capacity": 0.28,
        },
        (frozenset({2, 5, 6, 7}), 2): {
            "descendants": {1, 3, 4},
            "demand": 1.0,
            "capacity": 0.44,
        },
        (7, frozenset({2, 5, 6, 7})): {
            "descendants": {1, 2, 3, 4},
            "demand": 2.1,
            "capacity": 1.54,
        },
        (frozenset({7, 8, 9, 10, 11}), 7): {
            "descendants": {1, 2, 3, 4, 5, 6},
            "demand": 2.8,
            "capacity": 2.66,
        },
    }
    for key, result in results_to_check.items():
        for subkey, subresult in result.items():
            if subkey == "descendants":
                # Sets compare exactly; floats need approximate comparison
                assert bottlenecks["all"][key][subkey] == result[subkey]
            else:
                assert bottlenecks["all"][key][subkey] == pytest.approx(result[subkey])
149 changes: 149 additions & 0 deletions prereise/gather/griddata/hifld/data_process/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from tqdm import tqdm

from prereise.gather.griddata.hifld.const import abv_state_neighbor
from prereise.gather.griddata.hifld.data_process.helpers import (
distribute_demand_from_zones_to_buses,
)


def min_dist_of_2_conn_comp(
Expand Down Expand Up @@ -315,3 +318,149 @@ def add_interconnects_by_connected_components(
for i, name in enumerate(interconnect_size_rank):
substations.loc[sorted_interconnects[i], "interconnect"] = name
substations.drop(seams_substations, inplace=True)


def find_descendants(graph, bctree, parent, grandparent, result=None):
    """Given a graph and its block-cut tree representation, aggregate demand from leaves
    towards root and identify capacity bottlenecks along the way.

    Recurses depth-first through ``bctree``, filling ``result`` with one entry per
    BC-tree node and one entry per (parent, child) edge visited. NOTE(review): the
    recursion depth equals the BC-tree depth; a very long chain of blocks could in
    principle hit Python's recursion limit — confirm acceptable for real grids.

    :param networkx.Graph graph: original graph, where nodes have at least a 'demand'
        attribute and edges have at least a 'capacity' attribute.
    :param networkx.Graph bctree: block-cut tree representation of ``graph``, where
        articulation point IDs are their original node ID and block ID are a frozenset
        of the nodes within the block (including articulation points).
    :param int/frozenset parent: a node ID within ``bctree`` to aggregate 'downstream'
        demand for and to evaluate capacity constraints towards.
    :param int/frozenset grandparent: a node ID within ``bctree`` which is directly
        'upstream' of ``parent``, used to identify 'downstream' directions. If None is
        passed, then ``parent`` is considered to be the 'root' node, i.e. all directions
        are 'downstream'.
    :param dict result: a dictionary to be used to store results. If None is passed, one
        will be created.
    :return: (*dict*) -- a dictionary of information for the ``parent`` node. Note: this
        dictionary is the value of the ``parent`` key within the higher-level ``result``
        dictionary.
    """
    if result is None:
        result = {}
    # Every BC-tree neighbor except the one we came from is 'downstream'
    children = set(bctree[parent]) - {grandparent}
    if isinstance(parent, int):
        # parent is an articulation point, children are blocks
        # (blocks are frozensets, so their union minus parent = all other block members)
        result[parent] = {
            "descendants": set().union(*children) - {parent},
            "demand": graph.nodes[parent]["demand"],
        }
    else:
        # parent is a block, children are articulation points
        # seed demand with the demand of every bus inside the block itself
        result[parent] = {
            "descendants": children.copy(),
            "demand": sum(graph.nodes[p]["demand"] for p in parent),
        }
    for child in children:
        # Recurse first: we need all of the child's descendants to sum their demands
        child_descendants = find_descendants(graph, bctree, child, parent, result)
        result[(parent, child)] = {
            "descendants": child_descendants["descendants"] - {parent},
        }
        result[parent]["descendants"] |= child_descendants["descendants"]
        if isinstance(child, int):
            # parent is a block, child is an articulation point
            # downstream branches are connected to child and within parent
            result[(parent, child)]["capacity"] = sum(
                graph[child][n]["capacity"] for n in graph[child] if n in parent
            )
            result[(parent, child)]["demand"] = child_descendants["demand"]
            # Avoid double-counting demand from the articulation point, which was
            # already included in the block's own seed demand above
            result[parent]["demand"] += (
                result[(parent, child)]["demand"] - graph.nodes[child]["demand"]
            )
        else:
            # parent is an articulation point, child is a block
            # downstream branches are connected to parent and within child
            result[(parent, child)]["capacity"] = sum(
                graph[parent][n]["capacity"] for n in graph[parent] if n in child
            )
            # We don't want to include the articulation point's demand, since the
            # child block counted it in its own seed demand
            result[(parent, child)]["demand"] = (
                child_descendants["demand"] - graph.nodes[parent]["demand"]
            )
            result[parent]["demand"] += result[(parent, child)]["demand"]

    return result[parent]


def identify_bottlenecks(branch, demand, root=None):
    """Given a table of branch connectivity and capacities, and bus demands, identify
    bottlenecks 'downstream' of the root of the block-cut tree.

    :param pandas.DataFrame branch: branch info, containing at least: 'from_bus_id',
        'to_bus_id', and 'capacity' columns.
    :param dict/pandas.Series demand: mapping of bus IDs to demand.
    :param int/frozenset root: node to use as the root of the constructed BC tree. If
        None, the largest biconnected component will be chosen.
    :return: (*dict*) -- dictionary with keys:
        - "all": a dictionary, keys are tuples of either (articulation point, block) or
          (block, articulation point), values are dictionaries of information on that
          potential constraint (keys of 'descendants', 'capacity', and 'demand').
        - "constrained": a dictionary, keys and values are identical to the 'all'
          dictionary except that only entries where demand is greater than capacity
          are contained.
        - "root": the root node.
    """
    # Build transmission graph
    # Parallel branches (regardless of orientation) are merged, summing capacities
    sorted_buses = branch[["from_bus_id", "to_bus_id"]].apply(sorted, axis=1).map(tuple)
    aggregated_capacity = branch.groupby(sorted_buses)["capacity"].sum()
    aggregated_branch = pd.DataFrame(
        aggregated_capacity.index.tolist(), columns=["bus1", "bus2"]
    ).assign(capacity=aggregated_capacity.tolist())
    graph = nx.convert_matrix.from_pandas_edgelist(
        aggregated_branch, "bus1", "bus2", ["capacity"]
    )
    nx.set_node_attributes(graph, {k: {"demand": v} for k, v in demand.items()})
    # Build block-cut graph of biconnected components
    # (each block is a frozenset of its buses, so it can serve as a hashable node ID)
    biconnected_components = list(
        frozenset(s) for s in nx.algorithms.biconnected_components(graph)
    )
    articulation_points = list(nx.algorithms.articulation_points(graph))
    bctree = nx.Graph()
    bctree.add_nodes_from(articulation_points)
    bctree.add_nodes_from(biconnected_components)
    # Connect each articulation point to every block that contains it
    bctree.add_edges_from(
        (a, b) for a in articulation_points for b in biconnected_components if a in b
    )
    # Using the largest biconnected component as the 'root' (unless one is given),
    # identify 'downstream' bottlenecks
    if root is None:
        root = frozenset(max(biconnected_components, key=len))
    descendants = {}
    find_descendants(graph, bctree, parent=root, grandparent=None, result=descendants)
    # Filter out BC-tree node information, keep block/articulation pairs only
    descendant_pairs = {k: v for k, v in descendants.items() if isinstance(k, tuple)}
    # Identify pairs with capacity constraints
    constrained_pairs = {
        k: v for k, v in descendant_pairs.items() if v["demand"] > v["capacity"]
    }
    return {"all": descendant_pairs, "constrained": constrained_pairs, "root": root}


def report_bottlenecks(branch, bus, zone_demand):
    """Separate the full branch table by interconnect, and report bottlenecks for each.

    :param pandas.DataFrame branch: full branch table, containing at least
        'from_bus_id', 'to_bus_id', 'rateA', and 'interconnect' columns.
    :param pandas.DataFrame bus: full bus table, containing at least 'zone_id' and
        'Pd' columns.
    :param pandas.DataFrame zone_demand: demand by zone. Index is timestamp, columns
        are zone IDs, values are zone demand (MW).
    """
    # Evaluate constraints against each bus's peak demand across all timestamps
    bus_demand = distribute_demand_from_zones_to_buses(zone_demand, bus).max()
    bottlenecks = {
        interconnect: identify_bottlenecks(filtered_branch, bus_demand)
        for interconnect, filtered_branch in branch.rename(
            {"rateA": "capacity"}, axis=1
        ).groupby("interconnect")
    }
    for interconnect, result in bottlenecks.items():
        # Print the interconnect name (previously printed the literal string
        # "interconnect" by mistake)
        print(interconnect)
        root = result["root"]
        constrained = result["constrained"]
        for (k1, k2), info in constrained.items():
            # For readability: mark the root node, and render blocks (frozensets)
            # as plain sets; articulation points (ints) are printed as-is
            k1 = "root" if k1 == root else k1 if isinstance(k1, int) else set(k1)
            k2 = "root" if k2 == root else k2 if isinstance(k2, int) else set(k2)
            print("from", k1, "to", f"{k2}:", info)
6 changes: 6 additions & 0 deletions prereise/gather/griddata/hifld/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
build_solar,
build_wind,
)
from prereise.gather.griddata.hifld.data_process.topology import report_bottlenecks
from prereise.gather.griddata.hifld.data_process.transmission import build_transmission


Expand All @@ -34,6 +35,7 @@ def create_csvs(
hydro_kwargs,
solar_kwargs,
wind_kwargs,
zone_demand=None,
):
"""Process HIFLD source data to CSVs compatible with PowerSimData.

Expand All @@ -45,11 +47,15 @@ def create_csvs(
:func:`prereise.gather.solardata.nsrdb.sam.retrieve_data_individual`.
:param dict wind_kwargs: keyword arguments to pass to
:func:`prereise.gather.griddata.hifld.data_process.profiles.build_wind`.
:param pandas.DataFrame zone_demand: per-zone demand profiles, used to evaluate
local transmission constraints. If None, this step will be skipped.
"""
_check_profile_kwargs(
{"hydro": hydro_kwargs, "solar": solar_kwargs, "wind": wind_kwargs}
)
full_tables = create_grid(output_folder)
if zone_demand is not None:
report_bottlenecks(full_tables["branch"], full_tables["bus"], zone_demand)
create_profiles(
full_tables["plant"],
profile_year,
Expand Down