Skip to content

Commit 39058fb

Browse files
committed
refactor: map generators to substations using voltages
1 parent a0a3b8b commit 39058fb

File tree

1 file changed

+84
-50
lines changed

1 file changed

+84
-50
lines changed

prereise/gather/griddata/hifld/data_process/generators.py

+84-50
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from math import asin
2+
13
import numpy as np
24
import pandas as pd
35
from scipy.optimize import curve_fit
@@ -24,7 +26,9 @@ def floatify(value, default=float("nan")):
2426
return default
2527

2628

27-
def map_generators_to_sub_by_location(generators, substations, inplace=True):
29+
def map_generators_to_sub_by_location(
30+
generators, substations, inplace=True, report_worst=None
31+
):
2832
"""Determine the closest substation to each generator. For generators without
2933
latitude and longitude, an attempt will be made to match via ZIP code, and failing
3034
that a pandas.NA value will be returned.
@@ -37,17 +41,38 @@ def map_generators_to_sub_by_location(generators, substations, inplace=True):
3741
'sub_id' column or to return a new one. If ``inplace`` is `True`, entries in
3842
`generators` which have non-sensical combinations of 'state' and 'interconnect'
3943
columns will have their 'interconnect' entries modified.
44+
:param int report_worst: if not None, display the distances of the worst N mappings.
4045
:return: (*pandas.DataFrame/None*) -- if ``inplace`` is `False`, return the modified
4146
DataFrame; otherwise return nothing.
4247
"""
4348

44-
def get_sub_id_of_closest_substation(generator, state_trees, subs_state_lookup):
49+
def get_closest_substation(generator, state_trees, subs_state_lookup):
4550
if not isinstance(generator["xyz"], list):
4651
return pd.NA
47-
grouper_key = (generator["interconnect"], generator["state"])
48-
_, array_index = state_trees[grouper_key].query(generator["xyz"])
49-
sub_index = subs_state_lookup[grouper_key][array_index]
50-
return sub_index
52+
if pd.isnull(generator["voltage_class"]) or generator["Pmax"] < 100:
53+
grouper_key = generator["interconnect"]
54+
else:
55+
grouper_key = (generator["interconnect"], generator["voltage_class"])
56+
chord_dist, array_index = voltage_trees[grouper_key].query(generator["xyz"])
57+
sub_id = subs_voltage_lookup[grouper_key][array_index]
58+
# Translate chord distance (unit sphere) to great circle distance (miles)
59+
dist_in_miles = 3963 * 2 * asin(chord_dist / 2) # use 3963 mi as earth radius
60+
return pd.Series({"dist": dist_in_miles, "sub_id": sub_id})
61+
62+
def classify_voltages(voltage, voltage_ranges):
63+
for v_range, bounds in voltage_ranges.items():
64+
if bounds["min"] <= voltage <= bounds["max"]:
65+
return v_range
66+
return float("nan")
67+
68+
voltage_ranges = {
69+
"under 100": {"min": 0, "max": 99},
70+
"100-161": {"min": 100, "max": 161},
71+
"220-287": {"min": 220, "max": 287},
72+
"345": {"min": 345, "max": 345},
73+
"500": {"min": 500, "max": 500},
74+
"735 and above": {"min": 735, "max": float("inf")},
75+
}
5176

5277
# Translate lat/lon to 3D positions (assume spherical earth, origin at center)
5378
substations_with_xyz = substations.assign(
@@ -64,68 +89,67 @@ def get_sub_id_of_closest_substation(generator, state_trees, subs_state_lookup):
6489
)
6590
)
6691

67-
# Group substations by state to build KDTrees
68-
subs_state_lookup = substations_with_xyz.groupby(["interconnect", "STATE"]).groups
92+
# Bin voltages into broad classes
93+
substations_with_xyz["voltage_class"] = substations["MAX_VOLT"].map(
94+
lambda x: classify_voltages(x, voltage_ranges)
95+
)
96+
generators_with_xyz["voltage_class"] = generators["Grid Voltage (kV)"].map(
97+
lambda x: classify_voltages(x, voltage_ranges)
98+
)
99+
100+
# Group substations by interconnect and voltage class (each class includes every substation at or above the class minimum) to build KDTrees
101+
subs_voltage_lookup = {
102+
(interconnect, voltage_level): substations_with_xyz.query(
103+
"interconnect == @interconnect and MAX_VOLT >= @voltage_range['min']"
104+
).index
105+
for interconnect in generators["interconnect"].unique()
106+
for voltage_level, voltage_range in voltage_ranges.items()
107+
}
69108
# Group substations by ZIP code for a fallback for generators without coordinates
70109
subs_zip_groupby = substations_with_xyz.groupby(["interconnect", "ZIP"])
71110

72111
# Create a KDTree for each combination of interconnect and voltage class
73-
state_trees = {
112+
voltage_trees = {
74113
key: KDTree(np.array(substations_with_xyz.loc[sub_ids, "xyz"].tolist()))
75-
for key, sub_ids in subs_state_lookup.items()
114+
for key, sub_ids in subs_voltage_lookup.items()
115+
if len(sub_ids) > 0
76116
}
77-
# Ensure that we have a tree for every generator
78-
gens_state_groupby = generators_with_xyz.groupby(["interconnect", "state"])
79-
missing_groups = set(gens_state_groupby.groups) - set(state_trees)
80-
if len(missing_groups) > 0:
81-
# There are some combinations of generator (interconnect, state) without subs
82-
allowable_border_states = {"KS", "NE", "OK"}
83-
for interconnect, state in missing_groups:
84-
if state in allowable_border_states:
85-
# Assume that the interconnect and state are correct
86-
print(
87-
f"no substations within ({interconnect}, {state}), "
88-
f"will map generators to substations within {interconnect} instead"
89-
)
90-
# Find all substations for the interconnection
91-
new_subs = substations_with_xyz.query("interconnect == @interconnect")
92-
# Extend the 'true' combinations of (interconnect, state) with fakes
93-
state_trees[(interconnect, state)] = KDTree(
94-
np.array(new_subs["xyz"].tolist())
95-
)
96-
subs_state_lookup[(interconnect, state)] = new_subs.index
97-
else:
98-
# Assume that the state is correct, the interconnect is wrong
99-
print(
100-
f"no substations within ({interconnect}, {state}), "
101-
f"will map generators to substations within {state} instead"
102-
)
103-
(assumed,) = {
104-
interconnect
105-
for interconnect, state_list in const.interconnect2state.items()
106-
if interconnect not in {"ignore", "split"} and state in state_list
107-
}
108-
gens_to_fix = gens_state_groupby.get_group((interconnect, state)).index
109-
generators_with_xyz.loc[gens_to_fix, "interconnect"] = assumed
117+
# Create a KDTree for each interconnect (all voltages)
118+
subs_interconnect_groupby = substations_with_xyz.groupby("interconnect")
119+
for interconnect in generators["interconnect"].unique():
120+
tree_subs = subs_interconnect_groupby.get_group(interconnect)
121+
voltage_trees[interconnect] = KDTree(np.array(tree_subs["xyz"].tolist()))
122+
subs_voltage_lookup[interconnect] = tree_subs.index
110123

111124
# Query the appropriate tree for each generator to get the closest substation ID
112-
sub_ids = generators_with_xyz.apply(
113-
lambda x: get_sub_id_of_closest_substation(x, state_trees, subs_state_lookup),
125+
mapping_results = generators_with_xyz.apply(
126+
lambda x: get_closest_substation(x, voltage_trees, subs_voltage_lookup),
114127
axis=1,
115128
)
116129
# For generators without coordinates, try to pick a substation with a matching ZIP
117-
for g in generators.loc[sub_ids.isnull()].index:
130+
for g in generators.loc[mapping_results["sub_id"].isnull()].index:
118131
try:
119132
candidates = subs_zip_groupby.get_group(generators.loc[g, "ZIP"])
120-
sub_ids.loc[g] = candidates.index[0] # arbitrary choose the first one
133+
# arbitrarily choose the first one
134+
mapping_results.loc[g, "sub_id"] = candidates.index[0]
121135
except KeyError:
122136
continue # No coordinates, no matching ZIP, we're out of luck
123137

138+
if report_worst is not None:
139+
print(
140+
mapping_results.sort_values("dist", ascending=False)  # mapping rows carry the distance as "dist"; "sub_dist" only exists on `generators` later
141+
.join(generators[["Plant Code", "Grid Voltage (kV)", "Pmax"]])
142+
.head(report_worst)
143+
)
144+
124145
if inplace:
125-
generators["sub_id"] = sub_ids
146+
generators["sub_id"] = mapping_results["sub_id"]
147+
generators["sub_dist"] = mapping_results["dist"]
126148
generators["interconnect"] = generators_with_xyz["interconnect"]
127149
else:
128-
return generators_with_xyz.drop("xyz", axis=1).assign(sub_id=sub_ids)
150+
return generators_with_xyz.drop(["xyz", "voltage_class"], axis=1).join(
151+
mapping_results
152+
)
129153

130154

131155
def map_generator_to_bus_by_sub(generator, bus_groupby):
@@ -375,7 +399,16 @@ def build_plant(bus, substations, kwargs={}):
375399
epa_ampd_groupby = epa_ampd.groupby(["ORISPL_CODE", "UNITID"])
376400

377401
# Add information to generators based on Form 860 Plant table
378-
generators = generators.merge(plants, on="Plant Code", suffixes=(None, "_860Plant"))
402+
# Merging this way allows column-on-column merge while preserving original index
403+
generators = (
404+
generators.reset_index()
405+
.merge(
406+
plants,
407+
on="Plant Code",
408+
suffixes=(None, "_860Plant"),
409+
)
410+
.set_index("index")
411+
)
379412
generators.rename(
380413
{"Latitude": "lat", "Longitude": "lon", "Zip": "ZIP"}, axis=1, inplace=True
381414
)
@@ -385,6 +418,7 @@ def build_plant(bus, substations, kwargs={}):
385418
.map(const.balancingauthority2interconnect)
386419
.combine_first(generators["NERC Region"].map(const.nercregion2interconnect))
387420
)
421+
generators["Grid Voltage (kV)"] = generators["Grid Voltage (kV)"].map(floatify)
388422

389423
# Ensure we have Pmax and Pmin for each generator
390424
generators["Pmax"] = generators[

0 commit comments

Comments
 (0)