1
+ from math import asin
2
+
1
3
import numpy as np
2
4
import pandas as pd
3
5
from scipy .optimize import curve_fit
@@ -24,7 +26,9 @@ def floatify(value, default=float("nan")):
24
26
return default
25
27
26
28
27
- def map_generators_to_sub_by_location (generators , substations , inplace = True ):
29
+ def map_generators_to_sub_by_location (
30
+ generators , substations , inplace = True , report_worst = None
31
+ ):
28
32
"""Determine the closest substation to each generator. For generators without
29
33
latitude and longitude, an attempt will be made to match via ZIP code, and failing
30
34
that a pandas.NA value will be returned.
@@ -37,17 +41,38 @@ def map_generators_to_sub_by_location(generators, substations, inplace=True):
37
41
'sub_id' column or to return a new one. If ``inplace`` is `True`, entries in
38
42
`generators` which have non-sensical combinations of 'state' and 'interconnect'
39
43
columns will have their 'interconnect' entries modified.
44
+ :param int report_worst: if not None, display the distances of the worst N mappings.
40
45
:return: (*pandas.DataFrame/None*) -- if ``inplace`` is `False`, return the modified
41
46
DataFrame; otherwise return nothing.
42
47
"""
43
48
44
- def get_sub_id_of_closest_substation (generator , state_trees , subs_state_lookup ):
49
def get_closest_substation(generator, state_trees, subs_state_lookup):
    """Find the substation nearest to one generator, and the distance to it.

    :param pandas.Series/dict generator: one generator record; the entries 'xyz',
        'voltage_class', 'Pmax' and 'interconnect' are read.
    :param dict state_trees: KDTree objects, keyed either by interconnect (all
        voltages) or by (interconnect, voltage class).
    :param dict subs_state_lookup: substation indices for each key of
        ``state_trees``, positionally aligned with the points of that tree.
    :return: (*pandas.Series*) -- entries 'dist' (great circle distance in miles)
        and 'sub_id'. Both are missing when the generator has no coordinates.
    """
    if not isinstance(generator["xyz"], list):
        # Same shape as the success path, so a row-wise DataFrame.apply always
        # expands into 'dist'/'sub_id' columns regardless of which row is first.
        return pd.Series({"dist": float("nan"), "sub_id": pd.NA})
    interconnect = generator["interconnect"]
    if pd.isnull(generator["voltage_class"]) or generator["Pmax"] < 100:
        grouper_key = interconnect
    else:
        grouper_key = (interconnect, generator["voltage_class"])
        if grouper_key not in state_trees:
            # No tree exists for this voltage class: search all voltages instead
            grouper_key = interconnect
    chord_dist, array_index = state_trees[grouper_key].query(generator["xyz"])
    sub_id = subs_state_lookup[grouper_key][array_index]
    # Translate chord distance to great circle distance (miles); this assumes the
    # xyz points lie on a unit sphere. The half-chord is clamped to 1 because
    # rounding can push it marginally outside the domain of asin.
    half_chord = min(chord_dist / 2, 1.0)
    dist_in_miles = 3963 * 2 * asin(half_chord)  # use 3963 mi as earth radius
    return pd.Series({"dist": dist_in_miles, "sub_id": sub_id})
61
+
62
def classify_voltages(voltage, voltage_ranges):
    """Look up the name of the voltage class which contains a given voltage.

    :param float voltage: voltage to classify.
    :param dict voltage_ranges: keys are class names, values are dictionaries with
        inclusive 'min' and 'max' bounds. Classes are tested in iteration order.
    :return: (*str/float*) -- name of the first class whose bounds contain
        ``voltage``, or NaN if no class does.
    """
    matching_classes = (
        class_name
        for class_name, limits in voltage_ranges.items()
        if limits["min"] <= voltage <= limits["max"]
    )
    return next(matching_classes, float("nan"))
67
+
68
+ voltage_ranges = {
69
+ "under 100" : {"min" : 0 , "max" : 99 },
70
+ "100-161" : {"min" : 100 , "max" : 161 },
71
+ "220-287" : {"min" : 220 , "max" : 287 },
72
+ "345" : {"min" : 345 , "max" : 345 },
73
+ "500" : {"min" : 500 , "max" : 500 },
74
+ "735 and above" : {"min" : 735 , "max" : float ("inf" )},
75
+ }
51
76
52
77
# Translate lat/lon to 3D positions (assume spherical earth, origin at center)
53
78
substations_with_xyz = substations .assign (
@@ -64,68 +89,67 @@ def get_sub_id_of_closest_substation(generator, state_trees, subs_state_lookup):
64
89
)
65
90
)
66
91
67
- # Group substations by state to build KDTrees
68
- subs_state_lookup = substations_with_xyz .groupby (["interconnect" , "STATE" ]).groups
92
+ # Bin voltages into broad classes
93
+ substations_with_xyz ["voltage_class" ] = substations ["MAX_VOLT" ].map (
94
+ lambda x : classify_voltages (x , voltage_ranges )
95
+ )
96
+ generators_with_xyz ["voltage_class" ] = generators ["Grid Voltage (kV)" ].map (
97
+ lambda x : classify_voltages (x , voltage_ranges )
98
+ )
99
+
100
+ # Group substations by voltage to build KDTrees
101
+ subs_voltage_lookup = {
102
+ (interconnect , voltage_level ): substations_with_xyz .query (
103
+ "interconnect == @interconnect and MAX_VOLT >= @voltage_range['min']"
104
+ ).index
105
+ for interconnect in generators ["interconnect" ].unique ()
106
+ for voltage_level , voltage_range in voltage_ranges .items ()
107
+ }
69
108
# Group substations by ZIP code for a fallback for generators without coordinates
70
109
subs_zip_groupby = substations_with_xyz .groupby (["interconnect" , "ZIP" ])
71
110
72
111
# Create a KDTree for each combination of interconnect and voltage class
73
- state_trees = {
112
+ voltage_trees = {
74
113
key : KDTree (np .array (substations_with_xyz .loc [sub_ids , "xyz" ].tolist ()))
75
- for key , sub_ids in subs_state_lookup .items ()
114
+ for key , sub_ids in subs_voltage_lookup .items ()
115
+ if len (sub_ids ) > 0
76
116
}
77
- # Ensure that we have a tree for every generator
78
- gens_state_groupby = generators_with_xyz .groupby (["interconnect" , "state" ])
79
- missing_groups = set (gens_state_groupby .groups ) - set (state_trees )
80
- if len (missing_groups ) > 0 :
81
- # There are some combinations of generator (interconnect, state) without subs
82
- allowable_border_states = {"KS" , "NE" , "OK" }
83
- for interconnect , state in missing_groups :
84
- if state in allowable_border_states :
85
- # Assume that the interconnect and state are correct
86
- print (
87
- f"no substations within ({ interconnect } , { state } ), "
88
- f"will map generators to substations within { interconnect } instead"
89
- )
90
- # Find all substations for the interconnection
91
- new_subs = substations_with_xyz .query ("interconnect == @interconnect" )
92
- # Extend the 'true' combinations of (interconnect, state) with fakes
93
- state_trees [(interconnect , state )] = KDTree (
94
- np .array (new_subs ["xyz" ].tolist ())
95
- )
96
- subs_state_lookup [(interconnect , state )] = new_subs .index
97
- else :
98
- # Assume that the state is correct, the interconnect is wrong
99
- print (
100
- f"no substations within ({ interconnect } , { state } ), "
101
- f"will map generators to substations within { state } instead"
102
- )
103
- (assumed ,) = {
104
- interconnect
105
- for interconnect , state_list in const .interconnect2state .items ()
106
- if interconnect not in {"ignore" , "split" } and state in state_list
107
- }
108
- gens_to_fix = gens_state_groupby .get_group ((interconnect , state )).index
109
- generators_with_xyz .loc [gens_to_fix , "interconnect" ] = assumed
117
+ # Create a KDTree for each interconnect (all voltages)
118
+ subs_interconnect_groupby = substations_with_xyz .groupby ("interconnect" )
119
+ for interconnect in generators ["interconnect" ].unique ():
120
+ tree_subs = subs_interconnect_groupby .get_group (interconnect )
121
+ voltage_trees [interconnect ] = KDTree (np .array (tree_subs ["xyz" ].tolist ()))
122
+ subs_voltage_lookup [interconnect ] = tree_subs .index
110
123
111
124
# Query the appropriate tree for each generator to get the closest substation ID
112
- sub_ids = generators_with_xyz .apply (
113
- lambda x : get_sub_id_of_closest_substation (x , state_trees , subs_state_lookup ),
125
+ mapping_results = generators_with_xyz .apply (
126
+ lambda x : get_closest_substation (x , voltage_trees , subs_voltage_lookup ),
114
127
axis = 1 ,
115
128
)
116
129
# For generators without coordinates, try to pick a substation with a matching ZIP
117
- for g in generators .loc [sub_ids .isnull ()].index :
130
+ for g in generators .loc [mapping_results [ "sub_id" ] .isnull ()].index :
118
131
try :
119
132
candidates = subs_zip_groupby .get_group (generators .loc [g , "ZIP" ])
120
- sub_ids .loc [g ] = candidates .index [0 ] # arbitrary choose the first one
133
+ # arbitrarily choose the first one
134
+ mapping_results .loc [g , "sub_id" ] = candidates .index [0 ]
121
135
except KeyError :
122
136
continue # No coordinates, no matching ZIP, we're out of luck
123
137
138
if report_worst is not None:
    # The per-generator results carry the distance in a column named 'dist'; it
    # is only called 'sub_dist' once copied onto the generators table, so sorting
    # by 'sub_dist' here would raise a KeyError.
    print(
        mapping_results.sort_values("dist", ascending=False)
        .join(generators[["Plant Code", "Grid Voltage (kV)", "Pmax"]])
        .head(report_worst)
    )
144
+
124
145
if inplace :
125
- generators ["sub_id" ] = sub_ids
146
+ generators ["sub_id" ] = mapping_results ["sub_id" ]
147
+ generators ["sub_dist" ] = mapping_results ["dist" ]
126
148
generators ["interconnect" ] = generators_with_xyz ["interconnect" ]
127
149
else :
128
- return generators_with_xyz .drop ("xyz" , axis = 1 ).assign (sub_id = sub_ids )
150
+ return generators_with_xyz .drop (["xyz" , "voltage_class" ], axis = 1 ).join (
151
+ mapping_results
152
+ )
129
153
130
154
131
155
def map_generator_to_bus_by_sub (generator , bus_groupby ):
@@ -375,7 +399,16 @@ def build_plant(bus, substations, kwargs={}):
375
399
epa_ampd_groupby = epa_ampd .groupby (["ORISPL_CODE" , "UNITID" ])
376
400
377
401
# Add information to generators based on Form 860 Plant table
378
- generators = generators .merge (plants , on = "Plant Code" , suffixes = (None , "_860Plant" ))
402
+ # Merging this way allows column-on-column merge while preserving original index
403
+ generators = (
404
+ generators .reset_index ()
405
+ .merge (
406
+ plants ,
407
+ on = "Plant Code" ,
408
+ suffixes = (None , "_860Plant" ),
409
+ )
410
+ .set_index ("index" )
411
+ )
379
412
generators .rename (
380
413
{"Latitude" : "lat" , "Longitude" : "lon" , "Zip" : "ZIP" }, axis = 1 , inplace = True
381
414
)
@@ -385,6 +418,7 @@ def build_plant(bus, substations, kwargs={}):
385
418
.map (const .balancingauthority2interconnect )
386
419
.combine_first (generators ["NERC Region" ].map (const .nercregion2interconnect ))
387
420
)
421
+ generators ["Grid Voltage (kV)" ] = generators ["Grid Voltage (kV)" ].map (floatify )
388
422
389
423
# Ensure we have Pmax and Pmin for each generator
390
424
generators ["Pmax" ] = generators [
0 commit comments