Skip to content

Commit 5949dd4

Browse files
authored
Added functionality for importing daily ERA5 data (#136)
Closes #136
1 parent f5d3b73 commit 5949dd4

5 files changed

Lines changed: 132 additions & 27 deletions

File tree

pygem/bin/run/run_calibration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,9 +581,9 @@ def run(list_packed_vars):
581581
gcm_elev = gcm.importGCMfxnearestneighbor_xarray(
582582
gcm.elev_fn, gcm.elev_vn, main_glac_rgi
583583
)
584-
# Lapse rate [degC m-1]
584+
# Lapse rate [degC m-1] (always monthly)
585585
gcm_lr, gcm_dates = gcm.importGCMvarnearestneighbor_xarray(
586-
gcm.lr_fn, gcm.lr_vn, main_glac_rgi, dates_table
586+
gcm.lr_fn, gcm.lr_vn, main_glac_rgi, dates_table, upscale_var_timestep=True
587587
)
588588

589589
# ===== LOOP THROUGH GLACIERS TO RUN CALIBRATION =====

pygem/class_climate.py

Lines changed: 122 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
111111
+ pygem_prms['climate']['paths']['cesm2_fp_fx_ending']
112112
)
113113
# Extra information
114-
self.timestep = pygem_prms['time']['timestep']
114+
# self.timestep = pygem_prms['time']['timestep']
115+
self.timestep = 'monthly' # future scenario is always monthly timestep
115116
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
116117
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
117118
self.sim_climate_scenario = sim_climate_scenario
@@ -164,7 +165,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
164165
+ pygem_prms['climate']['paths']['gfdl_fp_fx_ending']
165166
)
166167
# Extra information
167-
self.timestep = pygem_prms['time']['timestep']
168+
# self.timestep = pygem_prms['time']['timestep']
169+
self.timestep = 'monthly' # future scenario is always monthly timestep
168170
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
169171
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
170172
self.sim_climate_scenario = sim_climate_scenario
@@ -190,12 +192,18 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
190192
self.elev_fn = pygem_prms['climate']['paths']['era5_elev_fn']
191193
self.lr_fn = pygem_prms['climate']['paths']['era5_lr_fn']
192194
# Variable filepaths
193-
self.var_fp = (
194-
pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath']
195-
)
196-
self.fx_fp = (
197-
pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath']
198-
)
195+
if pygem_prms['climate']['paths']['era5_fullpath']:
196+
self.var_fp = ''
197+
self.fx_fp = ''
198+
else:
199+
self.var_fp = (
200+
pygem_prms['root']
201+
+ pygem_prms['climate']['paths']['era5_relpath']
202+
)
203+
self.fx_fp = (
204+
pygem_prms['root']
205+
+ pygem_prms['climate']['paths']['era5_relpath']
206+
)
199207
# Extra information
200208
self.timestep = pygem_prms['time']['timestep']
201209
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
@@ -295,7 +303,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
295303
+ '/'
296304
)
297305
# Extra information
298-
self.timestep = pygem_prms['time']['timestep']
306+
# self.timestep = pygem_prms['time']['timestep']
307+
self.timestep = 'monthly' # future scenario is always monthly timestep
299308
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
300309
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
301310
self.sim_climate_scenario = sim_climate_scenario
@@ -341,7 +350,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None):
341350
+ '/'
342351
)
343352
# Extra information
344-
self.timestep = pygem_prms['time']['timestep']
353+
# self.timestep = pygem_prms['time']['timestep']
354+
self.timestep = 'monthly' # future scenario is always monthly timestep
345355
self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname']
346356
self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname']
347357
self.sim_climate_scenario = sim_climate_scenario
@@ -443,6 +453,7 @@ def importGCMvarnearestneighbor_xarray(
443453
main_glac_rgi,
444454
dates_table,
445455
realizations=['r1i1p1f1', 'r4i1p1f1'],
456+
upscale_var_timestep=False,
446457
):
447458
"""
448459
Import time series of variables and extract nearest neighbor.
@@ -472,13 +483,99 @@ def importGCMvarnearestneighbor_xarray(
472483
timestep, i.e., be from the beginning/middle/end of month)
473484
"""
474485
# Import netcdf file
475-
if not os.path.exists(self.var_fp + filename):
476-
if os.path.exists(self.var_fp + filename.replace('r1i1p1f1', 'r4i1p1f1')):
477-
filename = filename.replace('r1i1p1f1', 'r4i1p1f1')
478-
if os.path.exists(self.var_fp + filename.replace('_native', '')):
479-
filename = filename.replace('_native', '')
486+
if self.timestep == 'monthly':
487+
if not os.path.exists(self.var_fp + filename):
488+
if os.path.exists(
489+
self.var_fp + filename.replace('r1i1p1f1', 'r4i1p1f1')
490+
):
491+
filename = filename.replace('r1i1p1f1', 'r4i1p1f1')
492+
if os.path.exists(self.var_fp + filename.replace('_native', '')):
493+
filename = filename.replace('_native', '')
494+
495+
data = xr.open_dataset(self.var_fp + filename)
496+
elif self.timestep == 'daily':
497+
year_start = pd.Timestamp(dates_table['date'].values[0]).year
498+
year_end = pd.Timestamp(dates_table['date'].values[-1]).year
499+
500+
lats = main_glac_rgi[self.rgi_lat_colname].values
501+
lons = main_glac_rgi[self.rgi_lon_colname].values
502+
# define lat/lon window around all glaciers in run (bounds expanded to nearest 0.25 degrees)
503+
min_lat, max_lat = np.floor(lats.min() * 4) / 4, np.ceil(lats.max() * 4) / 4
504+
min_lon, max_lon = np.floor(lons.min() * 4) / 4, np.ceil(lons.max() * 4) / 4
505+
if 'YYYY' in filename:
506+
datasets = []
507+
for yr in range(year_start, year_end + 1):
508+
data_yr = xr.open_dataset(
509+
self.var_fp + filename.replace('YYYY', str(yr))
510+
)
511+
if 'valid_time' in data_yr.coords or 'valid_time' in data_yr.dims:
512+
data_yr = data_yr.rename({'valid_time': self.time_vn})
513+
514+
# convert longitude from -180—180 to 0—360
515+
if data_yr.longitude.min() < 0:
516+
data_yr = data_yr.assign_coords(
517+
longitude=(data_yr.longitude % 360)
518+
)
519+
520+
# subset for desired lats and lons
521+
data_yr = data_yr.sel(
522+
latitude=slice(max_lat, min_lat),
523+
longitude=slice(min_lon, max_lon),
524+
)
525+
526+
# append data to list
527+
datasets.append(data_yr)
528+
529+
# combine along the time dimension
530+
data = xr.concat(datasets, dim=self.time_vn)
531+
532+
else:
533+
data = xr.open_dataset(self.var_fp + filename)
534+
data = data.sel(
535+
latitude=slice(max_lat, min_lat), longitude=slice(min_lon, max_lon)
536+
)
537+
538+
# mask out leap days
539+
if pygem_prms['time']['option_leapyear'] == 0 and not upscale_var_timestep:
540+
time_index = pd.to_datetime(data[self.time_vn].values)
541+
mask = ~((time_index.month == 2) & (time_index.day == 29))
542+
data = data.isel({self.time_vn: mask})
543+
544+
# Upscale timestep to match data (e.g., monthly lapserate for daily data)
545+
if upscale_var_timestep:
546+
# convert time to datetime
547+
time_monthly = pd.to_datetime(data[self.time_vn].values)
548+
var_monthly = data[vn] # xarray DataArray with dims (time, lat, lon)
549+
550+
# create empty DataArray for daily data
551+
daily_times = dates_table['date'].values
552+
daily_data = xr.DataArray(
553+
np.zeros(
554+
(len(daily_times), len(data.latitude), len(data.longitude))
555+
),
556+
dims=(self.time_vn, 'latitude', 'longitude'),
557+
coords={
558+
self.time_vn: daily_times,
559+
'latitude': data.latitude,
560+
'longitude': data.longitude,
561+
},
562+
name=vn,
563+
)
564+
565+
# loop through months and fill daily slots
566+
for i, t in enumerate(time_monthly):
567+
# find all days in this month
568+
idx = np.where(
569+
(dates_table['year'] == t.year)
570+
& (dates_table['month'] == t.month)
571+
)[0]
572+
573+
# assign monthly values to these daily indices
574+
daily_data[idx, :, :] = var_monthly.isel(time=i).values
575+
576+
# convert to Dataset with data variable vn
577+
data = daily_data.to_dataset(name=vn)
480578

481-
data = xr.open_dataset(self.var_fp + filename)
482579
glac_variable_series = np.zeros((main_glac_rgi.shape[0], dates_table.shape[0]))
483580

484581
# Check GCM provides required years of data
@@ -525,19 +622,22 @@ def importGCMvarnearestneighbor_xarray(
525622
pd.Series(data[self.time_vn]).apply(
526623
lambda x: x.strftime('%Y-%m-%d')
527624
)
528-
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[0]
625+
== dates_table['date']
626+
.apply(lambda x: x.strftime('%Y-%m-%d'))
627+
.iloc[0]
529628
)
530629
)[0][0]
531630
end_idx = (
532631
np.where(
533632
pd.Series(data[self.time_vn]).apply(
534633
lambda x: x.strftime('%Y-%m-%d')
535634
)
536-
== dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[
537-
dates_table.shape[0] - 1
538-
]
635+
== dates_table['date']
636+
.apply(lambda x: x.strftime('%Y-%m-%d'))
637+
.iloc[-1]
539638
)
540639
)[0][0]
640+
541641
# Extract the time series
542642
time_series = pd.Series(data[self.time_vn][start_idx : end_idx + 1])
543643
# Find Nearest Neighbor
@@ -577,6 +677,7 @@ def importGCMvarnearestneighbor_xarray(
577677
# Find unique latitude/longitudes
578678
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
579679
latlon_nearidx_unique = list(set(latlon_nearidx))
680+
580681
# Create dictionary of time series for each unique latitude/longitude
581682
glac_variable_dict = {}
582683
for latlon in latlon_nearidx_unique:
@@ -634,4 +735,5 @@ def importGCMvarnearestneighbor_xarray(
634735
)
635736
elif vn != self.lr_vn:
636737
print('Check units of air temperature or precipitation')
738+
637739
return glac_variable_series, time_series

pygem/pygem_modelsetup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,9 @@ def datesmodelrun(
100100
dates_table['month'] = dates_table['date'].dt.month
101101
dates_table['day'] = dates_table['date'].dt.day
102102
dates_table['daysinmonth'] = dates_table['date'].dt.daysinmonth
103+
dates_table['timestep'] = np.arange(len(dates_table['date']))
103104
# Set date as index
104-
dates_table.set_index('date', inplace=True)
105+
dates_table.set_index('timestep', inplace=True)
105106
# Remove leap year days if user selected this with option_leapyear
106107
if pygem_prms['time']['option_leapyear'] == 0:
107108
# First, change 'daysinmonth' number
@@ -119,9 +120,8 @@ def datesmodelrun(
119120
# Water year for northern hemisphere using USGS definition (October 1 - September 30th),
120121
# e.g., water year for 2000 is from October 1, 1999 - September 30, 2000
121122
dates_table['wateryear'] = dates_table['year']
122-
for step in range(dates_table.shape[0]):
123-
if dates_table.loc[step, 'month'] >= 10:
124-
dates_table.loc[step, 'wateryear'] = dates_table.loc[step, 'year'] + 1
123+
dates_table.loc[dates_table['month'] >= 10, 'wateryear'] = dates_table['year'] + 1
124+
125125
# Add column for seasons
126126
# create a season dictionary to assist groupby functions
127127
seasondict = {}

pygem/setup/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def _validate_config(self, config):
157157
'climate.sim_wateryear': str,
158158
'climate.constantarea_years': int,
159159
'climate.paths': dict,
160+
'climate.paths.era5_fullpath': bool,
160161
'climate.paths.era5_relpath': str,
161162
'climate.paths.era5_temp_fn': str,
162163
'climate.paths.era5_tempstd_fn': str,

pygem/setup/config.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,15 @@ climate:
6666
# ===== CLIMATE DATA FILEPATHS AND FILENAMES =====
6767
paths:
6868
# ERA5 (default reference climate data)
69+
era5_fullpath: False # bool. 'True' ignores 'root' and 'era5_relpath' for ERA5 data and assumes below filenames are absolute
6970
era5_relpath: /climate_data/ERA5/
7071
era5_temp_fn: ERA5_temp_monthly.nc
7172
era5_tempstd_fn: ERA5_tempstd_monthly.nc
7273
era5_prec_fn: ERA5_totalprecip_monthly.nc
7374
era5_elev_fn: ERA5_geopotential.nc
7475
era5_pressureleveltemp_fn: ERA5_pressureleveltemp_monthly.nc
7576
era5_lr_fn: ERA5_lapserates_monthly.nc
77+
7678
# CMIP5 (GCM data)
7779
cmip5_relpath: /climate_data/cmip5/
7880
cmip5_fp_var_ending: _r1i1p1_monNG/
@@ -324,7 +326,7 @@ time:
324326
wateryear_month_start: 10 # water year starting month
325327
winter_month_start: 10 # first month of winter (for HMA winter is October 1 - April 30)
326328
summer_month_start: 5 # first month of summer (for HMA summer is May 1 - Sept 30)
327-
timestep: monthly # time step ('monthly' only option at present)
329+
timestep: monthly # time step ('monthly' or 'daily')
328330

329331
# ===== MODEL CONSTANTS =====
330332
constants:

0 commit comments

Comments
 (0)