diff --git a/pygem/bin/run/run_calibration.py b/pygem/bin/run/run_calibration.py index f2d623f2..be0f01ed 100755 --- a/pygem/bin/run/run_calibration.py +++ b/pygem/bin/run/run_calibration.py @@ -581,9 +581,9 @@ def run(list_packed_vars): gcm_elev = gcm.importGCMfxnearestneighbor_xarray( gcm.elev_fn, gcm.elev_vn, main_glac_rgi ) - # Lapse rate [degC m-1] + # Lapse rate [degC m-1] (always monthly) gcm_lr, gcm_dates = gcm.importGCMvarnearestneighbor_xarray( - gcm.lr_fn, gcm.lr_vn, main_glac_rgi, dates_table + gcm.lr_fn, gcm.lr_vn, main_glac_rgi, dates_table, upscale_var_timestep=True ) # ===== LOOP THROUGH GLACIERS TO RUN CALIBRATION ===== diff --git a/pygem/class_climate.py b/pygem/class_climate.py index 300d13d5..d2564eff 100755 --- a/pygem/class_climate.py +++ b/pygem/class_climate.py @@ -111,7 +111,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None): + pygem_prms['climate']['paths']['cesm2_fp_fx_ending'] ) # Extra information - self.timestep = pygem_prms['time']['timestep'] + # self.timestep = pygem_prms['time']['timestep'] + self.timestep = 'monthly' # future scenario is always monthly timestep self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname'] self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname'] self.sim_climate_scenario = sim_climate_scenario @@ -164,7 +165,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None): + pygem_prms['climate']['paths']['gfdl_fp_fx_ending'] ) # Extra information - self.timestep = pygem_prms['time']['timestep'] + # self.timestep = pygem_prms['time']['timestep'] + self.timestep = 'monthly' # future scenario is always monthly timestep self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname'] self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname'] self.sim_climate_scenario = sim_climate_scenario @@ -190,12 +192,18 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None): self.elev_fn = pygem_prms['climate']['paths']['era5_elev_fn'] self.lr_fn = pygem_prms['climate']['paths']['era5_lr_fn'] # Variable filepaths - self.var_fp = ( - pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath'] - ) - self.fx_fp = ( - pygem_prms['root'] + pygem_prms['climate']['paths']['era5_relpath'] - ) + if pygem_prms['climate']['paths']['era5_fullpath']: + self.var_fp = '' + self.fx_fp = '' + else: + self.var_fp = ( + pygem_prms['root'] + + pygem_prms['climate']['paths']['era5_relpath'] + ) + self.fx_fp = ( + pygem_prms['root'] + + pygem_prms['climate']['paths']['era5_relpath'] + ) # Extra information self.timestep = pygem_prms['time']['timestep'] self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname'] @@ -295,7 +303,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None): + '/' ) # Extra information - self.timestep = pygem_prms['time']['timestep'] + # self.timestep = pygem_prms['time']['timestep'] + self.timestep = 'monthly' # future scenario is always monthly timestep self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname'] self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname'] self.sim_climate_scenario = sim_climate_scenario @@ -341,7 +350,8 @@ def __init__(self, name=str(), sim_climate_scenario=str(), realization=None): + '/' ) # Extra information - self.timestep = pygem_prms['time']['timestep'] + # self.timestep = pygem_prms['time']['timestep'] + self.timestep = 'monthly' # future scenario is always monthly timestep self.rgi_lat_colname = pygem_prms['rgi']['rgi_lat_colname'] self.rgi_lon_colname = pygem_prms['rgi']['rgi_lon_colname'] self.sim_climate_scenario = sim_climate_scenario @@ -443,6 +453,7 @@ def importGCMvarnearestneighbor_xarray( main_glac_rgi, dates_table, realizations=['r1i1p1f1', 'r4i1p1f1'], + upscale_var_timestep=False, ): """ Import time series of variables and extract nearest neighbor. @@ -472,13 +483,99 @@ def importGCMvarnearestneighbor_xarray( timestep, i.e., be from the beginning/middle/end of month) """ # Import netcdf file - if not os.path.exists(self.var_fp + filename): - if os.path.exists(self.var_fp + filename.replace('r1i1p1f1', 'r4i1p1f1')): - filename = filename.replace('r1i1p1f1', 'r4i1p1f1') - if os.path.exists(self.var_fp + filename.replace('_native', '')): - filename = filename.replace('_native', '') + if self.timestep == 'monthly': + if not os.path.exists(self.var_fp + filename): + if os.path.exists( + self.var_fp + filename.replace('r1i1p1f1', 'r4i1p1f1') + ): + filename = filename.replace('r1i1p1f1', 'r4i1p1f1') + if os.path.exists(self.var_fp + filename.replace('_native', '')): + filename = filename.replace('_native', '') + + data = xr.open_dataset(self.var_fp + filename) + elif self.timestep == 'daily': + year_start = pd.Timestamp(dates_table['date'].values[0]).year + year_end = pd.Timestamp(dates_table['date'].values[-1]).year + + lats = main_glac_rgi[self.rgi_lat_colname].values + lons = main_glac_rgi[self.rgi_lon_colname].values + # define lat/lon window around all glaciers in run (bounds expanded to nearest 0.25 degrees) + min_lat, max_lat = np.floor(lats.min() * 4) / 4, np.ceil(lats.max() * 4) / 4 + min_lon, max_lon = np.floor(lons.min() * 4) / 4, np.ceil(lons.max() * 4) / 4 + if 'YYYY' in filename: + datasets = [] + for yr in range(year_start, year_end + 1): + data_yr = xr.open_dataset( + self.var_fp + filename.replace('YYYY', str(yr)) + ) + if 'valid_time' in data_yr.coords or 'valid_time' in data_yr.dims: + data_yr = data_yr.rename({'valid_time': self.time_vn}) + + # convert longitude from -180—180 to 0—360 + if data_yr.longitude.min() < 0: + data_yr = data_yr.assign_coords( + longitude=(data_yr.longitude % 360) + ) + + # subset for desired lats and lons + data_yr = data_yr.sel( + latitude=slice(max_lat, min_lat), + longitude=slice(min_lon, max_lon), + ) + + # append data to list + datasets.append(data_yr) + + # combine along the time dimension + data = xr.concat(datasets, dim=self.time_vn) + + else: + data = xr.open_dataset(self.var_fp + filename) + data = data.sel( + latitude=slice(max_lat, min_lat), longitude=slice(min_lon, max_lon) + ) + + # mask out leap days + if pygem_prms['time']['option_leapyear'] == 0 and not upscale_var_timestep: + time_index = pd.to_datetime(data[self.time_vn].values) + mask = ~((time_index.month == 2) & (time_index.day == 29)) + data = data.isel({self.time_vn: mask}) + + # Upscale timestep to match data (e.g., monthly lapserate for daily data) + if upscale_var_timestep: + # convert time to datetime + time_monthly = pd.to_datetime(data[self.time_vn].values) + var_monthly = data[vn] # xarray DataArray with dims (time, lat, lon) + + # create empty DataArray for daily data + daily_times = dates_table['date'].values + daily_data = xr.DataArray( + np.zeros( + (len(daily_times), len(data.latitude), len(data.longitude)) + ), + dims=(self.time_vn, 'latitude', 'longitude'), + coords={ + self.time_vn: daily_times, + 'latitude': data.latitude, + 'longitude': data.longitude, + }, + name=vn, + ) + + # loop through months and fill daily slots + for i, t in enumerate(time_monthly): + # find all days in this month + idx = np.where( + (dates_table['year'] == t.year) + & (dates_table['month'] == t.month) + )[0] + + # assign monthly values to these daily indices + daily_data[idx, :, :] = var_monthly.isel(time=i).values + + # convert to Dataset with data variable vn + data = daily_data.to_dataset(name=vn) - data = xr.open_dataset(self.var_fp + filename) glac_variable_series = np.zeros((main_glac_rgi.shape[0], dates_table.shape[0])) # Check GCM provides required years of data @@ -525,7 +622,9 @@ def importGCMvarnearestneighbor_xarray( pd.Series(data[self.time_vn]).apply( lambda x: x.strftime('%Y-%m-%d') ) - == dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[0] + == dates_table['date'] + .apply(lambda x: x.strftime('%Y-%m-%d')) + .iloc[0] ) )[0][0] end_idx = ( @@ -533,11 +632,12 @@ def importGCMvarnearestneighbor_xarray( pd.Series(data[self.time_vn]).apply( lambda x: x.strftime('%Y-%m-%d') ) - == dates_table['date'].apply(lambda x: x.strftime('%Y-%m-%d'))[ - dates_table.shape[0] - 1 - ] + == dates_table['date'] + .apply(lambda x: x.strftime('%Y-%m-%d')) + .iloc[-1] ) )[0][0] + # Extract the time series time_series = pd.Series(data[self.time_vn][start_idx : end_idx + 1]) # Find Nearest Neighbor @@ -577,6 +677,7 @@ def importGCMvarnearestneighbor_xarray( # Find unique latitude/longitudes latlon_nearidx = list(zip(lat_nearidx, lon_nearidx)) latlon_nearidx_unique = list(set(latlon_nearidx)) + # Create dictionary of time series for each unique latitude/longitude glac_variable_dict = {} for latlon in latlon_nearidx_unique: @@ -634,4 +735,5 @@ def importGCMvarnearestneighbor_xarray( ) elif vn != self.lr_vn: print('Check units of air temperature or precipitation') + return glac_variable_series, time_series diff --git a/pygem/pygem_modelsetup.py b/pygem/pygem_modelsetup.py index 090fa8c6..d47894ec 100755 --- a/pygem/pygem_modelsetup.py +++ b/pygem/pygem_modelsetup.py @@ -100,8 +100,9 @@ def datesmodelrun( dates_table['month'] = dates_table['date'].dt.month dates_table['day'] = dates_table['date'].dt.day dates_table['daysinmonth'] = dates_table['date'].dt.daysinmonth + dates_table['timestep'] = np.arange(len(dates_table['date'])) # Set date as index - dates_table.set_index('date', inplace=True) + dates_table.set_index('timestep', inplace=True) # Remove leap year days if user selected this with option_leapyear if pygem_prms['time']['option_leapyear'] == 0: # First, change 'daysinmonth' number @@ -119,9 +120,8 @@ def datesmodelrun( # Water year for northern hemisphere using USGS definition (October 1 - September 30th), # e.g., water year for 2000 is from October 1, 1999 - September 30, 2000 dates_table['wateryear'] = dates_table['year'] - for step in range(dates_table.shape[0]): - if dates_table.loc[step, 'month'] >= 10: - dates_table.loc[step, 'wateryear'] = dates_table.loc[step, 'year'] + 1 + dates_table.loc[dates_table['month'] >= 10, 'wateryear'] = dates_table['year'] + 1 + # Add column for seasons # create a season dictionary to assist groupby functions seasondict = {} diff --git a/pygem/setup/config.py b/pygem/setup/config.py index 86d28094..608daf7b 100644 --- a/pygem/setup/config.py +++ b/pygem/setup/config.py @@ -157,6 +157,7 @@ def _validate_config(self, config): 'climate.sim_wateryear': str, 'climate.constantarea_years': int, 'climate.paths': dict, + 'climate.paths.era5_fullpath': bool, 'climate.paths.era5_relpath': str, 'climate.paths.era5_temp_fn': str, 'climate.paths.era5_tempstd_fn': str, diff --git a/pygem/setup/config.yaml b/pygem/setup/config.yaml index 6bb8e578..9755acee 100644 --- a/pygem/setup/config.yaml +++ b/pygem/setup/config.yaml @@ -66,6 +66,7 @@ climate: # ===== CLIMATE DATA FILEPATHS AND FILENAMES ===== paths: # ERA5 (default reference climate data) + era5_fullpath: False # bool. 'True' ignores 'root' and 'era5_relpath' for ERA5 data and assumes below filenames are absolute era5_relpath: /climate_data/ERA5/ era5_temp_fn: ERA5_temp_monthly.nc era5_tempstd_fn: ERA5_tempstd_monthly.nc @@ -73,6 +74,7 @@ climate: era5_elev_fn: ERA5_geopotential.nc era5_pressureleveltemp_fn: ERA5_pressureleveltemp_monthly.nc era5_lr_fn: ERA5_lapserates_monthly.nc + # CMIP5 (GCM data) cmip5_relpath: /climate_data/cmip5/ cmip5_fp_var_ending: _r1i1p1_monNG/ @@ -324,7 +326,7 @@ time: wateryear_month_start: 10 # water year starting month winter_month_start: 10 # first month of winter (for HMA winter is October 1 - April 30) summer_month_start: 5 # first month of summer (for HMA summer is May 1 - Sept 30) - timestep: monthly # time step ('monthly' only option at present) + timestep: monthly # time step ('monthly' or 'daily') # ===== MODEL CONSTANTS ===== constants: