diff --git a/AUTHORS.rst b/AUTHORS.rst index a5fa30c1..2bd4eca4 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -17,3 +17,4 @@ Contributors * Sébastien Biner `@sbiner `_ * David Huard `@huard `_ * Gabriel Rondeau-Genesse `@RondeauG `_ +* Aslı Beşe `@aslibese `_ diff --git a/src/miranda/convert/_data_corrections.py b/src/miranda/convert/_data_corrections.py index 0ef24be9..53f2bac0 100644 --- a/src/miranda/convert/_data_corrections.py +++ b/src/miranda/convert/_data_corrections.py @@ -342,6 +342,14 @@ def _preprocess_correct(d: xr.Dataset, *, ops: list[partial]) -> xr.Dataset: return ds +def _correct_standard_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: + key = "_corrected_standard_name" + for var, val in _iter_entry_key(d, m, "variables", key, p): + if val: + d[var].attrs["standard_name"] = val + return d + + def _correct_units_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: key = "_corrected_units" for var, val in _iter_entry_key(d, m, "variables", key, p): @@ -450,53 +458,55 @@ def _transform(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: return d_out -def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: - key = "_offset_time" - d_out = xr.Dataset(coords=d.coords, attrs=d.attrs) - converted = [] - offset, offset_meaning = None, None - - time_freq = dict() - expected_period = _get_section_entry_key( - m, "dimensions", "time", "_ensure_correct_time", p - ) - if isinstance(expected_period, str): - time_freq["expected_period"] = expected_period - - for vv, offs in _iter_entry_key(d, m, "dimensions", key, p): - if offs: - # Offset time by value of one time-step - if offset is None and offset_meaning is None: - try: - offset, offset_meaning = get_time_frequency(d, **time_freq) - except TypeError: - logging.error( - "Unable to parse the time frequency. Verify data integrity before retrying." - ) - raise - - msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`." - - logging.info(msg) - with xr.set_options(keep_attrs=True): - out = d[vv] - out["time"] = out.time - np.timedelta64(offset[0], offset[1]) - d_out[vv] = out - converted.append(vv) - prev_history = d.attrs.get("history", "") - history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}" - d_out.attrs.update(dict(history=history)) - elif offs is False: - msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)." - - logging.info(msg) - continue - - # Copy unconverted variables - for vv in d.data_vars: - if vv not in converted: - d_out[vv] = d[vv] - return d_out +# TODO: Determine if this function is still needed + +# def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: +# key = "_offset_time" +# d_out = xr.Dataset(coords=d.coords, attrs=d.attrs) +# converted = [] +# offset, offset_meaning = None, None +# +# time_freq = dict() +# expected_period = _get_section_entry_key( +# m, "dimensions", "time", "_ensure_correct_time", p +# ) +# if isinstance(expected_period, str): +# time_freq["expected_period"] = expected_period +# +# for vv, offs in _iter_entry_key(d, m, "dimensions", key, p): +# if offs: +# # Offset time by value of one time-step +# if offset is None and offset_meaning is None: +# try: +# offset, offset_meaning = get_time_frequency(d, **time_freq) +# except TypeError: +# logging.error( +# "Unable to parse the time frequency. Verify data integrity before retrying." +# ) +# raise +# +# msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`." +# +# logging.info(msg) +# with xr.set_options(keep_attrs=True): +# out = d[vv] +# out["time"] = out.time - np.timedelta64(offset[0], offset[1]) +# d_out[vv] = out +# converted.append(vv) +# prev_history = d.attrs.get("history", "") +# history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}" +# d_out.attrs.update(dict(history=history)) +# elif offs is False: +# msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)." +# +# logging.info(msg) +# continue +# +# # Copy unconverted variables +# for vv in d.data_vars: +# if vv not in converted: +# d_out[vv] = d[vv] +# return d_out def _invert_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: @@ -536,7 +546,7 @@ def _units_cf_conversion(d: xr.Dataset, m: dict) -> xr.Dataset: for vv, unit in _iter_entry_key(d, m, "variables", "units", None): if unit: with xr.set_options(keep_attrs=True): - d[vv] = units.convert_units_to(d[vv], unit, context="hydro") + d[vv] = units.convert_units_to(d[vv], unit) prev_history = d.attrs.get("history", "") history = f"Converted variable `{vv}` to CF-compliant units (`{unit}`). {prev_history}" d.attrs.update(dict(history=history)) @@ -888,6 +898,7 @@ def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset: metadata_definition = load_json_data_mappings(project) ds = _correct_units_names(ds, project, metadata_definition) + ds = _correct_standard_names(ds, project, metadata_definition) ds = _transform(ds, project, metadata_definition) ds = _invert_sign(ds, project, metadata_definition) ds = _units_cf_conversion(ds, metadata_definition) @@ -895,10 +906,9 @@ def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset: ds = dims_conversion(ds, project, metadata_definition) ds = _ensure_correct_time(ds, project, metadata_definition) - ds = _offset_time(ds, project, metadata_definition) - + # TODO validate this is needed + # ds = _offset_time(ds, project, metadata_definition) ds = variable_conversion(ds, project, metadata_definition) - ds = metadata_conversion(ds, project, metadata_definition) ds.attrs["history"] = ( diff --git a/src/miranda/convert/data/ecmwf_cf_attrs.json b/src/miranda/convert/data/ecmwf_cf_attrs.json index 1c080ac3..c8aa133a 100644 --- a/src/miranda/convert/data/ecmwf_cf_attrs.json +++ b/src/miranda/convert/data/ecmwf_cf_attrs.json @@ -80,7 +80,6 @@ "_cf_variable_name": "tdps", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the temperature to which the air, at 2 metres above the surface of the Earth, would have to be cooled for saturation to occur. It is a measure of the humidity of the air. Combined with temperature and pressure, it can be used to calculate the relative humidity. 2m dew point temperature is calculated by interpolating between the lowest model level and the Earth's surface, taking account of the atmospheric conditions.", @@ -92,7 +91,6 @@ "_cf_variable_name": "psl", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: mean (interval: 1 hour)", "description": "This parameter is the pressure (force per unit area) of the atmosphere adjusted to the height of mean sea level. It is a measure of the weight that all the air in a column vertically above the area of Earth's surface would have at that point, if the point were located at the mean sea level. It is calculated over all surfaces - land, sea and in-land water.", @@ -103,19 +101,17 @@ "pev": { "_cf_variable_name": "evspsblpot", "_corrected_units": false, + "_corrected_standard_name": "lwe_thickness_of_water_evaporation_amount", "_invert_sign": { "era5-land": true, "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, + "era5-single-levels-monthly-means": true, "era5-single-levels-preliminary-back-extension": true }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", + "era5-single-levels-monthly-means": "amount2rate", "era5-single-levels-preliminary-back-extension": "amount2rate" }, "cell_methods": "time: mean (interval: 1 hour)", @@ -128,7 +124,6 @@ "ptype": { "_cf_variable_name": "prtype", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter describes the type of precipitation at the surface, at the specified time. A precipitation type is assigned wherever there is a non-zero value of precipitation. In the ECMWF Integrated Forecasting System (IFS) there are only two predicted precipitation variables: rain and snow. Precipitation type is derived from these two predicted variables in combination with atmospheric conditions, such as temperature. Values of precipitation type defined in the IFS: 0: No precipitation, 1: Rain, 3: Freezing rain (i.e. supercooled raindrops which freeze on contact with the ground and other surfaces), 5: Snow, 6: Wet snow (i.e. snow particles which are starting to melt); 7: Mixture of rain and snow, 8: Ice pellets. These precipitation types are consistent with WMO Code Table 4.201. Other types in this WMO table are not defined in the IFS.", @@ -140,7 +135,6 @@ "_cf_variable_name": "hus", "_corrected_units": 1, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "The total mass of moist air is the sum of the dry air, water vapour, cloud liquid, cloud ice, rain and falling snow.", @@ -151,7 +145,6 @@ "r": { "_cf_variable_name": "hur", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the water vapour pressure as a percentage of the value at which the air becomes saturated (the point at which water vapour begins to condense into liquid water or deposition into ice). For temperatures over 0°C (273.15 K) it is calculated for saturation over water. At temperatures below -23°C it is calculated for saturation over ice. Between -23°C and 0°C this parameter is calculated by interpolating between the ice and water values using a quadratic function.", @@ -163,7 +156,6 @@ "_cf_variable_name": "snr", "_corrected_units": "kg m-3", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "The ECMWF Integrated Forecasting System (IFS) represents snow as a single additional layer over the uppermost soil level. The snow may cover all or part of the grid box.", @@ -176,7 +168,6 @@ "_cf_variable_name": "snw", "_corrected_units": "Mg m-2", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "comment": "Liquid water equivalent of snow converted to snow amount using a water density of 1000 kg/m³.", @@ -191,7 +182,6 @@ "era5-land": "m" }, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "Snow thickness in m of liquid water equivalent converted to snow thickness using a water density of 1000 kg/m³ and a snow density of 300 kg/m³.", @@ -208,15 +198,12 @@ } }, "_corrected_units": "m", + "_corrected_standard_name": "lwe_thickness_of_precipitation_amount", "_invert_sign": false, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", + "era5-single-levels-monthly-means": "amount2rate", "era5-single-levels-preliminary-back-extension": "amount2rate" }, "cell_methods": "time: mean (interval: 1 hour)", @@ -234,11 +221,6 @@ "era5-single-levels": true, "era5-single-levels-preliminary-back-extension": true }, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -255,7 +237,6 @@ "_cf_variable_name": "ps", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the pressure (force per unit area) of the atmosphere on the surface of land, sea and in-land water. It is a measure of the weight of all the air in a column vertically above the area of the Earth's surface represented at a fixed point.", @@ -271,11 +252,6 @@ "era5-single-levels": true, "era5-single-levels-preliminary-back-extension": true }, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -292,11 +268,6 @@ "_cf_variable_name": "rss", "_corrected_units": "J m-2", "_invert_sign": false, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -319,11 +290,6 @@ }, "_corrected_units": "J m-2", "_invert_sign": false, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -340,7 +306,6 @@ "_cf_variable_name": "rls", "_corrected_units": "J m-2", "_invert_sign": false, - "_offset_time": true, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -363,11 +328,6 @@ }, "_corrected_units": "J m-2", "_invert_sign": false, - "_offset_time": { - "era5-land": true, - "era5-single-levels": true, - "era5-single-levels-preliminary-back-extension": true - }, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", @@ -384,7 +344,6 @@ "_cf_variable_name": "mrsolv1", "_corrected_units": "1", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the volume of water in soil layer 1 (0 - 7cm, the surface is at 0cm).", @@ -396,7 +355,6 @@ "_cf_variable_name": "mrsolv2", "_corrected_units": "1", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the volume of water in soil layer 2 (7 - 28cm, the surface is at 0cm).", @@ -408,7 +366,6 @@ "_cf_variable_name": "mrsolv3", "_corrected_units": "1", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the volume of water in soil layer 3 (28 - 100cm, the surface is at 0cm).", @@ -420,7 +377,6 @@ "_cf_variable_name": "mrsolv4", "_corrected_units": "1", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the volume of water in soil layer 4 (100 - 289cm, the surface is at 0cm).", @@ -432,7 +388,6 @@ "_cf_variable_name": "tas", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "description": "This parameter is the temperature of air at 2m above the surface of land, sea or in-land waters. 2m temperature is calculated by interpolating between the lowest model level and the Earth's surface, taking account of the atmospheric conditions.", @@ -449,11 +404,12 @@ } }, "_corrected_units": false, + "_corrected_standard_name": "lwe_thickness_of_precipitation_amount", "_invert_sign": false, - "_offset_time": true, "_transformation": { "era5-land": "deaccumulate", "era5-single-levels": "amount2rate", + "era5-single-levels-monthly-means": "amount2rate", "era5-single-levels-preliminary-back-extension": "amount2rate" }, "cell_methods": "time: mean (interval: 1 hour)", @@ -467,7 +423,6 @@ "_cf_variable_name": "uas", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "comments": "Care should be taken when comparing this parameter with observations, because wind observations vary on small space and time scales and are affected by the local terrain, vegetation and buildings that are represented only on average in the ECMWF Integrated Forecasting System.", @@ -480,7 +435,6 @@ "_cf_variable_name": "vas", "_corrected_units": false, "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "comments": "Care should be taken when comparing this parameter with observations, because wind observations vary on small space and time scales and are affected by the local terrain, vegetation and buildings that are represented only on average in the ECMWF Integrated Forecasting System.", @@ -493,7 +447,6 @@ "_cf_variable_name": "z", "_corrected_units": "m2 s-2", "_invert_sign": false, - "_offset_time": false, "_transformation": false, "cell_methods": "time: point", "comment": "The geopotential height can be calculated by dividing the geopotential by the Earth's gravitational acceleration, g (=9.80665 m s-2).", @@ -501,6 +454,28 @@ "long_name": "Geopotential", "standard_name": "geopotential", "units": "m2 s-2" + }, + "sst": { + "_cf_variable_name": "tos", + "_corrected_units": false, + "_invert_sign": false, + "_transformation": false, + "cell_methods": "time: point", + "description": "This parameter (SST) is the temperature of sea water near the surface. In ERA5, this parameter is a foundation SST, which means there are no variations due to the daily cycle of the sun (diurnal variations).", + "long_name": "Sea surface temperature", + "standard_name": "sea_surface_temperature", + "units": "degC" + }, + "siconc": { + "_cf_variable_name": "siconc", + "_corrected_units": "1", + "_invert_sign": false, + "_transformation": false, + "cell_methods": "time: point", + "description": "This parameter is the percentage of a grid box which is covered by sea ice. Sea ice can only occur in a grid box which includes ocean or inland water according to the land-sea mask and lake cover, at the resolution being used.", + "long_name": "Sea ice area percentage", + "standard_name": "sea_ice_area_fraction", + "units": "%" } } }