-
Notifications
You must be signed in to change notification settings - Fork 42
Add scripts to preprocess the GHCN snow data #2019
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
0644723
Add the configures and scripts to preprocess and assimilate the GHCN
jiaruidong2017 f08c891
Delete parm/snow/snow_obs_list.yaml.j2
jiaruidong2017 2c8581b
Revert the change to the obs list
jiaruidong2017 e4a399f
Move ghcn-stations.txt to the FIX directory
jiaruidong2017 37b8096
Add notes to indicate the code origin
jiaruidong2017 9123df8
Fix norm error
jiaruidong2017 57df835
Merge branch 'develop' into feature/snow-ghcn2
jiaruidong2017 e073e96
Merge branch 'develop' into feature/snow-ghcn2
jiaruidong2017 8095bea
Merge branch 'develop' into feature/snow-ghcn2
CoryMartin-NOAA ccce171
Merge branch 'feature/snow-ghcn2' of https://github.com/NOAA-EMC/GDAS…
jiaruidong2017 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| stage: | ||
| mkdir: | ||
| - '{{ snow_prepobs_path }}' | ||
| copy_opt: | ||
| - ['{{ COMIN_OBS }}/{{ OPREFIX }}ghcn_snow.csv', '{{ DATA }}'] | ||
| - ['{{ FIXgfs }}/gdas/snow/ghcnd-stations.txt', '{{ DATA }}'] | ||
| ghcn2ioda: | ||
| copy_opt: | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| - ['{{ DATA }}/{{ OPREFIX }}ghcn_snow.nc', '{{ snow_prepobs_path }}'] | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| #!/usr/bin/env python3 | ||
| # | ||
| # (C) Copyright 2021-2022 NOAA/NWS/NCEP/EMC | ||
| # | ||
| # This software is licensed under the terms of the Apache Licence Version 2.0 | ||
| # This file is copied from the JEDI ioda-converts repo. | ||
| # | ||
|
|
||
| import argparse | ||
jiaruidong2017 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import sys | ||
| from datetime import datetime, timezone | ||
| from dateutil.parser import parse | ||
|
|
||
| import pyiodaconv.ioda_conv_engines as iconv | ||
| from collections import defaultdict, OrderedDict | ||
| from pyiodaconv.orddicts import DefaultOrderedDict | ||
| from pyiodaconv.def_jedi_utils import epoch, iso8601_string | ||
| from pyiodaconv.def_jedi_utils import record_time | ||
|
|
||
| locationKeyList = [ | ||
| ("latitude", "float"), | ||
| ("longitude", "float"), | ||
| ("stationElevation", "float"), | ||
| ("dateTime", "long"), | ||
| ] | ||
|
|
||
| AttrData = { | ||
| } | ||
|
|
||
| DimDict = { | ||
| } | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| VarDims = { | ||
| 'totalSnowDepth': ['Location'], | ||
| } | ||
|
|
||
| float_missing_value = iconv.get_default_fill_val(np.float32) | ||
| int_missing_value = iconv.get_default_fill_val(np.int32) | ||
| long_missing_value = iconv.get_default_fill_val(np.int64) | ||
|
|
||
|
|
||
| def get_epoch_time(adatetime): | ||
|
|
||
| # take python datetime object and convert to seconds from epoch | ||
| time_offset = round((adatetime - epoch).total_seconds()) | ||
|
|
||
| return time_offset | ||
|
|
||
|
|
||
| class ghcn(object): | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def __init__(self, filename, fixfile, date, warn): | ||
| self.filename = filename | ||
| self.fixfile = fixfile | ||
| self.date = date | ||
| self.warn = warn | ||
| self.varDict = defaultdict(lambda: defaultdict(dict)) | ||
| self.metaDict = defaultdict(lambda: defaultdict(dict)) | ||
| self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict)) | ||
| self.varAttrs = defaultdict(lambda: DefaultOrderedDict(OrderedDict)) | ||
| self._read() | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| def _read(self): | ||
|
|
||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # set up variable names for IODA | ||
| iodavar = 'totalSnowDepth' | ||
| self.varDict[iodavar]['valKey'] = iodavar, iconv.OvalName() | ||
| self.varDict[iodavar]['errKey'] = iodavar, iconv.OerrName() | ||
| self.varDict[iodavar]['qcKey'] = iodavar, iconv.OqcName() | ||
| self.varAttrs[iodavar, iconv.OvalName()]['coordinates'] = 'longitude latitude' | ||
| self.varAttrs[iodavar, iconv.OerrName()]['coordinates'] = 'longitude latitude' | ||
| self.varAttrs[iodavar, iconv.OqcName()]['coordinates'] = 'longitude latitude' | ||
| self.varAttrs[iodavar, iconv.OvalName()]['units'] = 'mm' | ||
| self.varAttrs[iodavar, iconv.OerrName()]['units'] = 'mm' | ||
|
|
||
| # read in the GHCN data csv file | ||
| cols = ["ID", "DATETIME", "ELEMENT", "DATA_VALUE", "M_FLAG", "Q_FLAG", "S_FLAG", "OBS_TIME"] | ||
| sub_cols = ["ID", "DATETIME", "ELEMENT", "DATA_VALUE"] | ||
| df30_list = [] | ||
| # Fix dtypeWarning with mixed types via set low_memory=False | ||
| df20all = pd.read_csv(self.filename, header=None, names=cols, low_memory=False) | ||
| df20 = df20all[sub_cols] | ||
| df20all = None | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| df30_list.append(df20) | ||
| df20 = None | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # select only snow depth, valid obs, and date | ||
| df30 = pd.concat(df30_list, ignore_index=True) | ||
| df30 = df30[df30["ELEMENT"] == "SNWD"] | ||
| df30 = df30[df30["DATA_VALUE"].astype('float32') >= 0.0] | ||
| df30["DATETIME"] = df30.apply(lambda row: parse(str(row["DATETIME"])).date(), axis=1) | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| startdate = self.date | ||
| valid_date = datetime.strptime(startdate, "%Y%m%d%H") | ||
| valid_date = valid_date.replace(tzinfo=timezone.utc) | ||
| select_date = valid_date.strftime('%Y%m%d') | ||
| new_date = parse(select_date).date() | ||
| df30 = df30[df30["DATETIME"] == new_date] | ||
|
|
||
| # Read in the GHCN station files | ||
| cols = ["ID", "LATITUDE", "LONGITUDE", "ELEVATION", "STATE", "NAME", "GSN_FLAG", "HCNCRN_FLAG", "WMO_ID"] | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| df10all = pd.read_csv(self.fixfile, header=None, sep='\r\n', engine='python') | ||
| df10all = df10all[0].str.split('\\s+', expand=True) | ||
| df10 = df10all.iloc[:, 0:4] | ||
| df10all = None | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| sub_cols = {0: "ID", 1: "LATITUDE", 2: "LONGITUDE", 3: "ELEVATION"} | ||
| df10 = df10.rename(columns=sub_cols) | ||
| df10 = df10.drop_duplicates(subset=["ID"]) | ||
|
|
||
| # merge on ID to pull coordinates from the fix file | ||
| df300 = pd.merge(df30, df10[['ID', 'LATITUDE', 'LONGITUDE', 'ELEVATION']], on='ID', how='left') | ||
|
|
||
| # if merge (left) cannot find ID in df10, will insert NaN | ||
| if (any(df300['LATITUDE'].isna())): | ||
| if (self.warn): | ||
| print(f"\n WARNING: ignoring ghcn stations missing from station_list") | ||
| else: | ||
| sys.exit(f"\n ERROR: ghcn data files contains station not in station_list.") | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| sites = df300["ID"].values | ||
| vals = df300["DATA_VALUE"].values | ||
| lats = df300["LATITUDE"].values | ||
| lons = df300["LONGITUDE"].values | ||
| alts = df300["ELEVATION"].values | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| vals = vals.astype('float32') | ||
| lats = lats.astype('float32') | ||
| lons = lons.astype('float32') | ||
| alts = alts.astype('float32') | ||
|
|
||
| # set qflg to 0 (do we need this?), error to 40. | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| qflg = np.full(vals.shape, 0, dtype='int32') | ||
| errs = np.full(vals.shape, 40.0, dtype='float32') | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # get datetime from input | ||
| my_date = datetime.strptime(startdate, "%Y%m%d%H") | ||
| my_date = my_date.replace(tzinfo=timezone.utc) | ||
| epoch_time = np.int64(get_epoch_time(my_date)) | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| times = np.full(vals.shape, epoch_time, dtype='int64') | ||
|
|
||
| # add metadata variables | ||
| self.outdata[('dateTime', 'MetaData')] = times | ||
| self.outdata[('stationIdentification', 'MetaData')] = sites | ||
| self.outdata[('latitude', 'MetaData')] = lats | ||
| self.outdata[('longitude', 'MetaData')] = lons | ||
| self.outdata[('stationElevation', 'MetaData')] = alts | ||
| self.varAttrs[('stationElevation', 'MetaData')]['units'] = 'm' | ||
| self.varAttrs[('dateTime', 'MetaData')]['units'] = iso8601_string | ||
| self.varAttrs[('dateTime', 'MetaData')]['_FillValue'] = long_missing_value | ||
|
|
||
| self.outdata[self.varDict[iodavar]['valKey']] = vals | ||
| self.outdata[self.varDict[iodavar]['errKey']] = errs | ||
| self.outdata[self.varDict[iodavar]['qcKey']] = qflg | ||
|
|
||
| DimDict['Location'] = len(self.outdata[('dateTime', 'MetaData')]) | ||
|
|
||
|
|
||
| def main(): | ||
|
|
||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| parser = argparse.ArgumentParser( | ||
| description=('Read GHCN snow depth file(s) and Converter' | ||
| ' of native csv format for observations of snow' | ||
| ' depth from GHCN to IODA netCDF format.') | ||
| ) | ||
| parser.add_argument('-i', '--input', | ||
| help="name of ghcn snow depth input file(s)", | ||
| type=str, required=True) | ||
| parser.add_argument('-f', '--fixfile', | ||
| help="name of ghcn station fixed file(s)", | ||
| type=str, required=True) | ||
| parser.add_argument('-o', '--output', | ||
| help="name of ioda output file", | ||
| type=str, required=True) | ||
| parser.add_argument('-d', '--date', | ||
| help="base date (YYYYMMDDHH)", type=str, required=True) | ||
| parser.add_argument('--warn_on_missing_stn', | ||
| help="if present: missing stations in the fix file will warn, rather than exit", | ||
| action='store_true') | ||
|
|
||
| args = parser.parse_args() | ||
|
|
||
| # start timer | ||
| tic = record_time() | ||
|
|
||
| # Read in the GHCN snow depth data | ||
| snod = ghcn(args.input, args.fixfile, args.date, args.warn_on_missing_stn) | ||
|
|
||
| # report time | ||
| toc = record_time(tic=tic) | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # setup the IODA writer | ||
| writer = iconv.IodaWriter(args.output, locationKeyList, DimDict) | ||
|
|
||
| # write all data out | ||
| writer.BuildIoda(snod.outdata, VarDims, snod.varAttrs, AttrData) | ||
|
|
||
| # report time | ||
| toc = record_time(tic=tic) | ||
CoryMartin-NOAA marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| if __name__ == '__main__': | ||
| main() | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.