diff --git a/guide/pytools.rst b/guide/pytools.rst index a49951401..299aa4413 100644 --- a/guide/pytools.rst +++ b/guide/pytools.rst @@ -77,19 +77,65 @@ Here is an example configuration file for radiosondes: - name: windNorthward type: RADIOSONDE_V_WIND_COMPONENT + observation category: + name: conventional vertical coordinate: name: MetaData/pressure units: "pressure (Pa)" -The `name:` entries are the names of the variables in the JEDI IODA observation file, and the `type:` entries +The `observation category:` section is how the tool discerns between different categories of observation types. +Currently, the tool recognizes "conventional" (radiosonde, aircraft, etc.) and "radiance" (amsu-a, goes, etc.) with the `name:` specification. +The idea with the observation category is to group instruments where the data are organized in a similar fashion. +For example, in the future, perhaps "radar" and "gpsro" could be added to the list of categories. + +The `name:` entries for variables are the names of the variables in the JEDI IODA observation file, and the `type:` entries are the corresponding DART observation types. The `vertical coordinate` section specifies the variable that will be used as the vertical coordinate in the DART observation sequence file. Note that in the `vertical coordinate` section, the `name:` entry must specify the full netcdf path to the variabile in the JEDI IODA observation file, and the `units:` entry specifies the units to be used in the DART observation sequence file. +Here is an example radiance obs type (AMSU-A): + +.. code-block:: yaml + + --- + + ioda to obsq converter: + observation variables: + - name: brightnessTemperature + type: NOAA_19_AMSUA_TB + + observation category: + name: radiance + channel numbers: 1, 2, 3, 12-15 + vertical coordinate: + units: "pressure (Pa)" + data value: 35000.0 + metadata: + sensor key: NOAA_19_AMSUA + rttov sensor db: /home/stephenh/projects/NCAR_DART/DART/observations/forward_operators/rttov_sensor_db.csv + sat az variable: MetaData/sensorAzimuthAngle + sat ze variable: MetaData/sensorZenithAngle + +Some of the entries are similar to those in the radiosonde (conventional) category. +Note that in the radiance category, the `vertical coordinate:` spec needs a `name:` (as before) and a `data value:` (instead of units). +Currently, the `data value:` is repeated on all of the observations. + +Two new entries under the `observation catageory:` spec have been introduced. +The `channel numbers:` spec allows the user to select a subset of channels to convert. +The value for `channel numbers:` is a comma separated list where each entry can be an integer or a range of integers. + +The `metadata:` spec allows the user to configure what gets placed in the obs sequence file's metadata section. +The `sensor key:` and `rttov sensor db:` spec go hand-in-hand, and describe how to pull out id numbers for the platform, satellite and sensor. +The `sat az variable:` and `sat ze variable:` are the JEDI IODA names for the satellite azimuth and zenith angles respectively. + +Another piece of information that comes from the `rttov_sensor_db:` file is the spectral band (eg. infrared, microwave). +Currently, only infrared ("ir") and microwave ("mw") are recognized. +The purpose of the `metadata:` spec is to have flexibility with the configuration for infrared, microwave, visible, etc. instruments. + .. note:: The `ioda2obsq` tool is under active development and has limited functionality at this time. - It is expected that more features will be added soon, including support for satellite radiance observation types. - In its current state, it is primarily intended for use with radiosonde and similar conventional observation types. \ No newline at end of file + It is expected that more features will be added soon, including support for additional satellite radiance observation types. + In its current state, it is primarily intended for use with radiosonde and similar conventional observation types, as well as infrared and micorwave radiance instruments. diff --git a/pytools/pyjedi/src/pyjedi/_utils.py b/pytools/pyjedi/src/pyjedi/_utils.py index ae652e6e2..d654400dd 100644 --- a/pytools/pyjedi/src/pyjedi/_utils.py +++ b/pytools/pyjedi/src/pyjedi/_utils.py @@ -3,8 +3,160 @@ import netCDF4 as nc4 import numpy as np import re +import yaml import pydartdiags.obs_sequence.obs_sequence as obsq +missingVal = -888888.0 + +def _expandChanNums(chanNumsStr): + """Expand channel numbers YAML spec into a list of channel numbers + + Args: chanNumsStr (str): Comma-separated string of channel numbers with optional ranges + + The optional ranges is of the form (-) where both and are + inclusive. Eg, 12-15 expands to [12, 13, 14, 15]. + + """ + chanNums = [] + for part in chanNumsStr.split(','): + part = part.strip() + if '-' in part: + start, end = part.split('-') + chanNums.extend(range(int(start), int(end) + 1)) + else: + chanNums.append(int(part)) + return chanNums + +def _parseYamlConfig(configFile): + """Parse the YAML configuration file + + Args: + configFile (str): Path to the YAML configuration file + + Returns: + Tuple containing the processed "observation variables" configuration, and + the processed "observation category" configuration + + The configuration file should contain the following sections: + - observation variables: + Required + - List of IODA variable names and types (and optional channels) to convert + - observation category: + Required + - name + Name of the category. For now, accept: "radiance" or "conventional" + Some possibilities for the future could be "radar", "gpsro", etc. + The remaining contents of this section are dependent on the name of the category + - vertical coordinate: + Required for radiance and conventional obs types + name and units of the vertical coordinate variable for conventional + units and data value of the vertical coordinate variable for radiance + - channel numbers: + Required for radiance obs types + list of numbers which can include ranges of numbers (eg, 12-15 for 12, 13, 14, 15) + - metadata: + Required for radiance obst types + sensor key and rttov sensor db (path to the db file) + + """ + + with open(configFile, 'r') as file: + config = yaml.safe_load(file) + iodaVarsConfig = config['ioda to obsq converter']['observation variables'] + obsCategoryConfig = config['ioda to obsq converter']['observation category'] + + # check for proper values of the obs category name + if (obsCategoryConfig['name'] not in ['radiance', 'conventional']): + raise ValueError("Invalid observation category name: " + obsCategoryConfig['name']) + obsCategory = obsCategoryConfig['name'] + + # check for proper obs category configuration + if (obsCategory == 'conventional'): + # Conventional obs type. + if ('vertical coordinate' not in obsCategoryConfig): + raise ValueError("Must specify 'vertical coordinate' for conventional observations.") + else: + # need name and units for vertical coordinate + if ('name' not in obsCategoryConfig['vertical coordinate']): + raise ValueError("Must specify 'name' for vertical coordinate in conventional observations.") + if ('units' not in obsCategoryConfig['vertical coordinate']): + raise ValueError("Must specify 'units' for vertical coordinate in conventional observations.") + elif (obsCategory == 'radiance'): + # Radiance obs type. + if ('vertical coordinate' not in obsCategoryConfig): + raise ValueError("Must specify 'vertical coordinate' for radiance observations.") + else: + # need units and data value for vertical coordinate + if ('units' not in obsCategoryConfig['vertical coordinate']): + raise ValueError("Must specify 'units' for vertical coordinate in radiance observations.") + if ('data value' not in obsCategoryConfig['vertical coordinate']): + raise ValueError("Must specify 'data value' for vertical coordinate in radiance observations.") + if ('channel numbers' not in obsCategoryConfig): + raise ValueError("Must specify 'channel numbers' for radiance observations.") + if ('metadata' not in obsCategoryConfig): + raise ValueError("Must specify 'metadata' for radiance observations.") + else: + # need sensor key and rttov sensor db for metadata + if ('sensor key' not in obsCategoryConfig['metadata']): + raise ValueError("Must specify 'sensor key' for metadata in radiance observations.") + if ('rttov sensor db' not in obsCategoryConfig['metadata']): + raise ValueError("Must specify 'rttov sensor db' for metadata in radiance observations.") + if ('sat az variable' not in obsCategoryConfig['metadata']): + raise ValueError("Must specify 'sat az variable' for metadata in radiance observations.") + if ('sat ze variable' not in obsCategoryConfig['metadata']): + raise ValueError("Must specify 'sat ze variable' for metadata in radiance observations.") + + # Expand the channel numbers into the variable names by creating variable + # names with the given name and each channel number appended to the end of the given name. + # This is how the separate channels are stored in the ioda dataframe (see _ioda2iodaDF below). + channelNumbers = _expandChanNums(obsCategoryConfig['channel numbers']) + newIodaVarsConfig = [] + for iodaVarConfig in iodaVarsConfig: + varName = iodaVarConfig['name'] + varType = iodaVarConfig['type'] + for chanNum in channelNumbers: + newConfig = {} + newConfig['name'] = f"{varName}_{chanNum}" + newConfig['type'] = varType + newIodaVarsConfig.append(newConfig) + iodaVarsConfig = newIodaVarsConfig + + # Replace the channel numbers string in the category config with the expanded channel numbers + obsCategoryConfig['channel numbers'] = channelNumbers + + # Build the sensor db dictionary, and extract the sensor key entry + # Do this here to be able to look up the sensor entry in one shot. + sensorDB = _buildSensorDict(obsCategoryConfig) + sensorKey = obsCategoryConfig['metadata']['sensor key'] + sensorDbEntry = sensorDB.get(sensorKey, None) + if (sensorDbEntry == None): + raise ValueError("No entry in the sensor DB for sensor key: " + sensorKey) + obsCategoryConfig['metadata']['sensor db entry'] = sensorDbEntry + + else: + # Currently, can only use conventional or radiance + raise ValueError("Invalid observation category name: " + obsCategory, \ + ", must use one of 'radiance' or 'conventional'") + + return iodaVarsConfig, obsCategoryConfig + +def _buildSensorDict(obsCategoryConfig): + """Build the sensor database dictionary from the observation category config. + + Args: + obsCategoryConfig (dict): Observation category configuration + + Returns: + dict: Sensor database dictionary + """ + + sensorDB = {} + sensorDF = pd.read_csv(obsCategoryConfig['metadata']['rttov sensor db'], header=None) + sensorDF.columns = ['key', 'platform_id', 'sat_id', 'sensor_id', 'spectral_band', 'rttov_coeff_file'] + sensorDB = sensorDF.set_index('key').to_dict(orient='index') + + return sensorDB + def _iodaDtime2obsqDtime(epoch, iodaDtime): """Convert IODA datetime into obs_seq datetime @@ -46,16 +198,17 @@ def _iodaDtime2obsqDtime(epoch, iodaDtime): return (seconds, days, time) -def _ioda2iodaDF(iodaFile, mask_obsvalue=None, sort_by=None, derived=False): +def _ioda2iodaDF(iodaFile, obsCategoryConfig, mask_obsvalue=None, sort_by=None, derived=False): """Creates pandas dataframe from a IODA observation data file. Args: 1. iodaFile - IODA observation data file - mask_obsvalue (None, optional): Mask ObsValue _FillValue and NaN - Set this to something like netCDF4.default_fillvals[dtype] - where dtype can be 'f4' or 'f8' for floating point arrays - sort_by (list of str, optional): Sort dataframe by columns - derived: Set to true if using @DerivedObsValue + 2. obsCategoryConfig (dict): Observation category configuration + 3. mask_obsvalue (None, optional): Mask ObsValue _FillValue and NaN + Set this to something like netCDF4.default_fillvals[dtype] + where dtype can be 'f4' or 'f8' for floating point arrays + 4. sort_by (list of str, optional): Sort dataframe by columns + 5 derived: Set to true if using @DerivedObsValue Returns: pandas.DataFrame: IODA formatted dataframe @@ -72,12 +225,20 @@ def _ioda2iodaDF(iodaFile, mask_obsvalue=None, sort_by=None, derived=False): assert('dateTime' in ds.groups['MetaData'].variables) epochdatetime = ds.groups['MetaData'].variables['dateTime'].units.split()[-1] + # Grab the channel numbers if available + channelNums = None + if ('Channel' in ds.variables): + channelNums = ds.variables['Channel'][:] + + # Grab the observation category config channel numbers + obsChannels = obsCategoryConfig.get('channel numbers', []) + loc = 0 for var in dsets: fullVarName = var.group().name + "/" + var.name if (var.dimensions[0] != 'Location'): - print('WARNING: Can only process variables with Location as the first dimension') - print('WARNING: skipping variable: ', fullVarName) + log.warning('Can only process variables with Location as the first dimension') + log.warning(' Skipping variable: %s', fullVarName) continue if var.group().name != 'VarMetaData': @@ -88,18 +249,39 @@ def _ioda2iodaDF(iodaFile, mask_obsvalue=None, sort_by=None, derived=False): loc += 1 # multi-channel BT if var.name in ("brightnessTemperature", "emissivity"): - if 'Channel' not in var.dimensions: - log.warning("Expected channel dim not found") + if (var.dimensions[1] != 'Channel'): + log.warning("Can only process variables with Channel as the second dimension") + log.warning(" Skipping variable: %s", fullVarName) + continue + + # If obsChannels is empty, skip the variable + if not obsChannels: + log.warning("No observation channels specified") + log.warning(" Please add desired channels to the YAML configuration file.") + log.warning(" Skipping variable: %s", fullVarName) continue - # 2D BT are usually of (Location, Channel) shape - # but the order may not be guaranteed so - # force nchan as first dim before looping over - # individual channels - channelIndex = var.dimensions.index('Channel') - outarr = np.moveaxis(var[:], channelIndex, 0) - for ci in range(0, channelIndex): - df.insert(loc, fullVarName, outarr[ci, :]) + # Walk through the observation channels and form separate columns + # for each channel + for obsChannel in obsChannels: + # Find the index of the channel in the Channel dimension + if (obsChannel in channelNums): + channelIndex = np.where(channelNums == obsChannel)[0][0] + else: + log.warning("Channel number %s not found in Channel dimension", obsChannel) + log.warning(" Skipping channel number: %s", obsChannel) + continue + + # Insert the column for the specific channel + fullVarNameChannel = f"{fullVarName}_{obsChannel}" + if fullVarNameChannel in df.columns: + log.warning("Column %s already exists") + log.warning(" Skipping column: %s", fullVarNameChannel) + continue + + # Insert the data for the specific channel + outarr = var[:, channelIndex] + df.insert(loc, fullVarNameChannel, outarr) loc += 1 if var.ndim == 1: @@ -180,13 +362,75 @@ def _buildObsSeqFromObsqDF(obsqDF, maxNumObs=None, nonQcNames=None, qcNames=None return obsSeq -def _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, iodaVarType, vertCoordName, vertCoordUnits): +def _buildRadianceMetadata(iodaDF, obsCategoryConfig, chanNum): + """Build metadata for radiance observations. + + Args: + 1. iodaDF - pandas dataframe in the ioda layout + 2. obsCategoryConfig - Observation category configuration + 3. chanNum - Channel number + + Return: + A list of metadata entries for the radiance observations. + """ + + # Grab the sensor database info + sensorDbEntry = obsCategoryConfig['metadata']['sensor db entry'] + spectralBand = sensorDbEntry['spectral_band'] + platformId = sensorDbEntry['platform_id'] + satId = sensorDbEntry['sat_id'] + sensorId = sensorDbEntry['sensor_id'] + + # Grab the instrument metadata variable names + satAzVar = obsCategoryConfig['metadata']['sat az variable'] + satZeVar = obsCategoryConfig['metadata']['sat ze variable'] + + # Form the line in the metadata containing the instrument info + instrumentInfo = f" {platformId} {satId} {sensorId} {chanNum}" + + # For now we only handle infrared and microwave instruments + radMetaData = [] + if (spectralBand == 'ir'): + # Format contains five strings: + # 'visir' + # ' sat_az sat_ze sun_az sun_ze' + # ' {instrumentInfo}' + # ' specularity' + # ' oldkey' + specularityInfo = f" {missingVal}" + for i, (satAz, satZe) in enumerate(zip(iodaDF[satAzVar], iodaDF[satZeVar])): + orientationInfo = f" {satAz} {satZe} {missingVal} {missingVal}" + oldKeyInfo = f" {i + 1}" + radMetaData.append([ 'visr', orientationInfo, instrumentInfo, specularityInfo, oldKeyInfo ]) + elif (spectralBand == 'mw'): + # Format contains six strings: + # 'mw' + # ' sat_az sat_ze + # ' {instrumentInfo}' + # ' magfield cosbk' + # ' fastem_p1 fastem_p2 fastem_p3 fastem_p4 fastem_p5' + # ' oldkey' + magfieldInfo = f" {missingVal} {missingVal}" + fastemInfo = f" {missingVal} {missingVal} {missingVal} {missingVal} {missingVal}" + for i, (satAz, satZe) in enumerate(zip(iodaDF[satAzVar], iodaDF[satZeVar])): + orientationInfo = f" {satAz} {satZe}" + oldKeyInfo = f" {i + 1}" + radMetaData.append([ 'mw', orientationInfo, instrumentInfo, magfieldInfo, fastemInfo, oldKeyInfo ]) + else: + raise ValueError("Unrecognized spectral band: " + spectralBand) + + return radMetaData + +def _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, iodaVarType, obsCategoryConfig): """Reformat a pandas dataframe built with ioda2iodaDF into a dataframe suitable for buildObsSeqFromObsqDF. Args: 1. iodaDF - pandas dataframe in the ioda layout 2. epochDT - IODA dateTime epoch value in ISO 8601 string format + 3. iodaVarName - IODA dataframe column name + 4. iodaVarType - IODA dataframe column type + 5. obsCategoryConfig - Observation category configuration Return: This function returns a pandas dataframe in the obs_seq layout. This dataframe @@ -194,6 +438,21 @@ def _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, iodaVarType, vertCoordName, ver """ + # Set up obs category specific variables. At this point we only recognize + # two categories: + # conventional + # radiance + obsCategory = obsCategoryConfig['name'] + vertCoordName = None + vertCoordUnits = None + vertCoordValue = None + if (obsCategory == 'conventional'): + vertCoordName = obsCategoryConfig['vertical coordinate']['name'] + vertCoordUnits = obsCategoryConfig['vertical coordinate']['units'] + elif (obsCategory == 'radiance'): + vertCoordUnits = obsCategoryConfig['vertical coordinate']['units'] + vertCoordValue = obsCategoryConfig['vertical coordinate']['data value'] + # The ioda dataframe layout has these features: # 1. Each separate variable is in a separate column # 2. The datetime variable (MetaData/dateTime) has @@ -222,11 +481,20 @@ def _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, iodaVarType, vertCoordName, ver obsqDF.insert(4, 'longitude', np.array(iodaDF['MetaData/longitude'], dtype=np.float32)) obsqDF.insert(5, 'latitude', np.array(iodaDF['MetaData/latitude'], dtype=np.float32)) - obsqDF.insert(6, 'vertical', np.array(iodaDF[vertCoordName], dtype=np.float32)) - obsqDF.insert(7, 'vert_unit', np.full(numLocs, vertCoordUnits, dtype=object)) + if obsCategory == 'conventional': + obsqDF.insert(6, 'vertical', np.array(iodaDF[vertCoordName], dtype=np.float32)) + obsqDF.insert(7, 'vert_unit', np.full(numLocs, vertCoordUnits, dtype=object)) + elif obsCategory == 'radiance': + obsqDF.insert(6, 'vertical', np.full(numLocs, vertCoordValue, dtype=np.float32)) + obsqDF.insert(7, 'vert_unit', np.full(numLocs, vertCoordUnits, dtype=object)) obsqDF.insert(8, 'type', np.full(numLocs, iodaVarType, dtype=object)) - obsqDF.insert(9, 'metadata', np.full(numLocs, '', dtype=object)) + if (obsCategory == 'conventional'): + obsqDF.insert(9, 'metadata', np.full(numLocs, '', dtype=object)) + elif (obsCategory == 'radiance'): + chanNum = int(iodaVarName.split('_')[-1]) + radMetaData = _buildRadianceMetadata(iodaDF, obsCategoryConfig, chanNum) + obsqDF.insert(9, 'metadata', radMetaData) obsqDF.insert(10, 'external_FO', np.full(numLocs, '', dtype=object)) obsqDF.insert(11, 'seconds', obsqSec) diff --git a/pytools/pyjedi/src/pyjedi/ioda2obsq.py b/pytools/pyjedi/src/pyjedi/ioda2obsq.py index 33cbf338f..c0c0c42ab 100644 --- a/pytools/pyjedi/src/pyjedi/ioda2obsq.py +++ b/pytools/pyjedi/src/pyjedi/ioda2obsq.py @@ -1,7 +1,7 @@ import argparse -import yaml import numpy as np import pandas as pd +from ._utils import _parseYamlConfig from ._utils import _ioda2iodaDF from ._utils import _iodaDF2obsqDF from ._utils import _buildObsSeqFromObsqDF @@ -31,19 +31,13 @@ def main(): print("INFO: obs_seq output file: ", outFile) print("") - # Parse the YAML configuration - with open(configFile, 'r') as file: - config = yaml.safe_load(file) - iodaVarsConfig = config['ioda to obsq converter']['observation variables'] - vertCoordConfig = config['ioda to obsq converter']['vertical coordinate'] + # Parse the YAML config + (iodaVarsConfig, obsCategoryConfig) = _parseYamlConfig(configFile) if (verbose): print("DEBUG: IODA variable configuration: ", iodaVarsConfig) - print("DEBUG: vertical coordinate configuration: ", vertCoordConfig) + print("DEBUG: Observation category configuration: ", obsCategoryConfig) print("") - vertCoordName = vertCoordConfig['name'] - vertCoordUnits = vertCoordConfig['units'] - # Conversion steps # 1. read ioda file into a pandas dataframe (which will have a different layout # compared to what the obsq writer requires) @@ -53,14 +47,19 @@ def main(): # and then concatenate that dataframe to the main dataframe # 3. build a pyDARTdiags ObsSequence object from the main dataframe # 4. call the ObsSequence.write_obs_seq method to create the output file - (iodaDF, epochDT) = _ioda2iodaDF(inFile) + (iodaDF, epochDT) = _ioda2iodaDF(inFile, obsCategoryConfig) + if (verbose): + print("DEBUG: IODA dataframe columns: ", iodaDF.columns.tolist()) + print("DEBUG: IODA dataframe shape: ", iodaDF.shape) + print("") + obsqDF = pd.DataFrame() for iodaVarConfig in iodaVarsConfig: iodaVarName = iodaVarConfig['name'] iodaVarType = iodaVarConfig['type'] print("INFO: IODA input variable: ", iodaVarName, " (", iodaVarType, ")") - obsqDF = pd.concat([obsqDF, _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, iodaVarType, - vertCoordName, vertCoordUnits)], + obsqDF = pd.concat([obsqDF, _iodaDF2obsqDF(iodaDF, epochDT, iodaVarName, + iodaVarType, obsCategoryConfig)], ignore_index = True) print("")