diff --git a/.gitattributes b/.gitattributes index 1204bb208..1b3f11efb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15,3 +15,4 @@ test/testinput/bufr_tables/** filter=lfs diff=lfs merge=lfs -text *.radiosonde filter=lfs diff=lfs merge=lfs -text *.dump filter=lfs diff=lfs merge=lfs -text *.prepbufr filter=lfs diff=lfs merge=lfs -text +*.hdf filter=lfs diff=lfs merge=lfs -text diff --git a/src/compo/CMakeLists.txt b/src/compo/CMakeLists.txt index e1274d3ac..2d6415baf 100644 --- a/src/compo/CMakeLists.txt +++ b/src/compo/CMakeLists.txt @@ -2,6 +2,7 @@ list(APPEND programs aeronet_aod2ioda.py aeronet_aaod2ioda.py airnow2ioda_nc.py + caliop_l2_ext2ioda.py gcas_nc2ioda.py icartt_nc2ioda.py mls_o3_nc2ioda.py diff --git a/src/compo/caliop_l2_ext2ioda.py b/src/compo/caliop_l2_ext2ioda.py new file mode 100755 index 000000000..fb5a788d3 --- /dev/null +++ b/src/compo/caliop_l2_ext2ioda.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 + +# +# (C) Copyright 2025 UCAR +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# + +import argparse +from datetime import datetime, timedelta, timezone +import os, sys + +from pyhdf.HDF import HDF +from pyhdf.VS import VS +from pyhdf.SD import SD, SDC +import numpy as np +import netCDF4 as nc + +import pyiodaconv.ioda_conv_engines as iconv +from collections import defaultdict, OrderedDict +from pyiodaconv.orddicts import DefaultOrderedDict +from pyiodaconv.def_jedi_utils import compute_scan_angle +from pyiodaconv.def_jedi_utils import iso8601_string, epoch + +os.environ["TZ"] = "UTC" + +# globals +CALIPSO_WMO_sat_ID = 787 + +AttrData = { + 'converter': os.path.basename(__file__), + "platformCommonName": "CALIPSO", + "platformLongDescription": "CALIPSO L2 Lidar Data", +} + +metaKeyList = [ + ("latitude", "float", "degrees_north"), + ("longitude", "float", "degrees_east"), + ("dateTime", "long", iso8601_string), + ("pressure", "float", "Pa"), + ("sensorCentralWavelength", "float", "micron"), + ("sensorCentralFrequency", "float", "Hz"), + ("height", "float", "m"), + ("atmosphereLayerThicknessZ", "float", "m"), + ("cloudAerosolDiscriminationHigher", "integer", ""), + ("cloudAerosolDiscriminationLower", "integer", ""), + ("sequenceNumber", "integer", ""), +] + +DimDict = { +} + +VarDims = { + 'extinctionCoefficient': ['Location', 'Channel'], + 'pressure': ['Location'], + 'height': ['Level'], + 'atmosphereLayerThicknessZ': ['Level'], + 'sensorCentralWavelength': ['Channel'], + 'sensorCentralFrequency': ['Channel'], + 'cloudAerosolDiscriminationHigher': ['Location'], + 'cloudAerosolDiscriminationLower': ['Location'], +} + +obsvars = ["extinctionCoefficient"] +channels = [1, 2] +wavelength = np.array([0.532, 1.064]) +speed_light = 2.99792458E8 +frequency = speed_light * 1.0E6 / wavelength + +metaDataName = iconv.MetaDataName() +obsValName = iconv.OvalName() +obsErrName = iconv.OerrName() +qcName = iconv.OqcName() + +varsKeyList = [('valKey', obsValName, 'float', 'longitude latitude height', "km-1"), + ('errKey', obsErrName, 'float', 'longitude latitude height', "km-1"), + ('qcKey', qcName, 'integer', 'longitude latitude height', None)] + +float_missing_value = iconv.get_default_fill_val(np.float32) +double_missing_value = iconv.get_default_fill_val(np.float64) +int_missing_value = iconv.get_default_fill_val(np.int32) +long_missing_value = iconv.get_default_fill_val(np.int64) +string_missing_value = iconv.get_default_fill_val(np.str_) + +missing_vals = {'string': string_missing_value, + 'integer': int_missing_value, + 'long': long_missing_value, + 'float': float_missing_value, + 'double': double_missing_value} + + +class caliop_l2ext(object): + def __init__(self, filenames, date_range): + self.filenames = filenames + wbeg = datetime.strptime(date_range[0], "%Y%m%d%H%M").replace(tzinfo=timezone.utc) - epoch + wend = datetime.strptime(date_range[1], "%Y%m%d%H%M").replace(tzinfo=timezone.utc) - epoch + self.wbeg = wbeg.total_seconds() + self.wend = wend.total_seconds() + self.varDict = defaultdict(lambda: defaultdict(dict)) + self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict)) + self.setDicts() + self._read() + + def setDicts(self): + meta_keys = [m_item[0] for m_item in metaKeyList] + # Set units of the MetaData variables and all _FillValues. + self.varAttrs = DefaultOrderedDict(lambda: DefaultOrderedDict(dict)) + for key in meta_keys: + dtypestr = metaKeyList[meta_keys.index(key)][1] + if metaKeyList[meta_keys.index(key)][2]: + self.varAttrs[(key, metaDataName)]['units'] = metaKeyList[meta_keys.index(key)][2] + self.varAttrs[(key, metaDataName)]['_FillValue'] = missing_vals[dtypestr] + + var_keys = [v_item[0] for v_item in varsKeyList] + # set up variable names for IODA + for iodavar in obsvars: + for key in var_keys: + varGroupName = varsKeyList[var_keys.index(key)][1] + dtypestr = varsKeyList[var_keys.index(key)][2] + coord = varsKeyList[var_keys.index(key)][3] + self.varDict[iodavar][key] = iodavar, varGroupName + self.varAttrs[iodavar, varGroupName]['coordinates'] = coord + self.varAttrs[iodavar, varGroupName]['_FillValue'] = missing_vals[dtypestr] + if varsKeyList[var_keys.index(key)][4]: + self.varAttrs[iodavar, varGroupName]['units'] = varsKeyList[var_keys.index(key)][4] + + def caliop_time2dt(self, time): + """ + Convert CALIOP Profile_UTC_Time to datetime.datetime object + + Args: + time: list or array of Profile_UTC_Time from CALIOP file (float number: yymmdd.ffffffff) + """ + dtarr = [datetime.strptime(str(t)[:6], '%y%m%d') for t in time] + delta = [timedelta(frac) for frac in np.mod(time, 1)] + outarr = [(dt + dl).replace(tzinfo=timezone.utc) for dt, dl in zip(dtarr, delta)] + return outarr + + def _read(self): + # default missing value in CALIPSO file + caliop_missing_value = -9999. + caliop_ref_time = datetime(1993, 1, 1, 0, 0, 0) + nchan = len(channels) + output_chidx = np.array(channels, dtype=np.int32) - 1 + + # Make empty lists for the output vars + self.outdata[('latitude', metaDataName)] = np.array([], dtype=np.float32) + self.outdata[('longitude', metaDataName)] = np.array([], dtype=np.float32) + self.outdata[('dateTime', metaDataName)] = np.array([], dtype=np.int64) + self.outdata[('pressure', metaDataName)] = np.array([], dtype=np.float32) + self.outdata[('sequenceNumber', metaDataName)] = np.array([], dtype=np.int32) + self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)] = np.array([], dtype=np.int32) + self.outdata[('cloudAerosolDiscriminationLower', metaDataName)] = np.array([], dtype=np.int32) + for iodavar in obsvars: + self.outdata[self.varDict[iodavar]['valKey']] = np.array([], dtype=np.float32) + self.outdata[self.varDict[iodavar]['errKey']] = np.array([], dtype=np.float32) + self.outdata[self.varDict[iodavar]['qcKey']] = np.array([], dtype=np.int32) + + # Get the lidar data altitude + tmpfile = self.filenames[0] + tmphdf = HDF(tmpfile) + vs = tmphdf.vstart() + metaid = vs.find('metadata') + vd = vs.attach(metaid) + vd.setfields('Lidar_Data_Altitudes') + alt = np.array(vd.read()[0][0]) * 1000. + nlev = alt.size + vd.detach() + vs.end() + + # Calculate the thickness of LiDAR profile + thickness = np.empty_like(alt) + thickness[1:-1] = 0.5 * (alt[:-2] - alt[2:]) + thickness[0] = alt[0] - alt[1] + thickness[-1] = alt[-2] - alt[-1] + + # Adjust thickness near 20.2 km because it should be around 180m above and 60m below 20.2km + tmpidx = np.argmin(np.abs(alt-20200)) + oldthick = thickness[tmpidx] + if alt[tmpidx] > 20200: + thickness[tmpidx] = thickness[tmpidx - 1] + thickness[tmpidx + 1] = thickness[tmpidx + 1] + np.abs(thickness[tmpidx] - oldthick) + else: + thickness[tmpidx] = thickness[tmpidx + 1] + thickness[tmpidx - 1] = thickness[tmpidx - 1] + np.abs(thickness[tmpidx] - oldthick) + + prev_nloc = 0 + for f in self.filenames: + sd = SD(f, SDC.READ) + + pres = sd.select('Pressure').get().ravel() * 1e2 # hPa to Pa + lats = sd.select('Latitude').get()[:, 1] + lons = sd.select('Longitude').get()[:, 1] + proftime = sd.select('Profile_UTC_Time').get()[:, 1] + obs_time = np.array([(pt - epoch).total_seconds() for pt in self.caliop_time2dt(proftime)], + dtype=np.int64) + + nloc = lats.size + profidx = np.arange(prev_nloc, prev_nloc + nloc) + + lats = np.repeat(lats, nlev) + lons = np.repeat(lons, nlev) + profidx = np.repeat(profidx, nlev) + obs_time = np.repeat(obs_time, nlev) + winmsk = ((obs_time >= self.wbeg) & (obs_time <= self.wend)) + prev_nloc += nloc + + obs = np.zeros((nloc * nlev, nchan)) + err = np.zeros_like(obs) + qcf = np.zeros_like(obs) + for i, chidx in enumerate(output_chidx): + wavelength_str = str(int(wavelength[chidx] * 1e3)) + obsvarname = f"Extinction_Coefficient_{wavelength_str}" + errvarname = f"Extinction_Coefficient_Uncertainty_{wavelength_str}" + qcfvarname = f"Extinction_QC_Flag_{wavelength_str}" + # Level 2 QC flag stores 30m level 1 QC flag below 8.3 km in the rightmost dimension + # Based on Young et al. (2018): qc flag value 0, 1, 2, 16, and 18 should be used. + tmpqcf = sd.select(qcfvarname).get() + tmpqcf = tmpqcf.reshape(-1, tmpqcf.shape[2]) + tmpqcf = np.where((tmpqcf[:, 0] == tmpqcf[:, 1]), tmpqcf[:, 0], + np.maximum(tmpqcf[:, 0], tmpqcf[:, 1])) + tmpqcf = np.where(np.isin(tmpqcf, [0, 1, 2, 16, 18]), 0, 1) + + obs[:, i] = sd.select(obsvarname).get().ravel() + err[:, i] = sd.select(errvarname).get().ravel() + qcf[:, i] = tmpqcf + + # Similar to QC_Flag, it stores higher and lower 30 meter layers' CAD score + cad1 = np.zeros_like(pres) + cad2 = np.zeros_like(pres) + tmpcad = sd.select("CAD_Score").get() + tmpcad = tmpcad.reshape(-1, tmpcad.shape[2]) + cad1 = tmpcad[:, 0] + cad2 = tmpcad[:, 1] + + obs = np.where((obs == caliop_missing_value), float_missing_value, obs) + err = np.where((err == caliop_missing_value), float_missing_value, err) + pres = np.where(pres < 0, float_missing_value, pres) + + self.outdata[('latitude', metaDataName)] = np.append(self.outdata[('latitude', metaDataName)], + np.array(lats[winmsk], dtype=np.float32)) + self.outdata[('longitude', metaDataName)] = np.append(self.outdata[('longitude', metaDataName)], + np.array(lons[winmsk], dtype=np.float32)) + self.outdata[('dateTime', metaDataName)] = np.append(self.outdata[('dateTime', metaDataName)], + np.array(obs_time[winmsk], dtype=np.int64)) + self.outdata[('pressure', metaDataName)] = np.append(self.outdata[('pressure', metaDataName)], + np.array(pres[winmsk], dtype=np.float32)) + self.outdata[('sequenceNumber', metaDataName)] = np.append(self.outdata[('sequenceNumber', metaDataName)], + np.array(profidx[winmsk], dtype=np.int32)) + self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)] = np.append( + self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)], np.array(cad1[winmsk], dtype=np.int32)) + self.outdata[('cloudAerosolDiscriminationLower', metaDataName)] = np.append( + self.outdata[('cloudAerosolDiscriminationLower', metaDataName)], np.array(cad2[winmsk], dtype=np.int32)) + + for iodavar in obsvars: + self.outdata[self.varDict[iodavar]['valKey']] = np.append( + self.outdata[self.varDict[iodavar]['valKey']], np.array(obs[winmsk, :], dtype=np.float32)) + self.outdata[self.varDict[iodavar]['errKey']] = np.append( + self.outdata[self.varDict[iodavar]['errKey']], np.array(err[winmsk, :], dtype=np.float32)) + self.outdata[self.varDict[iodavar]['qcKey']] = np.append( + self.outdata[self.varDict[iodavar]['qcKey']], np.array(qcf[winmsk, :], dtype=np.int32)) + + sd.end() + + self.outdata[('sensorCentralWavelength', metaDataName)] = np.array(wavelength, dtype=np.float32)[output_chidx] + self.outdata[('sensorCentralFrequency', metaDataName)] = np.array(frequency, dtype=np.float32)[output_chidx] + self.outdata[('height', metaDataName)] = np.array(alt, dtype=np.float32) + self.outdata[('atmosphereLayerThicknessZ', metaDataName)] = np.array(thickness, dtype=np.float32) + + tmpseq = self.outdata[('sequenceNumber', metaDataName)] + self.outdata[('sequenceNumber', metaDataName)] = tmpseq - min(tmpseq) + + DimDict['Location'] = len(self.outdata[('dateTime', metaDataName)]) + DimDict['Channel'] = np.array(channels) + DimDict['Level'] = np.arange(nlev) + + +def main(): + parser = argparse.ArgumentParser( + description=( + 'Reads the CALIOP Level 2 aerosol profile data ' + ' convert into IODA formatted output files. ' + ' Multiple files are concatenated') + ) + + required = parser.add_argument_group(title='required arguments') + required.add_argument( + '-i', '--input', + help="path of CALIOP APro (HDF4) input file(s)", + type=str, nargs='+', required=True) + required.add_argument( + '-o', '--output', + help='path to output ioda file', + type=str, required=True) + + optional = parser.add_argument_group(title='optional arguments') + optional.add_argument( + '--date_range', + help="extract a date range to fit the data assimilation window" + "format -r YYYYMMDDHHmm YYYYMMDDHHmm", + type=str, metavar=('begindate', 'enddate'), nargs=2, + default=('197001010000', '217001010000')) + + args = parser.parse_args() + + # Read CALIPSO extinction profile data + caliop_l2 = caliop_l2ext(args.input, args.date_range) + + # write everything out + writer = iconv.IodaWriter(args.output, metaKeyList, DimDict) + writer.BuildIoda(caliop_l2.outdata, VarDims, caliop_l2.varAttrs, AttrData) + + +if __name__ == "__main__": + main() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 51bd26f98..328e22526 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -110,6 +110,7 @@ list( APPEND test_input testinput/balloonsonde_sample_20241001T00Z_PT15M.json testinput/tec_groundBased_TENET_20201001T00Z.tec testinput/COWVR_TSDR.020421.20240430T215845.20240430T230345.V1001.sample.h5 + testinput/caliop_l2_APro.hdf ) list( APPEND test_output @@ -211,6 +212,7 @@ list( APPEND test_output testoutput/obs.20241001T000000Z_PT15M_balloonsonde.nc4 testoutput/obs.20201001T000000Z_PT1M_tec_groundbased.nc4 testoutput/obs.20240430T220600Z_PT2M_cowvr_iss.nc4 + testoutput/caliop_l2_APro.nc4 ) if( iodaconv_gnssro_ENABLED ) @@ -1540,6 +1542,18 @@ ecbuild_add_test( TARGET test_${PROJECT_NAME}_viirs_j1_l1b_albedo --date_range 2021080503 2021080509" viirs_j1_l1b_albedo_2021080506.nc ${IODA_CONV_COMP_TOL}) +ecbuild_add_test( TARGET test_${PROJECT_NAME}_caliop_l2_ext + TYPE SCRIPT + ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}" + COMMAND bash + ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh + netcdf + "${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/caliop_l2_ext2ioda.py + -i testinput/caliop_l2_APro.hdf + -o testrun/caliop_l2_APro.nc4 + --date_range 201907220340 201907220350" + caliop_l2_APro.nc4 ${IODA_CONV_COMP_TOL}) + #============================================================================== # Bufr Ingester tests diff --git a/test/testinput/caliop_l2_APro.hdf b/test/testinput/caliop_l2_APro.hdf new file mode 100644 index 000000000..4d1a04767 --- /dev/null +++ b/test/testinput/caliop_l2_APro.hdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761db88fc63e36b927487c197b6c53bd52ac3a33a7708ecff2cfd17c599e0ddf +size 664331 diff --git a/test/testinput/modis_aod.hdf b/test/testinput/modis_aod.hdf index e8d6e5a7f..1e96b2856 100644 Binary files a/test/testinput/modis_aod.hdf and b/test/testinput/modis_aod.hdf differ diff --git a/test/testoutput/caliop_l2_APro.nc4 b/test/testoutput/caliop_l2_APro.nc4 new file mode 100644 index 000000000..856aa3d62 --- /dev/null +++ b/test/testoutput/caliop_l2_APro.nc4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aab142bc88da1c716e877e12d7d2d5e60f5b3cfa7ccbe49c807f15da874dc1d +size 254120