Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7ef280d
rebuild calipso level 2 extinction converter
weiwilliam Nov 11, 2024
cab01e7
updates
weiwilliam Nov 11, 2024
09a9f22
ready for further test with UFO
weiwilliam Nov 11, 2024
4138e47
update calipso_l2 converter
weiwilliam Dec 11, 2024
73af1a8
Merge branch 'develop' into feature/calipso_l2_ext
weiwilliam Mar 6, 2025
752313c
updates of level2 extinction converter
weiwilliam Mar 7, 2025
3d7f5f4
updates for qc flags handle
weiwilliam Mar 10, 2025
37ec5ae
calipso converter test
weiwilliam Mar 17, 2025
3e53d88
add CAD score and use larger QC value below 8.3km
weiwilliam Apr 1, 2025
833a0f1
updates on caliop l1 and l2 data
weiwilliam Apr 24, 2025
6eeb13c
Merge branch 'develop' into feature/caliop_converters
weiwilliam Jul 14, 2025
cb2dc1a
add converter for CALIOP level2 extinction coefficient in APro product
weiwilliam Jul 16, 2025
783fbb1
update CALIOP converter and the testoutput file
weiwilliam Jul 16, 2025
93e0a00
fix the output of frequency and dimension
weiwilliam Jul 17, 2025
9ed18d6
Merge branch 'develop' into feature/caliop_converters
weiwilliam Jul 17, 2025
c22cc7e
update Channels
weiwilliam Jul 24, 2025
4cca043
Merge remote-tracking branch 'refs/remotes/origin/feature/caliop_conv…
weiwilliam Jul 24, 2025
a14626d
change from 3d to 2d for ufo hofx output
weiwilliam Aug 11, 2025
794a3f6
Merge branch 'develop' into feature/caliop_converters
weiwilliam Aug 11, 2025
c4e2180
updates on caliop converters and testoutputs and remove the old files
weiwilliam Aug 13, 2025
070df6c
Merge remote-tracking branch 'refs/remotes/origin/feature/caliop_conv…
weiwilliam Aug 13, 2025
224034c
add layer thickness which derived from layer midpoint altitude
weiwilliam Aug 14, 2025
588b6a6
Merge branch 'develop' into feature/caliop_converters
weiwilliam Aug 21, 2025
2df61d5
move modis_aod.hdf and caliop_l2_APro.hdf to LFS
weiwilliam Aug 21, 2025
cce5d95
fix the converter and update the testoutput
weiwilliam Aug 21, 2025
7f6db34
update the date_range to cover the whole sample data
weiwilliam Aug 21, 2025
4791a02
Migrate ioda-converters to jedi-ci action (#1696)
eap Aug 31, 2025
a6ed1ac
Merge remote-tracking branch 'origin/develop' into feature/caliop_con…
weiwilliam Sep 23, 2025
32a83ee
store profiles indice into sequenceNumber
weiwilliam Sep 23, 2025
bfc6398
update caliop converter and its testoutput
weiwilliam Sep 29, 2025
dc0b494
Merge remote-tracking branch 'origin/develop' into feature/caliop_con…
weiwilliam Sep 29, 2025
7051d6d
Merge remote-tracking branch 'origin/develop' into feature/caliop_con…
weiwilliam Oct 1, 2025
1dff7a7
Merge branch 'develop' into feature/caliop_converters
weiwilliam Oct 6, 2025
2126895
Merge branch 'develop' into feature/caliop_converters
weiwilliam Oct 8, 2025
83a6637
Merge branch 'develop' into feature/caliop_converters
weiwilliam Oct 13, 2025
dc31a23
Merge branch 'develop' into feature/caliop_converters
weiwilliam Oct 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ test/testinput/bufr_tables/** filter=lfs diff=lfs merge=lfs -text
*.radiosonde filter=lfs diff=lfs merge=lfs -text
*.dump filter=lfs diff=lfs merge=lfs -text
*.prepbufr filter=lfs diff=lfs merge=lfs -text
*.hdf filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions src/compo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ list(APPEND programs
aeronet_aod2ioda.py
aeronet_aaod2ioda.py
airnow2ioda_nc.py
caliop_l2_ext2ioda.py
gcas_nc2ioda.py
icartt_nc2ioda.py
mls_o3_nc2ioda.py
Expand Down
316 changes: 316 additions & 0 deletions src/compo/caliop_l2_ext2ioda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
#!/usr/bin/env python3

#
# (C) Copyright 2025 UCAR
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#

import argparse
from datetime import datetime, timedelta, timezone
import os, sys

from pyhdf.HDF import HDF
from pyhdf.VS import VS
from pyhdf.SD import SD, SDC
import numpy as np
import netCDF4 as nc

import pyiodaconv.ioda_conv_engines as iconv
from collections import defaultdict, OrderedDict
from pyiodaconv.orddicts import DefaultOrderedDict
from pyiodaconv.def_jedi_utils import compute_scan_angle
from pyiodaconv.def_jedi_utils import iso8601_string, epoch

os.environ["TZ"] = "UTC"

# globals
CALIPSO_WMO_sat_ID = 787

AttrData = {
'converter': os.path.basename(__file__),
"platformCommonName": "CALIPSO",
"platformLongDescription": "CALIPSO L2 Lidar Data",
}

metaKeyList = [
("latitude", "float", "degrees_north"),
("longitude", "float", "degrees_east"),
("dateTime", "long", iso8601_string),
("pressure", "float", "Pa"),
("sensorCentralWavelength", "float", "micron"),
("sensorCentralFrequency", "float", "Hz"),
("height", "float", "m"),
("atmosphereLayerThicknessZ", "float", "m"),
("cloudAerosolDiscriminationHigher", "integer", ""),
("cloudAerosolDiscriminationLower", "integer", ""),
("sequenceNumber", "integer", ""),
]

DimDict = {
}

VarDims = {
'extinctionCoefficient': ['Location', 'Channel'],
'pressure': ['Location'],
'height': ['Level'],
'atmosphereLayerThicknessZ': ['Level'],
'sensorCentralWavelength': ['Channel'],
'sensorCentralFrequency': ['Channel'],
'cloudAerosolDiscriminationHigher': ['Location'],
'cloudAerosolDiscriminationLower': ['Location'],
}

obsvars = ["extinctionCoefficient"]
channels = [1, 2]
wavelength = np.array([0.532, 1.064])
speed_light = 2.99792458E8
frequency = speed_light * 1.0E6 / wavelength

metaDataName = iconv.MetaDataName()
obsValName = iconv.OvalName()
obsErrName = iconv.OerrName()
qcName = iconv.OqcName()

varsKeyList = [('valKey', obsValName, 'float', 'longitude latitude height', "km-1"),
('errKey', obsErrName, 'float', 'longitude latitude height', "km-1"),
('qcKey', qcName, 'integer', 'longitude latitude height', None)]

float_missing_value = iconv.get_default_fill_val(np.float32)
double_missing_value = iconv.get_default_fill_val(np.float64)
int_missing_value = iconv.get_default_fill_val(np.int32)
long_missing_value = iconv.get_default_fill_val(np.int64)
string_missing_value = iconv.get_default_fill_val(np.str_)

missing_vals = {'string': string_missing_value,
'integer': int_missing_value,
'long': long_missing_value,
'float': float_missing_value,
'double': double_missing_value}


class caliop_l2ext(object):
def __init__(self, filenames, date_range):
self.filenames = filenames
wbeg = datetime.strptime(date_range[0], "%Y%m%d%H%M").replace(tzinfo=timezone.utc) - epoch
wend = datetime.strptime(date_range[1], "%Y%m%d%H%M").replace(tzinfo=timezone.utc) - epoch
self.wbeg = wbeg.total_seconds()
self.wend = wend.total_seconds()
self.varDict = defaultdict(lambda: defaultdict(dict))
self.outdata = defaultdict(lambda: DefaultOrderedDict(OrderedDict))
self.setDicts()
self._read()

def setDicts(self):
meta_keys = [m_item[0] for m_item in metaKeyList]
# Set units of the MetaData variables and all _FillValues.
self.varAttrs = DefaultOrderedDict(lambda: DefaultOrderedDict(dict))
for key in meta_keys:
dtypestr = metaKeyList[meta_keys.index(key)][1]
if metaKeyList[meta_keys.index(key)][2]:
self.varAttrs[(key, metaDataName)]['units'] = metaKeyList[meta_keys.index(key)][2]
self.varAttrs[(key, metaDataName)]['_FillValue'] = missing_vals[dtypestr]

var_keys = [v_item[0] for v_item in varsKeyList]
# set up variable names for IODA
for iodavar in obsvars:
for key in var_keys:
varGroupName = varsKeyList[var_keys.index(key)][1]
dtypestr = varsKeyList[var_keys.index(key)][2]
coord = varsKeyList[var_keys.index(key)][3]
self.varDict[iodavar][key] = iodavar, varGroupName
self.varAttrs[iodavar, varGroupName]['coordinates'] = coord
self.varAttrs[iodavar, varGroupName]['_FillValue'] = missing_vals[dtypestr]
if varsKeyList[var_keys.index(key)][4]:
self.varAttrs[iodavar, varGroupName]['units'] = varsKeyList[var_keys.index(key)][4]

def caliop_time2dt(self, time):
"""
Convert CALIOP Profile_UTC_Time to datetime.datetime object

Args:
time: list or array of Profile_UTC_Time from CALIOP file (float number: yymmdd.ffffffff)
"""
dtarr = [datetime.strptime(str(t)[:6], '%y%m%d') for t in time]
delta = [timedelta(frac) for frac in np.mod(time, 1)]
outarr = [(dt + dl).replace(tzinfo=timezone.utc) for dt, dl in zip(dtarr, delta)]
return outarr

def _read(self):
# default missing value in CALIPSO file
caliop_missing_value = -9999.
caliop_ref_time = datetime(1993, 1, 1, 0, 0, 0)
nchan = len(channels)
output_chidx = np.array(channels, dtype=np.int32) - 1

# Make empty lists for the output vars
self.outdata[('latitude', metaDataName)] = np.array([], dtype=np.float32)
self.outdata[('longitude', metaDataName)] = np.array([], dtype=np.float32)
self.outdata[('dateTime', metaDataName)] = np.array([], dtype=np.int64)
self.outdata[('pressure', metaDataName)] = np.array([], dtype=np.float32)
self.outdata[('sequenceNumber', metaDataName)] = np.array([], dtype=np.int32)
self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)] = np.array([], dtype=np.int32)
self.outdata[('cloudAerosolDiscriminationLower', metaDataName)] = np.array([], dtype=np.int32)
for iodavar in obsvars:
self.outdata[self.varDict[iodavar]['valKey']] = np.array([], dtype=np.float32)
self.outdata[self.varDict[iodavar]['errKey']] = np.array([], dtype=np.float32)
self.outdata[self.varDict[iodavar]['qcKey']] = np.array([], dtype=np.int32)

# Get the lidar data altitude
tmpfile = self.filenames[0]
tmphdf = HDF(tmpfile)
vs = tmphdf.vstart()
metaid = vs.find('metadata')
vd = vs.attach(metaid)
vd.setfields('Lidar_Data_Altitudes')
alt = np.array(vd.read()[0][0]) * 1000.
nlev = alt.size
vd.detach()
vs.end()

# Calculate the thickness of LiDAR profile
thickness = np.empty_like(alt)
thickness[1:-1] = 0.5 * (alt[:-2] - alt[2:])
thickness[0] = alt[0] - alt[1]
thickness[-1] = alt[-2] - alt[-1]

# Adjust thickness near 20.2 km because it should be around 180m above and 60m below 20.2km
tmpidx = np.argmin(np.abs(alt-20200))
oldthick = thickness[tmpidx]
if alt[tmpidx] > 20200:
thickness[tmpidx] = thickness[tmpidx - 1]
thickness[tmpidx + 1] = thickness[tmpidx + 1] + np.abs(thickness[tmpidx] - oldthick)
else:
thickness[tmpidx] = thickness[tmpidx + 1]
thickness[tmpidx - 1] = thickness[tmpidx - 1] + np.abs(thickness[tmpidx] - oldthick)

prev_nloc = 0
for f in self.filenames:
sd = SD(f, SDC.READ)

pres = sd.select('Pressure').get().ravel() * 1e2 # hPa to Pa
lats = sd.select('Latitude').get()[:, 1]
lons = sd.select('Longitude').get()[:, 1]
proftime = sd.select('Profile_UTC_Time').get()[:, 1]
obs_time = np.array([(pt - epoch).total_seconds() for pt in self.caliop_time2dt(proftime)],
dtype=np.int64)

nloc = lats.size
profidx = np.arange(prev_nloc, prev_nloc + nloc)

lats = np.repeat(lats, nlev)
lons = np.repeat(lons, nlev)
profidx = np.repeat(profidx, nlev)
obs_time = np.repeat(obs_time, nlev)
winmsk = ((obs_time >= self.wbeg) & (obs_time <= self.wend))
prev_nloc += nloc

obs = np.zeros((nloc * nlev, nchan))
err = np.zeros_like(obs)
qcf = np.zeros_like(obs)
for i, chidx in enumerate(output_chidx):
wavelength_str = str(int(wavelength[chidx] * 1e3))
obsvarname = f"Extinction_Coefficient_{wavelength_str}"
errvarname = f"Extinction_Coefficient_Uncertainty_{wavelength_str}"
qcfvarname = f"Extinction_QC_Flag_{wavelength_str}"
# Level 2 QC flag stores 30m level 1 QC flag below 8.3 km in the rightmost dimension
# Based on Young et al. (2018): qc flag value 0, 1, 2, 16, and 18 should be used.
tmpqcf = sd.select(qcfvarname).get()
tmpqcf = tmpqcf.reshape(-1, tmpqcf.shape[2])
tmpqcf = np.where((tmpqcf[:, 0] == tmpqcf[:, 1]), tmpqcf[:, 0],
np.maximum(tmpqcf[:, 0], tmpqcf[:, 1]))
tmpqcf = np.where(np.isin(tmpqcf, [0, 1, 2, 16, 18]), 0, 1)

obs[:, i] = sd.select(obsvarname).get().ravel()
err[:, i] = sd.select(errvarname).get().ravel()
qcf[:, i] = tmpqcf

# Similar to QC_Flag, it stores higher and lower 30 meter layers' CAD score
cad1 = np.zeros_like(pres)
cad2 = np.zeros_like(pres)
tmpcad = sd.select("CAD_Score").get()
tmpcad = tmpcad.reshape(-1, tmpcad.shape[2])
cad1 = tmpcad[:, 0]
cad2 = tmpcad[:, 1]

obs = np.where((obs == caliop_missing_value), float_missing_value, obs)
err = np.where((err == caliop_missing_value), float_missing_value, err)
pres = np.where(pres < 0, float_missing_value, pres)

self.outdata[('latitude', metaDataName)] = np.append(self.outdata[('latitude', metaDataName)],
np.array(lats[winmsk], dtype=np.float32))
self.outdata[('longitude', metaDataName)] = np.append(self.outdata[('longitude', metaDataName)],
np.array(lons[winmsk], dtype=np.float32))
self.outdata[('dateTime', metaDataName)] = np.append(self.outdata[('dateTime', metaDataName)],
np.array(obs_time[winmsk], dtype=np.int64))
self.outdata[('pressure', metaDataName)] = np.append(self.outdata[('pressure', metaDataName)],
np.array(pres[winmsk], dtype=np.float32))
self.outdata[('sequenceNumber', metaDataName)] = np.append(self.outdata[('sequenceNumber', metaDataName)],
np.array(profidx[winmsk], dtype=np.int32))
self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)] = np.append(
self.outdata[('cloudAerosolDiscriminationHigher', metaDataName)], np.array(cad1[winmsk], dtype=np.int32))
self.outdata[('cloudAerosolDiscriminationLower', metaDataName)] = np.append(
self.outdata[('cloudAerosolDiscriminationLower', metaDataName)], np.array(cad2[winmsk], dtype=np.int32))

for iodavar in obsvars:
self.outdata[self.varDict[iodavar]['valKey']] = np.append(
self.outdata[self.varDict[iodavar]['valKey']], np.array(obs[winmsk, :], dtype=np.float32))
self.outdata[self.varDict[iodavar]['errKey']] = np.append(
self.outdata[self.varDict[iodavar]['errKey']], np.array(err[winmsk, :], dtype=np.float32))
self.outdata[self.varDict[iodavar]['qcKey']] = np.append(
self.outdata[self.varDict[iodavar]['qcKey']], np.array(qcf[winmsk, :], dtype=np.int32))

sd.end()

self.outdata[('sensorCentralWavelength', metaDataName)] = np.array(wavelength, dtype=np.float32)[output_chidx]
self.outdata[('sensorCentralFrequency', metaDataName)] = np.array(frequency, dtype=np.float32)[output_chidx]
self.outdata[('height', metaDataName)] = np.array(alt, dtype=np.float32)
self.outdata[('atmosphereLayerThicknessZ', metaDataName)] = np.array(thickness, dtype=np.float32)

tmpseq = self.outdata[('sequenceNumber', metaDataName)]
self.outdata[('sequenceNumber', metaDataName)] = tmpseq - min(tmpseq)

DimDict['Location'] = len(self.outdata[('dateTime', metaDataName)])
DimDict['Channel'] = np.array(channels)
DimDict['Level'] = np.arange(nlev)


def main():
parser = argparse.ArgumentParser(
description=(
'Reads the CALIOP Level 2 aerosol profile data '
' convert into IODA formatted output files. '
' Multiple files are concatenated')
)

required = parser.add_argument_group(title='required arguments')
required.add_argument(
'-i', '--input',
help="path of CALIOP APro (HDF4) input file(s)",
type=str, nargs='+', required=True)
required.add_argument(
'-o', '--output',
help='path to output ioda file',
type=str, required=True)

optional = parser.add_argument_group(title='optional arguments')
optional.add_argument(
'--date_range',
help="extract a date range to fit the data assimilation window"
"format -r YYYYMMDDHHmm YYYYMMDDHHmm",
type=str, metavar=('begindate', 'enddate'), nargs=2,
default=('197001010000', '217001010000'))

args = parser.parse_args()

# Read CALIPSO extinction profile data
caliop_l2 = caliop_l2ext(args.input, args.date_range)

# write everything out
writer = iconv.IodaWriter(args.output, metaKeyList, DimDict)
writer.BuildIoda(caliop_l2.outdata, VarDims, caliop_l2.varAttrs, AttrData)


if __name__ == "__main__":
main()
14 changes: 14 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ list( APPEND test_input
testinput/balloonsonde_sample_20241001T00Z_PT15M.json
testinput/tec_groundBased_TENET_20201001T00Z.tec
testinput/COWVR_TSDR.020421.20240430T215845.20240430T230345.V1001.sample.h5
testinput/caliop_l2_APro.hdf
)

list( APPEND test_output
Expand Down Expand Up @@ -211,6 +212,7 @@ list( APPEND test_output
testoutput/obs.20241001T000000Z_PT15M_balloonsonde.nc4
testoutput/obs.20201001T000000Z_PT1M_tec_groundbased.nc4
testoutput/obs.20240430T220600Z_PT2M_cowvr_iss.nc4
testoutput/caliop_l2_APro.nc4
)

if( iodaconv_gnssro_ENABLED )
Expand Down Expand Up @@ -1540,6 +1542,18 @@ ecbuild_add_test( TARGET test_${PROJECT_NAME}_viirs_j1_l1b_albedo
--date_range 2021080503 2021080509"
viirs_j1_l1b_albedo_2021080506.nc ${IODA_CONV_COMP_TOL})

ecbuild_add_test( TARGET test_${PROJECT_NAME}_caliop_l2_ext
TYPE SCRIPT
ENVIRONMENT "PYTHONPATH=${IODACONV_PYTHONPATH}"
COMMAND bash
ARGS ${CMAKE_BINARY_DIR}/bin/iodaconv_comp.sh
netcdf
"${Python3_EXECUTABLE} ${CMAKE_BINARY_DIR}/bin/caliop_l2_ext2ioda.py
-i testinput/caliop_l2_APro.hdf
-o testrun/caliop_l2_APro.nc4
--date_range 201907220340 201907220350"
caliop_l2_APro.nc4 ${IODA_CONV_COMP_TOL})


#==============================================================================
# Bufr Ingester tests
Expand Down
3 changes: 3 additions & 0 deletions test/testinput/caliop_l2_APro.hdf
Git LFS file not shown
Binary file modified test/testinput/modis_aod.hdf
Binary file not shown.
3 changes: 3 additions & 0 deletions test/testoutput/caliop_l2_APro.nc4
Git LFS file not shown