diff --git a/.gitignore b/.gitignore index 15fb98778..19748567f 100644 --- a/.gitignore +++ b/.gitignore @@ -172,6 +172,8 @@ convert_L2b convert_goes_ABI_L1b MOD29E1D_to_obs hf_to_obs +arvor_to_obs +svp_to_obs # Test programs built by developer_tests rttov_test @@ -198,6 +200,8 @@ find_enclosing_indices_test find_first_occurrence_test nml_test parse_args_test +parse_csv_test +csv_read_test sort_test stacktest obs_rwtest diff --git a/assimilation_code/modules/utilities/read_csv_mod.f90 b/assimilation_code/modules/utilities/read_csv_mod.f90 new file mode 100644 index 000000000..eb15e705d --- /dev/null +++ b/assimilation_code/modules/utilities/read_csv_mod.f90 @@ -0,0 +1,598 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download +! +! Utility routines for reading simple ASCII, CSV-like tabular data. +! +! This module provides a lightweight interface to work with +! non-NetCDF data products. It supports files with a single +! header row and delimited fields (comma, semicolon, or whitespace). +! +! Some features: +! - Cached CSV file handle with header, delimiter, and row & column count. +! - Column-based access by field name (character, integer, real). +! - Automatic delimiter detection with optional override. +! +! Files are opened once using csv_open() and closed explicitly using +! csv_close(). Internally, the file is rewound as needed to reread data. +! +! Example usage: +! +! type(csv_file_type) :: cf +! real(r8), allocatable :: lat(:) +! +! call csv_open('input.csv', cf) +! allocate(lat(csv_get_nrows(cf))) +! call csv_get_field(cf, 'lat', lat) +! call csv_close(cf) + +module read_csv_mod + +use types_mod, only : r8 +use utilities_mod, only : error_handler, E_ERR, find_textfile_dims, & + open_file, close_file, to_upper, & + string_to_real, string_to_integer + +implicit none +private + +public :: csv_get_nrows, & + csv_get_field, & + csv_field_exists, & + csv_print_header, & + csv_get_field_index, & + csv_file_type, & + get_csv_words_from_string, & + csv_open, & + csv_close + +interface csv_get_field + module procedure csv_get_field_char + module procedure csv_get_field_int + module procedure csv_get_field_real +end interface csv_get_field + +character(len=*), parameter :: source = 'read_csv_mod.f90' +integer, parameter :: MAX_FIELDS_LEN = 15000 +integer, parameter :: MAX_NUM_FIELDS = 1000 + +! a csv file structure +type csv_file_type + private + character(len=256) :: filename = '' + integer :: nrows = 0 + integer :: ncols = 0 + integer :: iunit = -1 + character :: delim = ',' + character(len=512) :: fields(MAX_NUM_FIELDS) + logical :: is_open = .false. +end type csv_file_type + + +character(len=512) :: string1 + +contains + + +!-------------------------------------------------------------- +! Retrieve a string column using a cached handle. +subroutine csv_get_field_char(cf, varname, varvals, context) + +type(csv_file_type), intent(inout) :: cf +character(len=*), intent(in) :: varname +character(len=*), intent(out) :: varvals(:) +character(len=*), intent(in), optional :: context + +character(len=*), parameter :: routine = 'csv_get_field_char' + +integer :: fidx +integer :: io, iobs, nfields +character(len=MAX_FIELDS_LEN) :: line +character(len=512) :: entries(MAX_NUM_FIELDS) + + +if (.not. cf%is_open) then + string1 = 'CSV file handle has not been initialized.' + call error_handler(E_ERR, routine, string1, context) + return +endif + +! 
initialize return value +varvals(:) = '' + +! Locate field index in cached header +fidx = csv_find_field(cf, varname) +if (fidx < 1 .or. fidx > cf%ncols) then + string1 = 'Requested field "'//trim(varname)//'" not found in header for file "'// & + trim(cf%filename)//'"' + call error_handler(E_ERR, routine, string1, context) +endif + +! Skip header and start reading +call csv_skip_header(cf) + +! Read rows +if (size(varvals) /= cf%nrows) then + write(string1, '(A, I0, A, I0)') 'Size mismatch: varvals has ', size(varvals), & + ' entries but csv handle reports nrows = ', cf%nrows + call error_handler(E_ERR, routine, string1, context) +endif + +do iobs = 1, cf%nrows + read(cf%iunit, '(A)', iostat=io) line + if (io /= 0) then + write(string1, '(A, I0, A, I0)') 'Unexpected EOF or read error after ', iobs-1, & + ' data lines in "', trim(cf%filename)//'"' + call error_handler(E_ERR, routine, string1, context) + endif + + call get_csv_words_from_string(line, cf%delim, nfields, entries) + + varvals(iobs) = trim(entries(fidx)) +enddo + +end subroutine csv_get_field_char + + +!-------------------------------------------------------------- +! Read integer data from csv file +subroutine csv_get_field_int(cf, varname, varvals, context) + +type(csv_file_type), intent(inout) :: cf +character(len=*), intent(in) :: varname +integer, intent(out) :: varvals(:) +character(len=*), intent(in), optional :: context + +character(len=*), parameter :: routine = 'csv_get_field_int' + +integer :: i +character(len=512) :: strvals(size(varvals)) + +! Read it as string first +call csv_get_field_char(cf, varname, strvals, context) + +! Then convert to int. +! string_to_integer() returns MISSING_I if the number +! can't be converted. +do i = 1, size(varvals) + varvals(i) = string_to_integer(strvals(i)) +enddo + +end subroutine csv_get_field_int + + +!-------------------------------------------------------------- +! Read real data from csv file +subroutine csv_get_field_real(cf, varname, varvals, context) + +type(csv_file_type), intent(inout) :: cf +character(len=*), intent(in) :: varname +real(r8), intent(out) :: varvals(:) +character(len=*), intent(in), optional :: context + +character(len=*), parameter :: routine = 'csv_get_field_real' + +integer :: i +character(len=512) :: strvals(size(varvals)) + +call csv_get_field_char(cf, varname, strvals, context) + +! To real for all column data. +! string_to_real() returns MISSING_R8 if the number +! can't be converted. +do i = 1, size(varvals) + varvals(i) = string_to_real(strvals(i)) +enddo + +end subroutine csv_get_field_real + + +!---------------------------------------------------- +! Get number of rows in the csv file (without header line) +integer function csv_get_nrows_from_file(fname, context) result(nrows) + +character(len=*), intent(in) :: fname +character(len=*), intent(in), optional :: context + +character(len=*), parameter :: routine = 'csv_get_obs_num' + +! Number of data entries in file +call find_textfile_dims(fname, nrows) + +! Omit the header +nrows = nrows - 1 + +! Check if no data in file +if (nrows <= 0) then + write(string1, '(4A, i0)') 'Input CSV file "', trim(fname), '" contains ', & + 'no data. The number of data lines is ', nrows + call error_handler(E_ERR, routine, string1, context) +endif + +end function csv_get_nrows_from_file + + +!--------------------------------------------------- +! Find field index using cached header in csv_file_type. +! Returns -1 if field not found. 
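+! Matching is case-insensitive: both the key and the cached header
+! fields are trimmed and upper-cased before they are compared, so
+! 'lat', 'Lat', and ' LAT ' all select the same column.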
+integer function csv_find_field(cf, key) result(idx) + +type(csv_file_type), intent(in) :: cf +character(len=*), intent(in) :: key + +integer :: i +character(len=64) :: field_name, field_key + +field_key = adjustl(trim(key)) +call to_upper(field_key) + +idx = -1 + +do i = 1, cf%ncols + field_name = adjustl(trim(cf%fields(i))) + call to_upper(field_name) + if (field_name == field_key) then + idx = i + return + endif +enddo + +end function csv_find_field + + +!------------------------------------------------- +! Detect the delimiter of a CSV file. One should expect a ',' +! but certain data files can come with a ';' so +! let's take care of both cases +! Delimiter detection is heuristic unless forced_delim is provided. +! Default is to return ','. +function detect_delim(line, forced_delim) result(delim) + +character(len=*), intent(in) :: line +character, intent(in), optional :: forced_delim + +character :: delim +integer :: i, nsemi, ncomma, L + +! Caller knows best +if (present(forced_delim)) then + delim = forced_delim + return +endif + +L = len_trim(line) + +nsemi = 0 +ncomma = 0 + +do i = 1, L + if (line(i:i) == ';') nsemi = nsemi + 1 + if (line(i:i) == ',') ncomma = ncomma + 1 +enddo + +! Is there a better check? +! Changed to return comma unless more semicolons are found. +! If equal, default to commas which is the more common format. +! (CSV == comma separated values) +if (nsemi > ncomma) then + delim = ';' +else + delim = ',' +endif + +end function detect_delim + + +!-------------------------------------------------------------- +! Help function to get field/column index of requested variable +integer function csv_get_field_index(cf, varname) result(idx) + +type(csv_file_type), intent(in) :: cf +character(len=*), intent(in) :: varname + +idx = csv_find_field(cf, varname) + +end function csv_get_field_index + + +!--------------------------------------------- +! Check if a field exist in the csv file +logical function csv_field_exists(cf, varname) result(found) + +type(csv_file_type), intent(in) :: cf +character(len=*), intent(in) :: varname + +integer :: idx + +idx = csv_find_field(cf, varname) +found = (idx > 0) + +end function csv_field_exists + + +!---------------------------------------- +! Print the fields names in the csv file +subroutine csv_print_header(cf) + +type(csv_file_type), intent(in) :: cf + +integer :: i + +write(*, '(2X, A, I0, A)') 'CSV Header Fields (', cf%ncols, ' columns):' +do i = 1, cf%ncols + print '(I5,2X,A)', i, trim(cf%fields(i)) +end do +end subroutine csv_print_header + + +!------------------------------------------- +! Rewind a file and skip its header +subroutine csv_skip_header(cf, context) + +type(csv_file_type), intent(inout) :: cf +character(len=*), intent(in), optional :: context + +integer :: io +character(len=MAX_FIELDS_LEN) :: line + +character(len=*), parameter :: routine = 'csv_skip_header' + +! We just need to skip the header, so make sure +! we're back at the top +rewind(cf%iunit, iostat=io) +if (io /= 0) then + write(string1,'(A,I0)') 'REWIND failed, io=', io + call error_handler(E_ERR, routine, string1, context) + return +endif + +read(cf%iunit, '(A)', iostat=io) line +if (io /= 0) then + write(string1,'(A,I0)') 'READ(header) failed, io=', io + call error_handler(E_ERR, routine, string1, context) + return +endif + +end subroutine csv_skip_header + + +!------------------------------------------- +! 
Accessor function to get number of rows +function csv_get_nrows(cf) result(ndata) + +type(csv_file_type), intent(in) :: cf +integer :: ndata + +ndata = cf%nrows + +end function csv_get_nrows + + +!------------------------------------------- +! Open a CSV handle: cache header/dims. +! By doing so, we won't need to open the file +! every time to read header or get dimensions. +subroutine csv_open(fname, cf, forced_delim, context) + +character(len=*), intent(in) :: fname +type(csv_file_type), intent(out) :: cf +character(len=*), intent(in), optional :: forced_delim +character(len=*), intent(in), optional :: context + +character(len=*), parameter :: routine = 'csv_open' + +integer :: io +character(len=MAX_FIELDS_LEN) :: line + +! Reset +cf%filename = fname +cf%nrows = 0 +cf%ncols = 0 +cf%iunit = -1 +cf%delim = ',' +cf%fields = '' +cf%is_open = .false. + +! Number of rows (excluding header) +cf%nrows = csv_get_nrows_from_file(fname, context) + +! Read header and delimiter +cf%iunit = open_file(fname, action='read', form='formatted') + +read(cf%iunit, '(A)', iostat=io) line +if (io /= 0) then + call csv_close(cf) + + write(string1, '(A, I0)') 'Got bad read code from input file, io = ', io + call error_handler(E_ERR, routine, string1, context) + return +endif + +! Can also enforce a specific delim as a second argument +cf%delim = detect_delim(line, forced_delim) + +call get_csv_words_from_string(line, cf%delim, cf%ncols, cf%fields) + +cf%is_open = .true. + +end subroutine csv_open + + +!--------------------------------------------------- +! Just invalidate/reset the csv handle. +subroutine csv_close(cf) + +type(csv_file_type), intent(inout) :: cf + +if (cf%iunit > 0) call close_file(cf%iunit) + +cf%filename = '' +cf%nrows = 0 +cf%ncols = 0 +cf%iunit = -1 +cf%delim = ',' +cf%fields = '' +cf%is_open = .false. + +end subroutine csv_close + + +!------------------------------------------------------------------------------ +! parse a single string up into delimeter-separated words +! +! This code is closely related to the parse routines +! in parse_args_mod.f90. It is customized for CSV rules +! using separator characters that are not blanks. + +subroutine get_csv_words_from_string(inline, delim, wordcount, words) + + character(len=*), intent(in) :: inline + character, intent(in) :: delim + integer, intent(out) :: wordcount + character(len=*), intent(out) :: words(:) + +! in all these offsets, they are relative to 1, left hand char in string: +! firstoff is offset to next delimiter character starting a word +! thisoff is offset to the current character +! finaloff is offset of the last non-delimiter character in the string +! inword is a logical which toggles when inside a word or not +! maxw are the max number of words, defined by what the caller passes in +! maxl is the max length of any one word, again defined by the size of the +! incoming array. + +integer :: firstoff, finaloff, thisoff +logical :: inword +integer :: maxw, maxl +integer :: wordlen, i + +character(len=len(inline)) :: wordline +character(len=512) :: msgstring, msgstring2 +character :: endword, thisc +character(len=*), parameter :: routine = 'get_csv_words_from_string' + + +! maxw is max number of 'words' allowed +! maxl is the max length of any one 'word' + +maxw = size(words) +maxl = len(words(1)) + +words = '' +wordcount = 0 + +finaloff = len_trim(inline) +if (finaloff <= 0) return + +wordline = inline + +firstoff = 1 +thisoff = 1 +inword = .true. +wordlen = 0 +endword = delim + +NEXTCHAR: do + ! end of input? 
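+   ! (if so, a word still in progress is the final field on the
+   ! line, so store it before leaving the loop)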
+   if (thisoff > finaloff) then
+      ! if currently in a word, complete it
+      if (inword) then
+         wordcount = wordcount + 1
+         if (wordcount > maxw) exit NEXTCHAR
+         wordlen = thisoff-firstoff-1
+         if (wordlen > maxl) exit NEXTCHAR
+         words(wordcount) = wordline(firstoff:firstoff+wordlen)
+      endif
+      exit NEXTCHAR
+   endif
+
+   ! next character on line
+   thisc = wordline(thisoff:thisoff)
+
+   ! this (escape by backslash) doesn't seem to be universally supported
+   ! by CSV files but I can't see that it hurts.
+
+   ! escaped chars - backslash prevents interpretation of next char
+   if (thisc == '\') then
+      ! move the remainder of the string over, overwriting the \ and
+      ! skipping the next char.
+      do i=thisoff, finaloff-1
+         wordline(i:i) = wordline(i+1:i+1)
+      enddo
+      wordline(finaloff:finaloff) = ' '
+      finaloff = finaloff-1
+      thisoff = thisoff+1
+      cycle NEXTCHAR
+   endif
+
+   ! transition into a word? this is slightly more complex than blank
+   ! separated words. in a CSV file, the delimiters separate fields, so
+   ! the first field doesn't start with one, and the last field doesn't
+   ! end with one. quotes can be used immediately after a delimiter to
+   ! keep field data together. the next char after a closing quote
+   ! should be the field delimiter.
+
+   ! start of a delimiter-separated string.
+   ! unlike strings of blanks, you can't skip strings of consecutive delimiters
+   ! and the first and last fields aren't enclosed by delimiters.
+   if (.not. inword) then
+      if (thisc == delim) then
+         inword = .true.
+         thisoff = thisoff+1   ! skip delimiter
+         firstoff = thisoff    ! first char of field
+         endword = thisc
+      else
+         write(msgstring, *) "error? not in word, next char not delimiter"
+         call error_handler(E_ERR,routine,msgstring,source)
+      endif
+      cycle NEXTCHAR
+   endif
+
+   ! transition out of a word?
+   ! also, if the first character of a word is a quote, the
+   ! word continues until the closing quote.
+   if (inword) then
+      ! if first char of string is a quote, skip it and mark it as
+      ! the new delimiter
+      if ((thisoff == firstoff) .and. &
+          (thisc == '"' .or. thisc == "'")) then
+         endword = thisc
+         thisoff = thisoff+1
+         firstoff = thisoff    ! reset start of field
+         cycle NEXTCHAR
+      endif
+      ! if we come to a delimiter, check for quote and remove it
+      ! and reset the delimiter char
+      if (thisc == endword) then
+         inword = .false.
+         wordlen = thisoff-firstoff-1
+         if (thisc == '"' .or. thisc == "'") then
+            endword = delim
+            thisoff = thisoff+1   ! skip quote
+         endif
+         wordcount = wordcount + 1
+         if (wordcount > maxw) exit NEXTCHAR
+         if (wordlen > maxl) exit NEXTCHAR
+         words(wordcount) = wordline(firstoff:firstoff+wordlen)
+         cycle NEXTCHAR
+      endif
+      thisoff = thisoff + 1   ! normal case, word contents OR end of word, skip delimiter
+      cycle NEXTCHAR
+   endif
+
+enddo NEXTCHAR
+
+if (wordcount > maxw) then
+   write(msgstring,*) 'more delimiter-separated words than max number allowed by calling code, ', maxw
+   call error_handler(E_ERR,routine,msgstring,source)
+endif
+
+if (wordlen > maxl) then
+   write(msgstring,*) 'one or more words longer than max length allowed by calling code, ', maxl
+   call error_handler(E_ERR,routine,msgstring,source)
+endif
+
+
+end subroutine get_csv_words_from_string
+
+
+end module read_csv_mod
diff --git a/assimilation_code/modules/utilities/read_csv_mod.rst b/assimilation_code/modules/utilities/read_csv_mod.rst
new file mode 100644
index 000000000..4b47885ee
--- /dev/null
+++ b/assimilation_code/modules/utilities/read_csv_mod.rst
@@ -0,0 +1,176 @@
+.. 
index :: csv + +MODULE read_csv_mod +=================== + +.. contents:: + :depth: 2 + :local: + +Overview +-------- + +The ``read_csv_mod`` module provides lightweight utility routines for reading +simple ASCII, CSV-like tabular data products. It is intended for use in DART +observation converters and preprocessing workflows that need column-based +access to non-NetCDF data without additional external dependencies. + +The module supports files with exactly one header row followed by data rows, +where fields are separated by a single-character delimiter (typically comma or +semicolon). + +Public types +------------ + +.. rubric:: ``type(csv_file_type)`` + +A handle that stores cached information about an open CSV file. All +components are private and must be accessed via module procedures. + +.. container:: routine + + :: + + type csv_file_type + private + + character(len=256) :: filename + integer :: nrows + integer :: ncols + integer :: iunit + character :: delim + character(len=512) :: fields(MAX_NUM_FIELDS) + logical :: is_open + end type csv_file_type + +Public interfaces and routines +------------------------------ + +.. rubric:: ``csv_open(fname, cf, forced_delim, context)`` + +Opens a delimited text file, reads and caches the header, determines the number +of data rows, and initializes the CSV handle. + +.. rubric:: ``csv_close(cf)`` + +Closes the file unit (if open) and resets the handle. + +.. rubric:: ``csv_get_nrows(cf)`` + +Returns the number of data rows in the file (excluding the header). + +.. rubric:: ``csv_get_field(cf, varname, varvals, context)`` + +Generic interface to read a column by header name. Dispatches to type-specific +implementations for character, integer, or real output arrays. + +.. note:: + + - The output array must be sized exactly to ``csv_get_nrows(cf)``. + - Header name matching is case-insensitive. + - The file is rewound and the header skipped internally for each call. + +.. rubric:: ``csv_get_field_index(cf, varname)`` + +Returns the 1-based column index of ``varname`` or -1 if the field is not found. + +.. rubric:: ``csv_field_exists(cf, varname)`` + +Returns ``.true.`` if ``varname`` exists in the cached header. + +.. rubric:: ``csv_print_header(cf)`` + +Prints the cached header fields and their indices to standard output. + +.. rubric:: ``get_csv_words_from_string(inline, delim, wordcount, words)`` + +Splits a single line into delimiter-separated fields using CSV-like parsing +rules. + +Typical usage +------------- + +A CSV file is opened using a CSV-type handle. Output arrays are then allocated +using the number of data rows in the file. Data are read one column at a time +as follows: + +.. 
code-block:: fortran + + use read_csv_mod, only : csv_file_type, csv_open, csv_close, csv_get_nrows, csv_get_field + use types_mod, only : r8 + + type(csv_file_type) :: cf + real(r8), allocatable :: lat(:) + + call csv_open('input.csv', cf) + + allocate(lat(csv_get_nrows(cf))) + call csv_get_field(cf, 'lat', lat) + + call csv_close(cf) + +What this module does +--------------------- + +- Reads delimited ASCII tables with: + + - one header row containing field names + - one data record per subsequent line + - a single-character delimiter + +- Opens the file once and caches metadata in a ``csv_file_type`` handle: + + - filename and Fortran unit number + - detected or user-specified delimiter + - header field names and number of columns + - number of data rows (excluding the header) + +- Provides column-based access by header name using the generic interface + ``csv_get_field()``, returning: + + - character strings (raw field contents) + - integers (via ``string_to_integer``) + - reals (via ``string_to_real``) + +- Allows repeated access to different columns without reopening the file. + Internally, the file is rewound and the header skipped for each read. + +- Handles numeric conversion failures non-fatally: + + - values that cannot be converted to integer are returned as ``MISSING_I`` + - values that cannot be converted to real are returned as ``MISSING_R8`` + +- Treats backslash (``\``) as an escape character, preventing interpretation of the following character during parsing. + +What this module does not do +---------------------------- + +- It does not implement the full CSV specification (e.g., RFC 4180). The parser + is intentionally simple and designed for common, well-behaved tabular files. + +- It does not support: + + - multiple header lines + - comment lines or metadata blocks + - embedded newlines inside quoted fields + +- It does not infer or mix data types within a single read. Each call to + ``csv_get_field()`` reads a *single column* into a single output type + (character, integer, or real). If a column contains mixed content + (e.g., numeric and non-numeric values), numeric conversion will produce + missing values for the non-conforming entries rather than raising an error. + +- It does not read multiple columns in a single pass. Each call to + ``csv_get_field()`` rewinds and scans the file again, which may be inefficient + when many columns are required. + +Delimiter behavior: forced vs detected +-------------------------------------- + +By default, ``csv_open()`` detects the delimiter from the header line using a +simple heuristic that distinguishes between comma (`,`) and semicolon (`;`) +characters. If semicolons occur more frequently than commas in the header, +the delimiter is assumed to be a semicolon; otherwise a comma is used. + +If the optional ``forced_delim`` argument is provided to ``csv_open()``, +delimiter detection is skipped and the specified delimiter is used instead. diff --git a/developer_tests/utilities/csv_read_test.f90 b/developer_tests/utilities/csv_read_test.f90 new file mode 100644 index 000000000..8a7e38ff5 --- /dev/null +++ b/developer_tests/utilities/csv_read_test.f90 @@ -0,0 +1,547 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download +! +! test cases for CSV read routines. +! +! creates test files, runs test, and deletes file. +! 
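+! note: the argument to plan() below declares the expected number of
+! ok() assertions; keep it in sync when adding or removing test cases.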
+ + +program csv_read_test + +use types_mod, only : r8, MISSING_R8, MISSING_I +use utilities_mod, only : initialize_utilities, finalize_utilities, & + open_file, close_file +use mpi_utilities_mod, only : shell_execute +use read_csv_mod, only : csv_file_type, csv_get_field, & + csv_open, csv_close, csv_get_nrows + +use test ! fortran-testanything + +implicit none + + +character(len=512) :: string1, string2 +character(len=64) :: testname + +! File variables +integer :: io, iunit + +! if you want to keep the generated input files for further testing +! set this to true and the file deletes will not happen. +logical :: keep_testfiles = .false. + +character(len=512), allocatable :: dat(:) +character, parameter :: BACKSLASH = ACHAR(92) + +! csv file handle +type(csv_file_type) :: cf + +!------------------------------------------------------------------------ + +! start test +call initialize_utilities('csv_read_test', standalone_program=.true.) +call plan(46) + +testname = "basic case" +call basic_test("test1", testname) + +testname = "empty fields" +call empty_test("test2", testname) + +testname = "semicolon separated" +call diff_separator("test3", testname) + +testname = "blank test" +call blank_test("test5", testname) + +testname = "bad num columns" +call bad_cols("test7", testname) + +testname = "autodetect separators" +call sep_test("test8", testname) + +testname = "type mismatch" +call bad_type("test9", testname) + +testname = "mixed numerics" +call mixed("test10", testname) + + +! end test +call finalize_utilities() + + +contains + + +!----------------------------------------------- +! Basic test: +! Create a test file and close it. +! Open csv file and get data. +! 3 columns of data, nothing strange. + +subroutine basic_test(fname, testname) +character(len=*), intent(in) :: fname +character(len=*), intent(in) :: testname + +integer :: i, nrows +integer :: iunit +character(len=256) :: nam(2) +integer :: tot(2) + +! create test file +iunit = open_file(fname, action="write") +write(iunit, "(A)") "Name,Date,Total" +write(iunit, "(A)") "Alice,12/1/22,200" +write(iunit, "(A)") "Bob,1/1/25,400" +call close_file(iunit) + + +! start test +! assume comma as separator +call csv_open(fname, cf, context=fname) + +nrows = csv_get_nrows(cf) +call ok((nrows == 2), trim(testname)//", number of rows") + +! Read the data +! the return data type is set by the type of the third argument. +! this one is a string. +call csv_get_field(cf, 'Name', nam, testname) +call ok((nam(1) == 'Alice'), trim(testname)//", read name 1") +call ok((nam(2) == 'Bob'), trim(testname)//", read name 2") + +! this one is an integer +call csv_get_field(cf, 'Total', tot, testname) +call ok((tot(1) == 200), trim(testname)//", read total 1") +call ok((tot(2) == 400), trim(testname)//", read total 2") + +call csv_close(cf) + + +! delete the test file +call delete_file(fname) + +end subroutine basic_test + +!----------------------------------------------- +! test for blank column, blanks in column headers + +subroutine empty_test(fname, testname) +character(len=*), intent(in) :: fname +character(len=*), intent(in) :: testname + +integer :: i, nrows +integer :: iunit +character(len=256) :: nam(4) +integer :: tot(4) + + +! create the test file +iunit = open_file(fname, action="write") +write(iunit, "(A)") "Name,,Date,Total Days" +write(iunit, "(A)") "Alice,,12/1/2022,200" +write(iunit, "(A)") "Bob,,1/1/2025,400" +write(iunit, "(A)") "Carl,,12/1/2020,60" +write(iunit, "(A)") "David,,4/1/2021,160" +call close_file(iunit) + + +! 
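+! (the header written above deliberately has an empty name for
+! column 2; the reader should still count four columns and match
+! the remaining field names correctly)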
+! run the test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 4), trim(testname)//", number of rows")
+
+! Read the data
+call csv_get_field(cf, 'Name', nam, fname)
+call ok((nam(2) == "Bob"), trim(testname)//", read name 2")
+call ok((nam(4) == "David"), trim(testname)//", read name 4")
+
+call csv_get_field(cf, 'Total Days', tot, fname)
+call ok((tot(1) == 200), trim(testname)//", read total 1")
+call ok((tot(4) == 160), trim(testname)//", read total 4")
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine empty_test
+
+!-----------------------------------------------
+! test a different separator character, and reals
+
+subroutine diff_separator(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: i, nrows
+integer :: iunit
+character(len=256) :: nam(3), idents(3)
+real(r8) :: tot(3)
+
+! create test file
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name;;Id;Total"
+write(iunit, "(A)") "Alice;;12.1.2022;200.2"
+write(iunit, "(A)") "Bob;;1,1,2,0,2,5;4.0e3"
+write(iunit, "(A)") "Carl;;12 4 2020;60"
+call close_file(iunit)
+
+
+! run test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 3), trim(testname)//", number of rows")
+
+! Read the data
+call csv_get_field(cf, 'Name', nam, fname)
+call ok((nam(1) == "Alice"), trim(testname)//", read name 1")
+call ok((nam(2) == "Bob"), trim(testname)//", read name 2")
+call ok((nam(3) == "Carl"), trim(testname)//", read name 3")
+
+call csv_get_field(cf, 'Total', tot, fname)
+call ok((tot(1) == 200.2_r8), trim(testname)//", read total 1")
+call ok((tot(2) == 4000.0_r8), trim(testname)//", read total 2")
+call ok((tot(3) == 60.0_r8), trim(testname)//", read total 3")
+
+call csv_get_field(cf, 'Id', idents, fname)
+call ok((idents(1) == '12.1.2022'), trim(testname)//", read ident 1")
+call ok((idents(2) == '1,1,2,0,2,5'), trim(testname)//", read ident 2")
+call ok((idents(3) == '12 4 2020'), trim(testname)//", read ident 3")
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine diff_separator
+
+!-----------------------------------------------
+! try to retrieve a non-existent column name.
+! note: not called from the main test sequence above because the
+! failed lookup raises a fatal error through error_handler(E_ERR).
+
+subroutine bad_column(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: i, nrows
+integer :: iunit
+character(len=256) :: nam(2)
+integer :: tot(2)
+
+! create test file
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name,Date,Total"
+write(iunit, "(A)") "Alice,12/1/22,200"
+write(iunit, "(A)") "Bob,1/1/25,400"
+call close_file(iunit)
+
+
+! start test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 2), trim(testname)//", number of rows")
+
+! try to read a non-existent column
+call csv_get_field(cf, 'Age', nam, testname)
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine bad_column
+
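+!-----------------------------------------------
+! hedged sketch: a possible extra case exercising quoted fields that
+! contain the delimiter character. get_csv_words_from_string() strips
+! the surrounding quotes and keeps the embedded comma, so the checks
+! below reflect that behavior. not wired into the test sequence above;
+! to use it, call it from the main program and bump the plan() count.
+
+subroutine quoted_field_test(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: nrows
+integer :: iunit
+character(len=256) :: loc(2)
+
+! create a test file with quoted fields containing commas
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name,Location"
+write(iunit, "(A)") 'Alice,"Boulder, CO"'
+write(iunit, "(A)") 'Bob,"Norman, OK"'
+call close_file(iunit)
+
+
+! run the test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 2), trim(testname)//", number of rows")
+
+! quotes should be stripped; the embedded comma should survive
+call csv_get_field(cf, 'Location', loc, testname)
+call ok((loc(1) == 'Boulder, CO'), trim(testname)//", read location 1")
+call ok((loc(2) == 'Norman, OK'), trim(testname)//", read location 2")
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine quoted_field_test
+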
+!-----------------------------------------------
+! forced separator (':') with embedded blanks
+
+subroutine blank_test(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: i, nrows
+integer :: iunit
+
+character(len=256) :: nam(2)
+real(r8) :: tot(2)
+
+
+! create the test file
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name: Date: Total"
+write(iunit, "(A)") "Bob: 1/1/25: 400.04"
+write(iunit, "(A)") "Alice: 12/1/22: 202.8"
+call close_file(iunit)
+
+
+! run test
+call csv_open(fname, cf, forced_delim=':', context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 2), trim(testname)//", number of rows")
+
+! Read the data
+call csv_get_field(cf, 'Name', nam, fname)
+call ok((nam(1) == 'Bob'), trim(testname)//", read name 1")
+call ok((nam(2) == 'Alice'), trim(testname)//", read name 2")
+
+call csv_get_field(cf, 'Total', tot, fname)
+call ok((tot(1) == 400.04_r8), trim(testname)//", read total 1")
+call ok((tot(2) == 202.8_r8), trim(testname)//", read total 2")
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine blank_test
+
+
+!-----------------------------------------------
+! test too short an array as input.
+! note: not called from the main test sequence above because the
+! size mismatch raises a fatal error through error_handler(E_ERR).
+
+subroutine short_array(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: i, nrows
+integer :: iunit
+character(len=256) :: nam(2)
+integer :: tot(2)
+
+! create test file
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name,Total"
+write(iunit, "(A)") "Alice,200"
+write(iunit, "(A)") "Bob,400"
+write(iunit, "(A)") "Carl,60"
+write(iunit, "(A)") "David,160"
+call close_file(iunit)
+
+
+! start test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 4), trim(testname)//", number of rows")
+
+! try to read the data into an array that is too short.
+! this should provoke an error message
+call csv_get_field(cf, 'Name', nam, testname)
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine short_array
+
+
+!-----------------------------------------------
+! test badly formed input file - wrong number of columns
+! in some of the lines
+
+subroutine bad_cols(fname, testname)
+character(len=*), intent(in) :: fname
+character(len=*), intent(in) :: testname
+
+integer :: i, nrows
+integer :: iunit
+character(len=256) :: nam(4)
+integer :: tot(4)
+
+! create test file
+iunit = open_file(fname, action="write")
+write(iunit, "(A)") "Name,Weekly,Total"
+write(iunit, "(A)") "Alice,200,1000"   ! right
+write(iunit, "(A)") "Bob,400"          ! no third field
+write(iunit, "(A)") "60,Carl,,60"      ! too many fields
+write(iunit, "(A)") "David"            ! single value
+call close_file(iunit)
+
+
+! start test
+call csv_open(fname, cf, context=fname)
+
+nrows = csv_get_nrows(cf)
+call ok((nrows == 4), trim(testname)//", number of rows")
+
+! read the string column; short rows still return the fields they have
+call csv_get_field(cf, 'Name', nam, testname)
+call ok((nam(1) == 'Alice'), trim(testname)//", read name 1")
+call ok((nam(2) == 'Bob'), trim(testname)//", read name 2")
+
+! integer column; fields absent from a row come back as MISSING_I
+call csv_get_field(cf, 'Total', tot, testname)
+call ok((tot(1) == 1000), trim(testname)//", read total 1")
+call ok((tot(2) == MISSING_I), trim(testname)//", read total 2")
+call ok((tot(3) == MISSING_I), trim(testname)//", read total 3")
+call ok((tot(4) == MISSING_I), trim(testname)//", read total 4")
+
+call csv_close(cf)
+
+
+! delete the test file
+call delete_file(fname)
+
+end subroutine bad_cols
+
+
+!-----------------------------------------------
+! 
test wrong type passed in for a column + +subroutine bad_type(fname, testname) +character(len=*), intent(in) :: fname +character(len=*), intent(in) :: testname + +integer :: i, nrows +integer :: iunit +character(len=256) :: nam(2) +integer :: tot(2) + +! create test file +iunit = open_file(fname, action="write") +write(iunit, "(A)") "Name,Total" +write(iunit, "(A)") "Alice,abc" +write(iunit, "(A)") "Bob,#$%" +call close_file(iunit) + + +! start test +call csv_open(fname, cf, context=fname) + +nrows = csv_get_nrows(cf) +call ok((nrows == 2), trim(testname)//", number of rows") + +! try to read a non-numeric string into an integer array +! integer +call csv_get_field(cf, 'Total', tot, testname) +call ok((tot(1) == MISSING_I), trim(testname)//", read total 1") +call ok((tot(2) == MISSING_I), trim(testname)//", read total 2") + +call csv_close(cf) + + +! delete the test file +call delete_file(fname) + +end subroutine bad_type + + +!----------------------------------------------- +! test automatic separator test + +subroutine sep_test(fname, testname) +character(len=*), intent(in) :: fname +character(len=*), intent(in) :: testname + +integer :: i, nrows +integer :: iunit +character(len=256) :: nam(2) +integer :: tot(2) + +! create test file +! this header has 1 more comma than semicolon, so should +! detect comma is the separator. +iunit = open_file(fname, action="write") +write(iunit, "(A)") "Name;X,Day;,Week;,Month,Total" +write(iunit, "(A)") "Alice;T,1,2,3,6" +write(iunit, "(A)") "Bob;R,2,4,6,12" +call close_file(iunit) + + +! start test +call csv_open(fname, cf, context=fname) + +nrows = csv_get_nrows(cf) +call ok((nrows == 2), trim(testname)//", number of rows") + +! make sure columns are being counted correctly +call csv_get_field(cf, 'Total', tot, testname) +call ok((tot(1) == 6), trim(testname)//", read total 1") +call ok((tot(2) == 12), trim(testname)//", read total 2") + +call csv_close(cf) + + +! delete the test file +call delete_file(fname) + +end subroutine sep_test + + +!----------------------------------------------- +! test column with mixed types + +subroutine mixed(fname, testname) +character(len=*), intent(in) :: fname +character(len=*), intent(in) :: testname + +integer :: i, nrows +integer :: iunit +character(len=256) :: nam(5) +real(r8) :: tot(5) + +! create test file +iunit = open_file(fname, action="write") +write(iunit, "(A)") "Name,Total" +write(iunit, "(A)") "Alice,123.45" +write(iunit, "(A)") "Bob,abc" +write(iunit, "(A)") "Charles,333.33" +write(iunit, "(A)") "David,x1r2t3" +write(iunit, "(A)") "Edgar,123" +call close_file(iunit) + + +! start test +call csv_open(fname, cf, context=fname) + +nrows = csv_get_nrows(cf) +call ok((nrows == 5), trim(testname)//", number of rows") + +! try to read a mix of numbers and non-numeric strings into a real array + +call csv_get_field(cf, 'Total', tot, testname) +call ok((tot(1) == 123.45_r8), trim(testname)//", read total 1") +call ok((tot(2) == MISSING_R8), trim(testname)//", read total 2") +call ok((tot(3) == 333.33_r8), trim(testname)//", read total 3") +call ok((tot(4) == MISSING_R8), trim(testname)//", read total 4") +call ok((tot(5) == 123.0_r8), trim(testname)//", read total 5") + +call csv_close(cf) + + +! delete the test file +call delete_file(fname) + +end subroutine mixed + +!----------------------------------------------- +! clean up + +subroutine delete_file(fname) +character(len=*), intent(in) :: fname + +integer :: rc + +! 
do not delete test files if true +if (keep_testfiles) return + +rc = shell_execute(BACKSLASH//"rm "//trim(fname)) + +end subroutine delete_file + + +end program csv_read_test diff --git a/developer_tests/utilities/parse_csv_test.f90 b/developer_tests/utilities/parse_csv_test.f90 new file mode 100644 index 000000000..5617e07e0 --- /dev/null +++ b/developer_tests/utilities/parse_csv_test.f90 @@ -0,0 +1,84 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download +! + +! test parsing comma separated values (CSV) from a single input string. +! tests include missing columns, embedded blanks, quoted values, +! and alternative separators. + +program parse_csv_test + +use utilities_mod, only : initialize_utilities, finalize_utilities, & + open_file, close_file +use read_csv_mod, only : get_csv_words_from_string + +use test ! fortran-testanything + +implicit none + +integer :: nwords, i +character(len=512) :: nextline +character(len=64) :: words(100) +character(len=64) :: testname + +!---------------------------------------------------------------------- + +! main code here + +! initialize the dart libs +call initialize_utilities('parse_csv_test', standalone_program=.true.) +call plan(20) + +testname = "basic case" +nextline = "these,are,comma,separated,words,on,a,line" +call get_csv_words_from_string(nextline, ',', nwords, words) +call ok((nwords == 8), trim(testname)//", word count") +call ok((words(1) == "these"), trim(testname)//", word 1") +call ok((words(8) == "line"), trim(testname)//", word 8") + +testname = "empty column" +nextline = "one,,three,four" +call get_csv_words_from_string(nextline, ',', nwords, words) +call ok((nwords == 4), trim(testname)//", word count") +call ok((words(1) == "one"), trim(testname)//", word 1") +call ok((words(2) == ""), trim(testname)//", word 2") +call ok((words(3) == "three"), trim(testname)//", word 3") + +testname = "quotes" +nextline = '"quoted string1"'//",nonquote,'quote two','embedded_"//'"'//"_quote'" +call get_csv_words_from_string(nextline, ',', nwords, words) +call ok((nwords == 4), trim(testname)//", word count") +call ok((words(1) == "quoted string1"), trim(testname)//", word 1") +call ok((words(2) == "nonquote"), trim(testname)//", word 2") +call ok((words(3) == "quote two"), trim(testname)//", word 3") +call ok((words(4) == 'embedded_"_quote'), trim(testname)//", word 4") + +testname = "different separator" +nextline = "1;2;3" +call get_csv_words_from_string(nextline, ';', nwords, words) +call ok((nwords == 3), trim(testname)//", word count") +call ok((words(1) == "1"), trim(testname)//", word 1") +call ok((words(3) == "3"), trim(testname)//", word 3") + +testname = "single word" +nextline = "single_string" +call get_csv_words_from_string(nextline, ',', nwords, words) +call ok((nwords == 1), trim(testname)//", word count") +call ok((words(1) == "single_string"), trim(testname)//", word 1") + +testname = "embedded blanks" +nextline = "comma separated strings,with embedded blanks" +call get_csv_words_from_string(nextline, ',', nwords, words) +call ok((nwords == 2), trim(testname)//", word count") +call ok((words(1) == "comma separated strings"), trim(testname)//", word 1") +call ok((words(2) == "with embedded blanks"), trim(testname)//", word 2") + +call finalize_utilities() + +! 
end of main code + +!---------------------------------------------------------------------- + +end program + diff --git a/developer_tests/utilities/work/quickbuild.sh b/developer_tests/utilities/work/quickbuild.sh index 90492b48b..4731acbfe 100755 --- a/developer_tests/utilities/work/quickbuild.sh +++ b/developer_tests/utilities/work/quickbuild.sh @@ -18,12 +18,14 @@ TEST="utilities" serial_programs=( PrecisionCheck +csv_read_test error_handler_test file_utils_test find_enclosing_indices_test find_first_occurrence_test nml_test parse_args_test +parse_csv_test sort_test stacktest ) diff --git a/guide/available-observation-converters.rst b/guide/available-observation-converters.rst index ac74fea3e..cca437b41 100644 --- a/guide/available-observation-converters.rst +++ b/guide/available-observation-converters.rst @@ -8,6 +8,7 @@ into the format required by DART. Each directory has at least one converter: - ``AIRS``: :doc:`/observations/obs_converters/AIRS/README` +- ``ARVOR``: :doc:`../observations/obs_converters/ARVOR/readme` - ``AURA``: See ``DART/observations/obs_converters/AURA`` - ``Aviso+/CMEMS``: :doc:`../observations/obs_converters/AVISO/AVISO` - ``Ameriflux``: :doc:`../observations/obs_converters/Ameriflux/level4_to_obs` @@ -45,6 +46,7 @@ Each directory has at least one converter: - ``QuikSCAT``: :doc:`../observations/obs_converters/quikscat/QuikSCAT` - ``Radar``: :doc:`../observations/obs_converters/radar/README` - ``snow``: :doc:`../observations/obs_converters/snow/snow_to_obs` +- ``SVP``: :doc:`../observations/obs_converters/SVP/readme` - ``Text``: :doc:`../observations/obs_converters/text/text_to_obs` - ``text_GITM``: See ``DART/observations/obs_converters/text_GITM`` - ``tpw``: :doc:`../observations/obs_converters/tpw/tpw` diff --git a/index.rst b/index.rst index bf4596f72..7603dc8fa 100644 --- a/index.rst +++ b/index.rst @@ -329,6 +329,7 @@ References observations/obs_converters/AIRS/README observations/obs_converters/AIRS/convert_airs_L2 observations/obs_converters/AIRS/convert_amsu_L1 + observations/obs_converters/ARVOR/readme observations/obs_converters/AVISO/AVISO observations/obs_converters/Ameriflux/fluxnetfull_to_obs observations/obs_converters/Ameriflux/level4_to_obs @@ -370,6 +371,7 @@ References observations/obs_converters/obs_error/README observations/obs_converters/radar/README observations/obs_converters/snow/snow_to_obs + observations/obs_converters/SVP/readme observations/obs_converters/text/text_to_obs observations/obs_converters/tpw/tpw observations/obs_converters/tropical_cyclone/tc_to_obs @@ -528,6 +530,7 @@ References assimilation_code/programs/obs_common_subset/obs_common_subset assimilation_code/modules/utilities/ensemble_manager_mod assimilation_code/modules/utilities/random_seq_mod + assimilation_code/modules/utilities/read_csv_mod assimilation_code/modules/utilities/mpi_utilities_mod assimilation_code/modules/utilities/time_manager_mod assimilation_code/modules/utilities/utilities_mod diff --git a/observations/obs_converters/ARVOR/arvor_to_obs.f90 b/observations/obs_converters/ARVOR/arvor_to_obs.f90 new file mode 100644 index 000000000..f8ba4a148 --- /dev/null +++ b/observations/obs_converters/ARVOR/arvor_to_obs.f90 @@ -0,0 +1,386 @@ +! DART software - Copyright UCAR. This open source software is provided +! by UCAR, "as is", without charge, subject to all terms of use at +! http://www.image.ucar.edu/DAReS/DART/DART_download + +! This converter supports in-situ ocean observations for the InaCAWO project. +! The types of data that can be ingested are: +! 
1. ARVOR-C: Argo-type profiling floats (T/S profiles) +! 2. ARVOR-I: Argo-type (ice-capable) floats (T/S profiles) +! +! The raw data files come in ASCII format, containing +! a header with float metadata and multiple rows representing +! profile levels during a dive/ascent cycle +! +! The raw data comes in with several characteristics such as timestamp, +! instrument ID information, geographic location, cycle metadata (e.g., +! start time, mission settings, descent slices, parking depth, etc). +! The converter typically extracts: time, lat, lon, pressure/depth, +! temperature, and salinity. +! +! Note on units: +! temp in Kelvin: convert to C. +! pres in dbar: convert to depth (m, +down). + +program arvor_to_obs + +use types_mod, only : r8, t_kelvin, MISSING_R8 +use time_manager_mod, only : time_type, set_calendar_type, GREGORIAN, get_time, & + set_date, print_date, operator(+), operator(-) +use utilities_mod, only : initialize_utilities, find_namelist_in_file, & + nmlfileunit, error_handler, do_nml_term, E_ERR, & + finalize_utilities, do_nml_file, get_next_filename, & + find_textfile_dims, file_exist, E_MSG +use location_mod, only : VERTISHEIGHT +use obs_sequence_mod, only : obs_type, obs_sequence_type, init_obs, get_num_obs, & + static_init_obs_sequence, init_obs_sequence, & + set_copy_meta_data, set_qc_meta_data, write_obs_seq, & + destroy_obs_sequence, destroy_obs +use obs_utilities_mod, only : create_3d_obs, add_obs_to_seq +use obs_kind_mod, only : FLOAT_TEMPERATURE, FLOAT_SALINITY +use read_csv_mod, only : csv_file_type, csv_get_nrows, csv_get_field, & + csv_open, csv_close, csv_print_header + +implicit none + +character(len=*), parameter :: source = 'arvor_to_obs' + +character(len=512) :: string1, string2 + +! File variables +character(len=256) :: next_infile +integer :: io, iunit, filenum +integer :: num_new_obs, nfiles +integer :: num_valid_obs +logical :: from_list = .false., first_obs = .true. + +! Obs sequence +type(obs_sequence_type) :: obs_seq +type(obs_type) :: obs, prev_obs +type(time_type) :: prev_time +integer :: num_copies = 1, & ! number of copies in sequence + num_qc = 1 ! number of QC entries +real(r8) :: obs_qc + +! Data arrays +real(r8), allocatable :: lon(:), lat(:), pres(:) ! Location +real(r8), allocatable :: temp(:), saln(:) ! Physical observations + +character(len=512), allocatable :: dat(:) + +integer, dimension(2) :: float_types = [FLOAT_TEMPERATURE, FLOAT_SALINITY] +real(r8), dimension(2) :: float_errs + +! ARVOR profile +type arvor + type(time_type) :: dat + real(r8) :: lat + real(r8) :: lon + real(r8) :: dep + real(r8) :: tmp + real(r8) :: sal +end type arvor + +type(arvor), allocatable :: prof(:) + +! csv obs file +type(csv_file_type) :: cf + +!------------------------------------------------------------------------ +! Declare namelist parameters +character(len=256) :: file_in = '' +character(len=256) :: file_list = '' +character(len=256) :: file_out = 'obs_seq.arvor' +integer :: avg_obs_per_file = 500000 +real(r8) :: obs_error_temp = 0.02_r8 ! Profiling floats are highly accurate +real(r8) :: obs_error_sal = 0.02_r8 ! ARVOR CTDs are precise +logical :: debug = .true. + + +namelist /arvor_to_obs_nml/ file_in, & + file_list, & + obs_error_temp, & + obs_error_sal, & + file_out, & + avg_obs_per_file, & + debug + +! Start Converter +call initialize_utilities() + +! 
Read the namelist options +call find_namelist_in_file('input.nml', 'arvor_to_obs_nml', iunit) +read(iunit, nml = arvor_to_obs_nml, iostat = io) + +if (do_nml_file()) write(nmlfileunit, nml=arvor_to_obs_nml) +if (do_nml_term()) write( * , nml=arvor_to_obs_nml) + +! Set the calendar kind +call set_calendar_type(GREGORIAN) + +! Check the files +if (file_in /= '' .and. file_list /= '') then + string1 = 'One of input ARVOR file or the file list must be NULL' + call error_handler(E_ERR, source, string1) +endif +if (file_list /= '') from_list = .true. + +! Get number of observations +num_new_obs = avg_obs_per_file +if (from_list) then + call find_textfile_dims(file_list, nfiles) + num_new_obs = avg_obs_per_file * nfiles +endif + +! Assign obs errors +float_errs = [obs_error_temp, obs_error_sal] + +! Initialize obs seq file +call static_init_obs_sequence() +call init_obs(obs, num_copies, num_qc) +call init_obs(prev_obs, num_copies, num_qc) + +print * +if (file_exist(file_out)) then + write(*, '(A)') 'Output file: '//trim(adjustl(file_out))//' exists. Replacing it ...' +else + write(*, '(A)') 'Creating "'//trim(adjustl(file_out))//'" file.' +endif + +call init_obs_sequence(obs_seq, num_copies, num_qc, num_new_obs) +call set_copy_meta_data(obs_seq, num_copies, 'ARVOR observation') +call set_qc_meta_data(obs_seq, num_qc, 'ARVOR QC') + +! Loop over the obs files +filenum = 1 +obs_qc = 0.0_r8 + +FILELOOP: do + ! Get the data file + if (from_list) then + next_infile = get_next_filename(file_list, filenum) + else + next_infile = file_in + if (filenum > 1) next_infile = '' + endif + if (next_infile == '') exit FILELOOP + print * + if (debug) write(*, '(A, i0, X, A)') 'Input file: #', filenum, trim(next_infile) + + ! Read data from file + call get_profile_data(next_infile) + + ! Add obs to sequence + call fill_obs(obs, prev_obs, prev_time, obs_qc) + + call cleanup + + filenum = filenum + 1 +enddo FILELOOP + +call finish_obs() +call finalize_utilities() + + +contains + + +!----------------------------------------------- +! Read the data profile and the associated dates +subroutine get_profile_data(filename) + +character(len=*), intent(in) :: filename +character(len=*), parameter :: routine = 'get_profile_data' + +integer :: i, k, nobs + +! Open csv file and get dims +call csv_open(filename, cf, context=routine) +nobs = csv_get_nrows(cf) + +if (debug) call csv_print_header(cf) + +allocate(dat(nobs) , prof(nobs)) +allocate(lat(nobs) , lon(nobs), pres(nobs)) +allocate(temp(nobs), saln(nobs)) + +! Read the data +call csv_get_field(cf, 'lat', lat, routine) +call csv_get_field(cf, 'lon', lon, routine) +call csv_get_field(cf, 'sea_water_pressure', pres, routine) +call csv_get_field(cf, 'sea_water_temperature', temp, routine) +call csv_get_field(cf, 'salinity', saln, routine) +call csv_get_field(cf, 'dat', dat, routine) + +call csv_close(cf) + +! Data adjustments +k = 0 +do i = 1, nobs + if (lat(i) == MISSING_R8 .or. & + lon(i) == MISSING_R8 .or. & + pres(i) == MISSING_R8 .or. & + temp(i) == MISSING_R8 .or. & + saln(i) == MISSING_R8 ) cycle + k = k + 1 + + if (lon(i) < 0.0_r8) lon(i) = lon(i) + 360.0_r8 + + prof(k)%lon = lon(i) + prof(k)%lat = lat(i) + prof(k)%dat = parse_time(dat(i)) + prof(k)%dep = depth_from_pressure(pres(i), lat(i)) + prof(k)%tmp = temp(i) - t_kelvin + prof(k)%sal = saln(i) ! 
Salinity is in PSU; consistent with ROMS +enddo +num_valid_obs = k + +if (debug) then + write(*, '(A, i0)') ' >> Total number of obs: ', nobs + write(*, '(A, i0)') ' >> Valid observations count: ', num_valid_obs + + do i = 1, num_valid_obs + write(string1, '(5X, A, i3, 5(A,f10.4), A)') '* obs #', i, ', lat:', prof(i)%lat, & + ', lon:', prof(i)%lon, ', dep:', prof(i)%dep, & + ', T:' , prof(i)%tmp, ', S:' , prof(i)%sal, & + ', date: ' + call print_date(prof(i)%dat, str=string1) + enddo +endif + +end subroutine get_profile_data + + +!------------------------------------------------------ +! Given a profile, add the metadata in the obs sequence +subroutine fill_obs(obs, prev_obs, prev_time, oqc) + +type(obs_type), intent(inout) :: obs, prev_obs +type(time_type), intent(inout) :: prev_time +real(r8), intent(in) :: oqc + +type(time_type) :: odat +real(r8) :: olon, olat, odep, oval, oerr +integer :: otype, iobs, osec, oday, k + +! T, S +do k = 1, 2 + otype = float_types(k) + oerr = float_errs(k) + + do iobs = 1, num_valid_obs + olon = prof(iobs)%lon + olat = prof(iobs)%lat + odep = prof(iobs)%dep + odat = prof(iobs)%dat + + if (k == 1) oval = prof(iobs)%tmp + if (k == 2) oval = prof(iobs)%sal + + call get_time(odat, osec, oday) + call create_3d_obs(olat, olon, odep, VERTISHEIGHT, oval, otype, oerr, oday, osec, oqc, obs) + call add_obs_to_seq(obs_seq, obs, odat, prev_obs, prev_time, first_obs) + enddo +enddo + +end subroutine fill_obs + + +!------------------------------------------------------------ +! All collected obs (if any) are written in the seq file. +subroutine finish_obs() + +integer :: obs_num + +obs_num = get_num_obs(obs_seq) + +if (obs_num > 0) then + print * + if (debug) write(*, '(A, i0, A)') '> Ready to write ', obs_num, ' observations:' + + call write_obs_seq(obs_seq, file_out) + call destroy_obs(obs) + call destroy_obs_sequence(obs_seq) +else + string1 = 'No obs were converted.' + call error_handler(E_ERR, source, string1) +endif +call error_handler(E_MSG, source, 'Finished successfully.') + +end subroutine finish_obs + + +!----------------------------------------- +! Using the date string, set the DART time +function parse_time(datestr) result(obs_time) + +character(len=*), parameter :: routine = 'parse_time' + +character(len=*), intent(in) :: datestr +type(time_type) :: obs_time + +integer :: year, month, day, hour, minute, second + +!example: 2025-10-06T06:10:00Z + +read(datestr, '(i4, 5(1x,i2))', iostat=io) year, month, day, hour, minute, second +if (io /= 0) then + write(string1, *) 'Unable to read time variable. Error status was ', io + call error_handler(E_ERR, routine, string1, source) +endif + +obs_time = set_date(year, month, day, hour, minute, second) + +end function parse_time + + +!-------------------------------------------------------------- +! Convert sea water pressure to depth (+ve, downward) in meters +! p_dbar : pressure in decibar (dbar) +! lat_deg: latitude in degrees ([-90, 90]) +! +! Includes latitude-dependent gravity and the 1.092e-6*p compressibility term. +! Valid for typical oceanic ranges (0–12,000 dbar). +function depth_from_pressure(p_dbar, lat_deg) result(depth_m) + +character(len=*), parameter :: routine = 'depth_from_pressure' + +real(r8), intent(in) :: p_dbar +real(r8), intent(in) :: lat_deg +real(r8) :: depth_m +real(r8) :: x, sin2, sin4, g + +if (p_dbar < 0._r8 .or. 
abs(lat_deg) > 90._r8) then
+   write(string1, *) 'Cannot compute depth with input pressure: ', p_dbar
+   write(string2, *) 'and latitude: ', lat_deg
+   call error_handler(E_ERR, routine, string1, source, text2 = string2)
+end if
+
+! Latitude-dependent gravity (m/s^2), Saunders (1981)/UNESCO
+x = sin(lat_deg * (acos(-1._r8)/180._r8)) ! sin(phi)
+sin2 = x*x
+sin4 = sin2*sin2
+g = 9.780318_r8 * (1._r8 + 5.2788e-3_r8*sin2 + 2.36e-5_r8*sin4) + &
+    1.092e-6_r8 * p_dbar
+
+! Polynomial (UNESCO 1983): depth (m) from pressure (dbar)
+depth_m = (((-1.82e-15_r8*p_dbar + 2.279e-10_r8)*p_dbar - 2.2512e-5_r8)*p_dbar + 9.72659_r8) &
+          * p_dbar / g
+
+end function depth_from_pressure
+
+
+!------------------------------------------------------------
+! Clean up memory
+subroutine cleanup()
+
+if (allocated(prof)) deallocate(prof)
+if (allocated(lon) ) deallocate(lon)
+if (allocated(lat) ) deallocate(lat)
+if (allocated(dat) ) deallocate(dat)
+if (allocated(temp)) deallocate(temp)
+if (allocated(saln)) deallocate(saln)
+if (allocated(pres)) deallocate(pres)
+
+end subroutine cleanup
+
+end program arvor_to_obs
diff --git a/observations/obs_converters/ARVOR/readme.rst b/observations/obs_converters/ARVOR/readme.rst
new file mode 100644
index 000000000..19bc0ccec
--- /dev/null
+++ b/observations/obs_converters/ARVOR/readme.rst
@@ -0,0 +1,137 @@
+ARVOR (Profiling Floats)
+========================
+
+This utility converts in-situ profiling float (T/S profile) ASCII files
+into a DART observation sequence (``obs_seq``) file.
+
+.. contents::
+   :depth: 2
+   :local:
+
+Overview
+--------
+Profiling floats such as ARVOR-C and ARVOR-I (ice-capable) record seawater
+temperature and salinity at multiple depths during dives and ascents.
+This converter ingests CSV-format ASCII files of these profiles and
+writes them to a DART obs_seq file for assimilation. The CSV files are
+expected to begin with a single header line of field names, followed by
+one data record per line.
+
+The converter reads the following from the raw data:
+ * ``dat`` -- timestamp
+ * ``lat`` -- latitude in degrees
+ * ``lon`` -- longitude in degrees
+ * ``sea_water_pressure`` -- pressure in dbar, converted to depth in meters
+ * ``sea_water_temperature`` -- temperature in Kelvin, converted to degrees Celsius
+ * ``salinity`` -- salinity in PSU
+
+.. note::
+
+   **Time handling:** The converter reads the ``dat`` column, parses the timestamp, and
+   constructs a DART ``time_type`` using the Gregorian calendar. The
+   expected time format is ``YYYY-MM-DDThh:mm:ssZ``
+
+Build & Run
+-----------
+Build as usual with DART converters. Then run ``./arvor_to_obs`` with an
+``input.nml`` that includes a namelist section that looks like:
+
+.. code-block:: fortran
+
+   &arvor_to_obs_nml
+      file_in          = 'ARVOR_20251006_300534063313500.csv',
+      file_list        = '',        ! path to list of files (or '')
+      file_out         = 'obs_seq.arvor',
+      obs_error_temp   = 0.02,      ! temperature error standard deviation (C)
+      obs_error_sal    = 0.02,      ! salinity error standard deviation (PSU)
+      avg_obs_per_file = 500000,    ! pre-allocation limit
+      debug            = .true.
+   /
+
+* Exactly one of ``file_in`` or ``file_list`` must be non-empty.
+* Temperature values in the raw file are in Kelvin and are converted to degrees Celsius.
+* Pressure values (dbar) are converted to depth in meters (+ down).
+* Any missing values or bad rows (``lat``, ``lon``, ``pres``, ``temp``, or ``saln`` = missing) are skipped.
+* Observation kinds: ``FLOAT_TEMPERATURE`` and ``FLOAT_SALINITY``.
+
+Namelist Summary
+****************
+.. 
+
+Namelist Summary
+****************
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 10 25 45
+
+   * - **Namelist item**
+     - **Type**
+     - **Default / Allowed values**
+     - **Description**
+
+   * - ``file_in``
+     - character(len=256)
+     - ``''`` (empty)
+     - Path to a single ASCII profile file.
+
+   * - ``file_list``
+     - character(len=256)
+     - ``''`` (empty)
+     - Text file listing profile files, one per line.
+
+   * - ``file_out``
+     - character(len=256)
+     - ``'obs_seq.arvor'``
+     - Output obs_seq file name.
+
+   * - ``obs_error_temp``
+     - real(r8)
+     - ``0.02``
+     - Observation error standard deviation for temperature (C).
+
+   * - ``obs_error_sal``
+     - real(r8)
+     - ``0.02``
+     - Observation error standard deviation for salinity (PSU).
+
+   * - ``avg_obs_per_file``
+     - integer
+     - ``500000``
+     - Estimated number of valid obs per file, used for pre-allocation. The number of files times this value must be at least the total number of output observations.
+
+   * - ``debug``
+     - logical
+     - ``.true.``
+     - If true, prints a detailed conversion log.
+
+Output
+------
+A new observation sequence file named according to ``file_out``.
+Each observation record includes location (lat, lon, depth), time, and
+one of the two kinds (temperature or salinity), with error variance
+set to the square of ``obs_error_temp`` or ``obs_error_sal``.
+
+If ``debug = .true.``, a summary will be printed (similar to the one below):
+
+.. code-block:: text
+
+   Input file: #2 T_HCSV00_C_BMKG_20251006061000_ARVORC_300534063012970.csv
+      >> Total number of obs: 25
+      >> Valid observations count: 24
+      * obs #  1, lat:   -5.7368, lon:  113.8890, dep:   65.0270, T:   28.6840, S:   33.9690, date:  2025 Oct 06 06:10:00
+      * obs #  2, lat:   -5.7368, lon:  113.8890, dep:   62.2036, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:11:05
+      * obs #  3, lat:   -5.7368, lon:  113.8890, dep:   59.3703, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:12:10
+      * obs #  4, lat:   -5.7368, lon:  113.8890, dep:   56.5468, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:13:15
+      * obs #  5, lat:   -5.7368, lon:  113.8890, dep:   53.7233, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:14:20
+      * obs #  6, lat:   -5.7368, lon:  113.8890, dep:   50.8898, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:15:25
+      * obs #  7, lat:   -5.7368, lon:  113.8890, dep:   48.0663, T:   28.6840, S:   33.9680, date:  2025 Oct 06 06:16:30
+      ...
+
+   > Ready to write 286 observations:
+   write_obs_seq  opening formatted observation sequence file "obs_seq.arvor"
+   write_obs_seq  closed observation sequence file "obs_seq.arvor"
+   arvor_to_obs Finished successfully.
+
+Further reading
+---------------
+For background on the observing platform supported by this converter:
+
+* `Argo Program: `_ *"Float types"* -- Official overview of the various Argo float families, including ARVOR, PROVOR, and DEEP-Arvor.
+* `Le Reste et al. (2016): `_ *"Deep-Arvor: A new profiling float to extend the Argo observations to 4000 m"* -- Detailed design and performance study of the deep variant of the ARVOR float.
+* `NKE Instrumentation: `_ *"Profiling floats"* -- Manufacturer specifications for the ARVOR and PROVOR families (pressure range, sensors, telemetry).
diff --git a/observations/obs_converters/ARVOR/work/input.nml b/observations/obs_converters/ARVOR/work/input.nml
new file mode 100644
index 000000000..fe752f325
--- /dev/null
+++ b/observations/obs_converters/ARVOR/work/input.nml
@@ -0,0 +1,41 @@
+&preprocess_nml
+   overwrite_output = .true.
+ input_obs_qty_mod_file = '../../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90' + output_obs_qty_mod_file = '../../../../assimilation_code/modules/observations/obs_kind_mod.f90' + input_obs_def_mod_file = '../../../forward_operators/DEFAULT_obs_def_mod.F90' + output_obs_def_mod_file = '../../../forward_operators/obs_def_mod.f90' + obs_type_files = '../../../forward_operators/obs_def_ocean_mod.f90' + quantity_files = '../../../../assimilation_code/modules/observations/ocean_quantities_mod.f90' + / + +&arvor_to_obs_nml + file_in = '' + file_list = 'obs_files.txt' + file_out = 'obs_seq.arvor' + debug = .true. + / + +&obs_sequence_nml + write_binary_obs_sequence = .false. + / + +&utilities_nml + module_details = .false. + / + +&obs_kind_nml + / + +&location_nml + / + +&obs_sequence_tool_nml + filename_seq = '' + filename_out = '' + print_only = .false. + gregorian_cal = .true. + first_obs_days = 150113 + first_obs_seconds = 0 + last_obs_days = 150114 + last_obs_seconds = 86399 + / diff --git a/observations/obs_converters/ARVOR/work/quickbuild.sh b/observations/obs_converters/ARVOR/work/quickbuild.sh new file mode 100755 index 000000000..673fb9177 --- /dev/null +++ b/observations/obs_converters/ARVOR/work/quickbuild.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# DART software - Copyright UCAR. This open source software is provided +# by UCAR, "as is", without charge, subject to all terms of use at +# http://www.image.ucar.edu/DAReS/DART/DART_download + +main() { + +export DART=$(git rev-parse --show-toplevel) +source "$DART"/build_templates/buildconvfunctions.sh + +CONVERTER=ARVOR +LOCATION=threed_sphere + +programs=( +advance_time +arvor_to_obs +obs_seq_to_netcdf +obs_sequence_tool +) + +# build arguments +arguments "$@" + +# clean the directory +\rm -f -- *.o *.mod Makefile .cppdefs + +# build and run preprocess before making any other DART executables +buildpreprocess + +# build +buildconv + + +# clean up +\rm -f -- *.o *.mod + +} + +main "$@" diff --git a/observations/obs_converters/README.rst b/observations/obs_converters/README.rst index 0fce1aec6..108345834 100644 --- a/observations/obs_converters/README.rst +++ b/observations/obs_converters/README.rst @@ -360,6 +360,7 @@ The current list of converters (some directories contain multiple converters) include: - ``AIRS``: :doc:`./AIRS/README` +- ``ARVOR``: :doc:`./ARVOR/readme` - ``AURA``: See ``./AURA`` - ``Aviso+/CMEMS``: :doc:`./AVISO/AVISO` - ``Ameriflux``: :doc:`./Ameriflux/level4_to_obs` @@ -400,6 +401,7 @@ converters) include: - ``QuikSCAT``: :doc:`./quikscat/QuikSCAT` - ``Radar``: :doc:`./radar/README` - ``snow``: :doc:`./snow/snow_to_obs` +- ``SVP``: :doc:`./SVP/readme` - ``Text``: :doc:`./text/text_to_obs` - ``text_GITM``: See ``./text_GITM`` - ``tpw``: :doc:`./tpw/tpw` diff --git a/observations/obs_converters/SVP/readme.rst b/observations/obs_converters/SVP/readme.rst new file mode 100644 index 000000000..e4e7255ad --- /dev/null +++ b/observations/obs_converters/SVP/readme.rst @@ -0,0 +1,149 @@ +=== +SVP +=== +This utility converts **Surface Velocity Program (SVP)** drifter data +(SST + surface currents) into a DART observation sequence (``obs_seq``) file. + +.. contents:: + :depth: 2 + :local: + +Overview +-------- +SVP drifters measure near-surface ocean temperature and currents. 
For the
+InaCAWO project, BMKG/Baron provide ASCII (CSV-like) files with:
+
+- Header line of field names
+- One row per surface observation time
+
+The converter reads:
+  - ``dat`` – timestamp
+  - ``lat`` – latitude (degrees)
+  - ``lon`` – longitude (degrees)
+  - ``sea_temperature`` – SST (Kelvin; converted to C)
+  - ``u`` / ``v`` – eastward / northward surface velocity (m/s)
+
+Each valid row becomes up to three DART observations at the surface
+(``VERTISSURFACE``): ``DRIFTER_TEMPERATURE``, ``DRIFTER_U_CURRENT_COMPONENT``,
+and ``DRIFTER_V_CURRENT_COMPONENT``.
+
+.. note::
+
+   **Time handling:** The converter reads the ``dat`` column, parses the timestamp, and
+   constructs a DART ``time_type`` in the Gregorian calendar.
+   Expected format: ``YYYY-MM-DDThh:mm:ssZ``.
+
+Build & run
+-----------
+Build like other DART converters, then run with an ``input.nml`` that includes
+``svp_to_obs_nml``:
+
+.. code-block:: bash
+
+   ./svp_to_obs
+
+The program writes a single output sequence defined by ``file_out``.
+
+Namelist
+********
+This namelist is added to ``input.nml``:
+
+.. code-block:: fortran
+
+   &svp_to_obs_nml
+      file_in          = ''               ! single ASCII file (or '')
+      file_list        = ''               ! text file of paths (or '')
+      file_out         = 'obs_seq.svp'    ! output obs_seq file
+      obs_error_sst    = 0.20             ! SST error standard deviation (C)
+      obs_error_vel    = 0.10             ! U/V error standard deviation (m/s)
+      avg_obs_per_file = 500000           ! pre-allocation limit
+      debug            = .true.
+   /
+
+.. list-table::
+   :header-rows: 1
+   :widths: 22 12 23 43
+
+   * - **Namelist item**
+     - **Type**
+     - **Default**
+     - **Description**
+
+   * - ``file_in``
+     - character(len=256)
+     - ``''`` (empty)
+     - Path to a single SVP ASCII input file. Mutually exclusive with ``file_list``.
+
+   * - ``file_list``
+     - character(len=256)
+     - ``''`` (empty)
+     - Text file listing SVP ASCII files, one per line. Use instead of ``file_in`` for batches.
+
+   * - ``file_out``
+     - character(len=256)
+     - ``'obs_seq.svp'``
+     - Name of the DART output observation sequence file. Overwritten if it exists.
+
+   * - ``obs_error_sst``
+     - real(r8)
+     - ``0.20``
+     - Observation error standard deviation for SST (in °C).
+
+   * - ``obs_error_vel``
+     - real(r8)
+     - ``0.10``
+     - Observation error standard deviation for U and V (in m/s).
+
+   * - ``avg_obs_per_file``
+     - integer
+     - ``500000``
+     - Estimated number of valid observations per input file, used only for pre-allocation. The number of files times this value must be at least the total number of output observations.
+
+   * - ``debug``
+     - logical
+     - ``.true.``
+     - If true, prints detailed information for each file and observation.
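+
+To convert a batch of files, leave ``file_in`` empty and point
+``file_list`` at a plain-text file listing one input file per line,
+e.g. an ``obs_files.txt`` containing the files from the sample run
+shown below:
+
+.. code-block:: text
+
+   T_HCSV00_C_BMKG_20251006000000_SVP_300534064005670.csv
+   T_HCSV00_C_BMKG_20251006010000_SVP_300534064005670.csv
+   T_HCSV00_C_BMKG_20251006020000_SVP_300534064005670.csv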
+
+Output
+------
+A new observation sequence file named according to ``file_out``.
+Each observation record includes location (lat, lon), time, and
+one of the three kinds (SST, U, or V), with error variance
+set to the square of ``obs_error_sst`` or ``obs_error_vel``.
+
+With ``debug = .true.``, the converter prints a summary of accepted/filtered
+observations for each input file:
+
+.. code-block:: text
+
+   Output file: obs_seq.svp exists. Replacing it ...
+
+   Input file: #1 T_HCSV00_C_BMKG_20251006000000_SVP_300534064005670.csv
+      * lat:    0.4730, lon:   43.3212, SST:   26.1200, U:   -0.0062, V:   -0.0061, date:  2025 Oct 06 00:00:00
+
+   Input file: #2 T_HCSV00_C_BMKG_20251006010000_SVP_300534064005670.csv
+      * lat:    0.4732, lon:   43.3212, SST:   26.0100, U:    0.0000, V:    0.0061, date:  2025 Oct 06 01:00:00
+
+   Input file: #3 T_HCSV00_C_BMKG_20251006020000_SVP_300534064005670.csv
+      * lat:    0.4730, lon:   43.3214, SST:   26.0400, U:    0.0062, V:   -0.0061, date:  2025 Oct 06 02:00:00
+
+   Input file: #4 T_HCSV00_C_BMKG_20251006030000_SVP_300534064005670.csv
+      * lat:    0.4732, lon:   43.3214, SST:   26.1400, U:    0.0000, V:    0.0061, date:  2025 Oct 06 03:00:00
+
+   Input file: #5 T_HCSV00_C_BMKG_20251006040000_SVP_300534064005670.csv
+      * lat:    0.4730, lon:   43.3214, SST:   26.3300, U:    0.0000, V:   -0.0061, date:  2025 Oct 06 04:00:00
+
+   ...
+
+   > Ready to write 72 observations:
+   write_obs_seq  opening formatted observation sequence file "obs_seq.svp"
+   write_obs_seq  closed observation sequence file "obs_seq.svp"
+   svp_to_obs Finished successfully.
+
+Further reading
+---------------
+For background on the observing platform supported by this converter:
+
+* `Global Drifter Program (GDP): `_ *"SVP drifter overview"* -- NOAA and Scripps-maintained description of the standard SVP and SVP-B drifters used globally.
+* `Lumpkin & Pazos (2007): `_ *"Measuring surface currents with Surface Velocity Program drifters"* -- Classic technical paper detailing instrument design, calibration, and data characteristics.
+* `Ocean Observers: `_ *"Drifting buoys (DBCP)"* -- General introduction to the international drifter network and its coordination under WMO/IOC.
diff --git a/observations/obs_converters/SVP/svp_to_obs.f90 b/observations/obs_converters/SVP/svp_to_obs.f90
new file mode 100644
index 000000000..e7c0572d7
--- /dev/null
+++ b/observations/obs_converters/SVP/svp_to_obs.f90
@@ -0,0 +1,351 @@
+! DART software - Copyright UCAR. This open source software is provided
+! by UCAR, "as is", without charge, subject to all terms of use at
+! http://www.image.ucar.edu/DAReS/DART/DART_download

+! This converter supports in-situ ocean observations for the InaCAWO project.
+! SVP: Surface Velocity Program drifters (surface currents + SST)
+!
+! The raw data files come in ASCII format, containing
+! a header with drifter metadata and multiple rows representing
+! surface ocean conditions at different times.
+!
+! The raw data carries several columns, such as timestamp,
+! instrument ID information, geographic location, and deployment
+! metadata (e.g., start time and mission settings).
+! The converter extracts only: time, lat, lon, SST, and
+! surface currents.
+!
+! Note on units:
+!    SST is in Kelvin; converted to C.
+!    U and V are in m/s.
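+!
+! For example (illustrative numbers): a raw SST of 299.27 K is stored
+! as 299.27 - 273.15 = 26.12 C, using the t_kelvin constant from
+! types_mod.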
+
+program svp_to_obs
+
+use types_mod,         only : r8, t_kelvin, MISSING_R8
+use time_manager_mod,  only : time_type, set_calendar_type, GREGORIAN, get_time, &
+                              set_date, print_date, operator(+), operator(-)
+use utilities_mod,     only : initialize_utilities, find_namelist_in_file, &
+                              check_namelist_read, nmlfileunit, error_handler, &
+                              do_nml_term, E_ERR, finalize_utilities, do_nml_file, &
+                              get_next_filename, find_textfile_dims, file_exist, E_MSG
+use location_mod,      only : VERTISSURFACE
+use obs_sequence_mod,  only : obs_type, obs_sequence_type, init_obs, get_num_obs, &
+                              static_init_obs_sequence, init_obs_sequence, &
+                              set_copy_meta_data, set_qc_meta_data, write_obs_seq, &
+                              destroy_obs_sequence, destroy_obs
+use obs_utilities_mod, only : create_3d_obs, add_obs_to_seq
+use obs_kind_mod,      only : DRIFTER_U_CURRENT_COMPONENT, DRIFTER_TEMPERATURE, &
+                              DRIFTER_V_CURRENT_COMPONENT
+use read_csv_mod,      only : csv_file_type, csv_get_nrows, csv_get_field, &
+                              csv_open, csv_close, csv_print_header
+
+implicit none
+
+character(len=*), parameter :: source = 'svp_to_obs'
+
+character(len=512) :: string1
+
+! File variables
+character(len=256) :: next_infile
+integer :: io, iunit, filenum
+integer :: num_new_obs, nfiles
+integer :: num_valid_obs
+logical :: from_list = .false., first_obs = .true.
+
+! Obs sequence
+type(obs_sequence_type) :: obs_seq
+type(obs_type)          :: obs, prev_obs
+type(time_type)         :: prev_time
+integer  :: num_copies = 1, &   ! number of copies in sequence
+            num_qc     = 1      ! number of QC entries
+real(r8) :: obs_qc
+
+! Data arrays
+real(r8), allocatable :: lon(:), lat(:)            ! Location
+real(r8), allocatable :: sst(:), uvel(:), vvel(:)  ! Physical observations
+
+character(len=512), allocatable :: dat(:)
+
+integer,  dimension(3) :: drifter_types = [DRIFTER_TEMPERATURE, &
+                                           DRIFTER_U_CURRENT_COMPONENT, &
+                                           DRIFTER_V_CURRENT_COMPONENT]
+real(r8), dimension(3) :: drifter_errs
+
+! SVP data structure
+type svp
+   type(time_type) :: dat
+   real(r8)        :: lat
+   real(r8)        :: lon
+   real(r8)        :: sst
+   real(r8)        :: u
+   real(r8)        :: v
+end type svp
+
+type(svp), allocatable :: surf(:)
+
+! csv obs file
+type(csv_file_type) :: cf
+
+!------------------------------------------------------------------------
+! Declare namelist parameters
+character(len=256) :: file_in   = ''
+character(len=256) :: file_list = ''
+character(len=256) :: file_out  = 'obs_seq.svp'
+integer  :: avg_obs_per_file = 500000
+real(r8) :: obs_error_vel = 0.10_r8   ! May vary locally
+real(r8) :: obs_error_sst = 0.20_r8   ! Derived from small sensors
+logical  :: debug = .true.
+
+
+namelist /svp_to_obs_nml/ file_in, &
+                          file_list, &
+                          obs_error_sst, &
+                          obs_error_vel, &
+                          file_out, &
+                          avg_obs_per_file, &
+                          debug
+
+! Start Converter
+call initialize_utilities()
+
+! Read the namelist options; abort cleanly on a bad read
+call find_namelist_in_file('input.nml', 'svp_to_obs_nml', iunit)
+read(iunit, nml = svp_to_obs_nml, iostat = io)
+call check_namelist_read(iunit, io, 'svp_to_obs_nml')
+
+if (do_nml_file()) write(nmlfileunit, nml=svp_to_obs_nml)
+if (do_nml_term()) write(     *     , nml=svp_to_obs_nml)
+
+! Set the calendar kind
+call set_calendar_type(GREGORIAN)
+
+! Check the input file options
+if (file_in /= '' .and. file_list /= '') then
+   string1 = 'Specify either file_in or file_list, but not both.'
+   call error_handler(E_ERR, source, string1)
+endif
+if (file_list /= '') from_list = .true.
+
+! Estimate the number of observations for pre-allocation
+num_new_obs = avg_obs_per_file
+if (from_list) then
+   call find_textfile_dims(file_list, nfiles)
+   num_new_obs = avg_obs_per_file * nfiles
+endif
+
+! Assign obs errors
+drifter_errs = [obs_error_sst, obs_error_vel, obs_error_vel]
+
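+! drifter_errs lines up element-for-element with drifter_types, so in
+! fill_obs() index k selects a matching (type, error) pair:
+!    k = 1 -> DRIFTER_TEMPERATURE         / obs_error_sst
+!    k = 2 -> DRIFTER_U_CURRENT_COMPONENT / obs_error_vel
+!    k = 3 -> DRIFTER_V_CURRENT_COMPONENT / obs_error_vel
+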
+! Initialize obs seq file
+call static_init_obs_sequence()
+call init_obs(obs,      num_copies, num_qc)
+call init_obs(prev_obs, num_copies, num_qc)
+
+print *
+if (file_exist(file_out)) then
+   write(*, '(A)') 'Output file: '//trim(adjustl(file_out))//' exists. Replacing it ...'
+else
+   write(*, '(A)') 'Creating "'//trim(adjustl(file_out))//'" file.'
+endif
+
+call init_obs_sequence(obs_seq, num_copies, num_qc, num_new_obs)
+call set_copy_meta_data(obs_seq, num_copies, 'SVP observation')
+call set_qc_meta_data(obs_seq, num_qc, 'SVP QC')
+
+! Loop over the obs files
+filenum = 1
+obs_qc  = 0.0_r8
+
+FILELOOP: do
+   ! Get the data file
+   if (from_list) then
+      next_infile = get_next_filename(file_list, filenum)
+   else
+      next_infile = file_in
+      if (filenum > 1) next_infile = ''
+   endif
+   if (next_infile == '') exit FILELOOP
+   print *
+   if (debug) write(*, '(A, i0, 1X, A)') 'Input file: #', filenum, trim(next_infile)
+
+   ! Read data from file
+   call get_surface_data(next_infile)
+
+   ! Add obs to sequence
+   call fill_obs(obs, prev_obs, prev_time, obs_qc)
+
+   call cleanup()
+
+   filenum = filenum + 1
+enddo FILELOOP
+
+call finish_obs()
+call finalize_utilities()
+
+
+contains
+
+
+!-----------------------------------------------
+! Read the data and the associated dates
+subroutine get_surface_data(filename)
+
+character(len=*), intent(in) :: filename
+character(len=*), parameter  :: routine = 'get_surface_data'
+
+integer :: i, k, nobs
+
+! Open csv file and get dims
+call csv_open(filename, cf, context=routine)
+nobs = csv_get_nrows(cf)
+
+if (debug) call csv_print_header(cf)
+
+allocate(dat(nobs), surf(nobs))
+allocate(lat(nobs), lon(nobs))
+allocate(sst(nobs), uvel(nobs), vvel(nobs))
+
+! Read the data
+call csv_get_field(cf, 'dat', dat,  routine)
+call csv_get_field(cf, 'lat', lat,  routine)
+call csv_get_field(cf, 'lon', lon,  routine)
+call csv_get_field(cf, 'sea_temperature', sst, routine)
+call csv_get_field(cf, 'u',   uvel, routine)
+call csv_get_field(cf, 'v',   vvel, routine)
+
+call csv_close(cf)
+
+! Data adjustments: skip any row where a required field failed to
+! parse, and wrap longitudes into DART's [0, 360) convention.
+k = 0
+do i = 1, nobs
+   if (lat(i)  == MISSING_R8 .or. &
+       lon(i)  == MISSING_R8 .or. &
+       sst(i)  == MISSING_R8 .or. &
+       uvel(i) == MISSING_R8 .or. &
+       vvel(i) == MISSING_R8 ) cycle
+   k = k + 1
+
+   if (lon(i) < 0.0_r8) lon(i) = lon(i) + 360.0_r8
+
+   surf(k)%lon = lon(i)
+   surf(k)%lat = lat(i)
+   surf(k)%dat = parse_time(dat(i))
+   surf(k)%sst = sst(i) - t_kelvin   ! Kelvin to Celsius
+   surf(k)%u   = uvel(i)
+   surf(k)%v   = vvel(i)
+enddo
+num_valid_obs = k
+
+if (debug) then
+   do i = 1, num_valid_obs
+      write(string1, '(5X, A, i3, 5(A,f10.4), A)') '* obs #', i, ', lat:', surf(i)%lat, &
+            ', lon:', surf(i)%lon, ', SST:', surf(i)%sst, &
+            ', U  :', surf(i)%u  , ', V  :', surf(i)%v  , &
+            ', date: '
+      call print_date(surf(i)%dat, str=string1)
+   enddo
+endif
+
+end subroutine get_surface_data
+
+
+!------------------------------------------------------
+! Add the metadata to the obs sequence
+subroutine fill_obs(obs, prev_obs, prev_time, oqc)
+
+type(obs_type),  intent(inout) :: obs, prev_obs
+type(time_type), intent(inout) :: prev_time
+real(r8),        intent(in)    :: oqc
+
+type(time_type) :: odat
+real(r8) :: olon, olat, odep, oval, oerr
+integer  :: otype, iobs, osec, oday, k
+
+! Surface drifter data: all obs are at 0 m depth
+odep = 0.0_r8
+
+! SST, U, V
+do k = 1, 3
+   otype = drifter_types(k)
+   oerr  = drifter_errs(k)
+
+   do iobs = 1, num_valid_obs
+      olon = surf(iobs)%lon
+      olat = surf(iobs)%lat
+      odat = surf(iobs)%dat
+
+      if (k == 1) oval = surf(iobs)%sst
+      if (k == 2) oval = surf(iobs)%u
+      if (k == 3) oval = surf(iobs)%v
+
+      call get_time(odat, osec, oday)
+      call create_3d_obs(olat, olon, odep, VERTISSURFACE, oval, otype, oerr, oday, osec, oqc, obs)
+      call add_obs_to_seq(obs_seq, obs, odat, prev_obs, prev_time, first_obs)
+   enddo
+enddo
+
+end subroutine fill_obs
+
+
+!------------------------------------------------------------
+! All collected obs (if any) are written to the obs_seq file.
+subroutine finish_obs()
+
+integer :: obs_num
+
+obs_num = get_num_obs(obs_seq)
+
+if (obs_num > 0) then
+   print *
+   if (debug) write(*, '(A, i0, A)') '> Ready to write ', obs_num, ' observations:'
+
+   call write_obs_seq(obs_seq, file_out)
+   call destroy_obs(obs)
+   call destroy_obs_sequence(obs_seq)
+else
+   string1 = 'No obs were converted.'
+   call error_handler(E_ERR, source, string1)
+endif
+call error_handler(E_MSG, source, 'Finished successfully.')
+
+end subroutine finish_obs
+
+
+!-----------------------------------------
+! Using the date string, set the DART time
+function parse_time(datestr) result(obs_time)
+
+character(len=*), parameter :: routine = 'parse_time'
+
+character(len=*), intent(in) :: datestr
+type(time_type) :: obs_time
+
+integer :: year, month, day, hour, minute, second
+
+! example: 2025-10-06T06:10:00Z
+
+read(datestr, '(i4, 5(1x,i2))', iostat=io) year, month, day, hour, minute, second
+if (io /= 0) then
+   write(string1, *) 'Unable to read time variable. Error status was ', io
+   call error_handler(E_ERR, routine, string1, source)
+endif
+
+obs_time = set_date(year, month, day, hour, minute, second)
+
+end function parse_time
+
+
+!------------------------------------------------------------
+! Clean up memory
+subroutine cleanup()
+
+if (allocated(surf)) deallocate(surf)
+if (allocated(lon) ) deallocate(lon)
+if (allocated(lat) ) deallocate(lat)
+if (allocated(dat) ) deallocate(dat)
+if (allocated(sst) ) deallocate(sst)
+if (allocated(uvel)) deallocate(uvel)
+if (allocated(vvel)) deallocate(vvel)
+
+end subroutine cleanup
+
+end program svp_to_obs
diff --git a/observations/obs_converters/SVP/work/input.nml b/observations/obs_converters/SVP/work/input.nml
new file mode 100644
index 000000000..75840b353
--- /dev/null
+++ b/observations/obs_converters/SVP/work/input.nml
@@ -0,0 +1,41 @@
+&preprocess_nml
+   overwrite_output = .true.
+   input_obs_qty_mod_file  = '../../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90'
+   output_obs_qty_mod_file = '../../../../assimilation_code/modules/observations/obs_kind_mod.f90'
+   input_obs_def_mod_file  = '../../../forward_operators/DEFAULT_obs_def_mod.F90'
+   output_obs_def_mod_file = '../../../forward_operators/obs_def_mod.f90'
+   obs_type_files          = '../../../forward_operators/obs_def_ocean_mod.f90'
+   quantity_files          = '../../../../assimilation_code/modules/observations/ocean_quantities_mod.f90'
+   /
+
+&svp_to_obs_nml
+   file_in   = ''
+   file_list = 'obs_files.txt'
+   file_out  = 'obs_seq.svp'
+   debug     = .true.
+   /
+
+&obs_sequence_nml
+   write_binary_obs_sequence = .false.
+   /
+
+&utilities_nml
+   module_details = .false.
+   /
+
+&obs_kind_nml
+   /
+
+&location_nml
+   /
+
+&obs_sequence_tool_nml
+   filename_seq  = ''
+   filename_out  = ''
+   print_only    = .false.
+   gregorian_cal = .true.
+ first_obs_days = 150113 + first_obs_seconds = 0 + last_obs_days = 150114 + last_obs_seconds = 86399 + / diff --git a/observations/obs_converters/SVP/work/quickbuild.sh b/observations/obs_converters/SVP/work/quickbuild.sh new file mode 100755 index 000000000..97b3164ba --- /dev/null +++ b/observations/obs_converters/SVP/work/quickbuild.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# DART software - Copyright UCAR. This open source software is provided +# by UCAR, "as is", without charge, subject to all terms of use at +# http://www.image.ucar.edu/DAReS/DART/DART_download + +main() { + +export DART=$(git rev-parse --show-toplevel) +source "$DART"/build_templates/buildconvfunctions.sh + +CONVERTER=SVP +LOCATION=threed_sphere + +programs=( +advance_time +svp_to_obs +obs_seq_to_netcdf +obs_sequence_tool +) + +# build arguments +arguments "$@" + +# clean the directory +\rm -f -- *.o *.mod Makefile .cppdefs + +# build and run preprocess before making any other DART executables +buildpreprocess + +# build +buildconv + + +# clean up +\rm -f -- *.o *.mod + +} + +main "$@"