mgharamti · mgharamti · Jan 7, 2026 · Dec 7, 2025 · Dec 7, 2025 · Dec 7, 2025
diff --git a/assimilation_code/modules/utilities/parse_args_mod.f90 b/assimilation_code/modules/utilities/parse_args_mod.f90
@@ -55,6 +55,7 @@ module parse_args_mod
 private
 
 public :: get_args_from_string,           &
+          get_csv_words_from_string,      &
           get_name_val_pairs_from_string, &
           get_next_arg                   
 
@@ -207,6 +208,178 @@ subroutine get_args_from_string(inline, argcount, argwords)
 
 end subroutine get_args_from_string
 
+!------------------------------------------------------------------------------
+! parse a single string up into delimiter-separated words
+
+subroutine get_csv_words_from_string(inline, delim, wordcount, words)
+
+ character(len=*), intent(in)  :: inline
+ character,        intent(in)  :: delim
+ integer,          intent(out) :: wordcount
+ character(len=*), intent(out) :: words(:)
+
+! Split 'inline' into the fields separated by the character 'delim' and
+! return them in words(1:wordcount); unused entries of 'words' are blank.
+! Trailing blanks and a trailing carriage return are ignored.  Consecutive
+! delimiters, and a trailing delimiter, give empty fields.  A field whose
+! first character is a quote runs to the matching closing quote, so it can
+! contain delimiters.  A backslash makes the next character literal.
+! Having more fields than size(words), or a field longer than len(words),
+! is reported through error_handler with E_ERR.
+!
+! in all these offsets, they are relative to 1, left hand char in string:
+!  firstoff is offset of the first character of the current field
+!  thisoff  is offset to the current character
+!  finaloff is offset of the last character to be parsed
+! inword is false only between a stored field and the delimiter after it
+! quoted is true while inside a field that began with a quote
+! endword is the character that ends the current field (delim or a quote)
+! wordlen is the number of characters in the field being stored
+! maxw and maxl are the max number of words and the max length of any one
+!  word, both defined by the array the caller passes in.
+
+integer :: firstoff, finaloff, thisoff
+logical :: inword, quoted
+integer :: maxw, maxl
+integer :: wordlen, i
+
+character(len=len(inline)) :: wordline
+character(len=512) :: msgstring
+character :: endword, thisc
+character(len=*), parameter :: routine = 'get_csv_words_from_string'
+
+! achar() is used because some compilers treat a backslash inside a
+! character literal as the start of an escape sequence
+character, parameter :: backslash = achar(92)
+character, parameter :: cr        = achar(13)
+
+! true to debug this routine, warning verbose
+logical, parameter :: debug = .false.
+
+maxw = size(words)
+maxl = len(words)
+
+words = ''
+wordcount = 0
+wordlen = 0
+
+! drop the carriage return left by CRLF line endings so that it does not
+! become part of the last field
+finaloff = len_trim(inline)
+if (finaloff > 0) then
+   if (inline(finaloff:finaloff) == cr) finaloff = len_trim(inline(1:finaloff-1))
+endif
+if (finaloff <= 0) return
+
+wordline = inline(1:finaloff)
+
+firstoff = 1
+thisoff  = 1
+inword = .true.
+quoted = .false.
+endword = delim
+
+if (debug) print *, 'line = ', '"'//trim(wordline)//'"'
+
+NEXTCHAR: do
+   ! end of input?
+   if (thisoff > finaloff) then
+      ! if currently in a field, complete it.  this is the last field, which
+      ! may be empty (trailing delimiter) or missing its closing quote.
+      if (inword) then
+         wordcount = wordcount + 1
+         if (wordcount > maxw) exit NEXTCHAR
+         wordlen = max(0, finaloff-firstoff+1)
+         if (debug) print *, 'thisoff, firstoff, wordlen = ', thisoff, firstoff, wordlen
+         if (wordlen > maxl) exit NEXTCHAR
+         words(wordcount) = wordline(firstoff:firstoff+wordlen-1)
+         if (debug) print *, 'word ', wordcount, ' is ', '"'//trim(words(wordcount))//'"'
+      endif
+      exit NEXTCHAR
+   endif
+
+   ! next character on line
+   thisc = wordline(thisoff:thisoff)
+
+   if (debug) print *, 'thisoff, finaloff, inword, endword, thisc = ', thisoff, finaloff, &
+                        inword, '"'//endword//'"', ' ', '"'//thisc//'"'
+
+   ! escape by backslash doesn't seem to be universally supported by CSV
+   ! files but it doesn't hurt.  backslash prevents interpretation of the
+   ! next char: move the remainder of the string over, overwriting the
+   ! backslash, then step past the char that followed it.
+   if (thisc == backslash) then
+      do i=thisoff, finaloff-1
+         wordline(i:i) = wordline(i+1:i+1)
+      enddo
+      wordline(finaloff:finaloff) = ' '
+      finaloff = finaloff-1
+      thisoff = thisoff+1
+      cycle NEXTCHAR
+   endif
+
+   ! in a CSV file the delimiters separate fields, so the first field does
+   ! not start with one and the last field does not end with one.  unlike
+   ! strings of blanks, consecutive delimiters can't be skipped; each one
+   ! starts a new (possibly empty) field.
+
+   ! between fields: the previous field has been stored, so this char has
+   ! to be the delimiter that starts the next one.
+   if (.not. inword) then
+      if (thisc == delim) then
+         inword = .true.
+         thisoff = thisoff+1  ! skip delimiter
+         firstoff = thisoff   ! first char of field
+         endword = delim
+      else
+         write(msgstring, *) "error?  not in word, next char not delimiter"
+         call error_handler(E_ERR,routine,msgstring,source)
+      endif
+      cycle NEXTCHAR
+   endif
+
+   ! if the first char of the field is a quote, skip it and make that quote
+   ! the char that ends the field.  only one opening quote is recognized
+   ! per field, so "" is an empty field and not two opening quotes.
+   if (thisoff == firstoff .and. .not. quoted .and. &
+       (thisc == '"' .or. thisc == "'")) then
+      quoted = .true.
+      endword = thisc
+      thisoff = thisoff+1
+      firstoff = thisoff   ! reset start of field
+      cycle NEXTCHAR
+   endif
+
+   ! end of the field?  an unquoted field ends at the delimiter, which is
+   ! left in place to be consumed on the next pass.  a quoted field ends at
+   ! its closing quote, which is skipped here.
+   if (thisc == endword) then
+      inword = .false.
+      wordlen = thisoff-firstoff
+      if (quoted) then
+         quoted = .false.
+         endword = delim
+         thisoff = thisoff+1  ! skip quote
+      endif
+      wordcount = wordcount + 1
+      if (wordcount > maxw) exit NEXTCHAR
+      if (debug) print *, 'thisoff, firstoff, wordlen = ', thisoff, firstoff, wordlen
+      if (wordlen > maxl) exit NEXTCHAR
+      words(wordcount) = wordline(firstoff:firstoff+wordlen-1)
+      if (debug) print *, 'word ', wordcount, ' is ', '"'//trim(words(wordcount))//'"'
+      cycle NEXTCHAR
+   endif
+
+   thisoff = thisoff + 1  ! ordinary character inside the field
+
+enddo NEXTCHAR
+
+if (wordcount > maxw) then
+   write(msgstring,*) 'more delimeter-separated words than max number allowed by calling code, ', maxw
+   call error_handler(E_ERR,routine,msgstring,source)
+endif
+
+if (wordlen > maxl) then
+   write(msgstring,*) 'one or more words longer than max length allowed by calling code, ', maxl
+   call error_handler(E_ERR,routine,msgstring,source)
+endif
+
+if (debug) then
+   print *, 'wordcount = ', wordcount
+   do i=1, wordcount
+      print *, 'word', i, ' is "'//trim(words(i))//'"'
+   enddo
+endif
+
+end subroutine get_csv_words_from_string
+
 !------------------------------------------------------------------------------
 ! parse a single string up into blank-separated name=value words
 ! and return an array of names and values, plus a flag indicating

diff --git a/assimilation_code/modules/utilities/read_csv_mod.f90 b/assimilation_code/modules/utilities/read_csv_mod.f90
@@ -32,7 +32,7 @@ module read_csv_mod
 use utilities_mod,  only : error_handler, E_ERR, find_textfile_dims, &
                            open_file, close_file, to_upper,          &
                            string_to_real, string_to_integer
-use parse_args_mod, only : get_args_from_string
+use parse_args_mod, only : get_csv_words_from_string
 
 implicit none
 private
@@ -124,7 +124,7 @@ subroutine csv_get_field_char(cf, varname, varvals, context)
       call error_handler(E_ERR, routine, string1, context)
    endif
 
-   call split_fields(line, cf%delim, nfields, entries)
+   call get_csv_words_from_string(line, cf%delim, nfields, entries)
 
    ! Parse the column entry. If it's _EMPTY_ then 
    ! treat it as empty string to make it MISSING 
@@ -213,98 +213,6 @@ integer function csv_get_nrows_from_file(fname, context) result(nrows)
 end function csv_get_nrows_from_file
 
 
-!---------------------------------------------------
-! Adapt get_args_from_string after adjusting delims
-subroutine split_fields(line, delim, nfields, fields)
-
-character(len=*), intent(in)  :: line
-character,        intent(in)  :: delim
-integer,          intent(out) :: nfields
-character(len=*), intent(out) :: fields(:)
-
-character(len=MAX_FIELDS_LEN) :: work
-
-! Clean the line then parse it 
-work = normalize_delims(line, delim)
-call get_args_from_string(work, nfields, fields)
-
-end subroutine split_fields
-
-
-!----------------------------------------------------------------------
-! Replace ',' and ';' with blanks to use above parsers. 
-! We also need to treat empty fields so that we don't
-! collapse with the spaces and cause any column drifts. 
-! This serves as a wrapper for 'get_args_from_string'
-! Example: 
-! A;B;;;;C;; --> A B _EMPTY_ _EMPTY_ _EMPTY_ C _EMPTY_ _EMPTY_
-function normalize_delims(line, delim) result(out_line)
-
-character(len=*), intent(in)  :: line
-character,        intent(in)  :: delim
-
-character(len=MAX_FIELDS_LEN) :: out_line
-integer                       :: i, j, L, k, lee
-logical                       :: prev_is_delim
-
-! Start as with a delimiter 
-out_line      = ' '
-prev_is_delim = .true.
-
-j = 1
-L = len_trim(line)
-
-lee = len(EMPTY_ENTRY)
-
-! Go over the line 1 character at a time
-do i = 1, L
-   if (line(i:i) == char(13)) cycle
-   if (line(i:i) == delim) then
-      ! Found a delim
-      if (prev_is_delim) then
-         ! insert placeholder + 1 space
-         out_line(j:j+lee-1) = EMPTY_ENTRY
-         j = j+lee
-         out_line(j:j) = ' '
-
-         j = j+1
-      else
-         ! normal delimiter
-         out_line(j:j) = ' '
-         j = j+1
-      endif
-      prev_is_delim = .true.
-      if (j > MAX_FIELDS_LEN - 64) exit ! prevent overflow; 64 is a small cushion
-   else
-      out_line(j:j) = line(i:i) 
-
-      j = j+1
-      prev_is_delim = .false.
-      if (j > MAX_FIELDS_LEN - 64) exit
-   endif
-enddo
-
-! Trailing empty field: line ends with a delimiter (or several)
-if (L > 0 .and. line(L:L) == delim) then
-   out_line(j:j+lee-1) = EMPTY_ENTRY
-   j = j + lee
-endif
-
-! Trim right spaces
-k = j - 1
-do while (k >= 1 .and. out_line(k:k) == ' ')
-   k = k - 1
-enddo
-
-if (k < 1) then
-   out_line = ''
-else
-   out_line = out_line(1:k)
-endif
-
-end function normalize_delims
-
-
 !---------------------------------------------------
 ! Find field index using cached header in csv_file_type.
 integer function csv_find_field(cf, key) result(idx)
@@ -460,10 +368,11 @@ end function csv_get_nrows
 ! Open a CSV handle: cache header/dims.
 ! By doing so, we won't need to open the file 
 ! every time to read header or get dimensions. 
-subroutine csv_open(fname, cf, context)
+subroutine csv_open(fname, cf, forced_delim, context)
 
 character(len=*),    intent(in)  :: fname
 type(csv_file_type), intent(out) :: cf
+character(len=*),    intent(in), optional :: forced_delim
 character(len=*),    intent(in), optional :: context
 
 character(len=*), parameter :: routine = 'csv_open'
@@ -496,9 +405,9 @@ subroutine csv_open(fname, cf, context)
 endif
 
 ! Can also enforce a specific delim as a second argument
-cf%delim = detect_delim(line)
+cf%delim = detect_delim(line, forced_delim)
 
-call split_fields(line, cf%delim, cf%ncols, cf%fields)
+call get_csv_words_from_string(line, cf%delim, cf%ncols, cf%fields)
 
 cf%is_open = .true.