66# Standard library imports
77from datetime import datetime
88from pathlib import Path
9+ from typing import Callable , Iterable , List
910
1011# Third-party imports
1112import fsspec
@@ -96,15 +97,47 @@ def apply_user_input_ocean_model_specific_changes(
9697 return ds
9798
9899
99- def make_ciofs_kerchunk (start : str , end : str , name : str ) -> dict :
def find_json_files_in_date_range(
    fs2,
    make_glob_from_year: Callable[[str], str],
    start: datetime,
    end: datetime,
    filename_date_format: str,
) -> List[str]:
    """Find JSON files in a date range and return their paths.

    Parameters
    ----------
    fs2
        Filesystem-like object; only its ``glob(pattern)`` method is used.
    make_glob_from_year : Callable[[str], str]
        Maps a four-digit year string (e.g. ``"2004"``) to the glob pattern
        that matches that year's files.
    start, end : datetime
        Bounds of the requested range, both inclusive. They may be given in
        either order (``end`` before ``start`` means going backward in time).
    filename_date_format : str
        ``datetime.strptime`` format that every globbed file stem (file name
        without its suffix) must match.

    Returns
    -------
    List[str]
        The globbed paths whose file-name date falls in the range, in the
        order the globs returned them. May be empty.

    Raises
    ------
    ValueError
        If the years of ``start`` and ``end`` differ by more than one, or
        (from ``strptime``) if a globbed stem does not match
        ``filename_date_format``.
    """
    # Only the start-year and end-year files are globbed, so a range that
    # skips over a whole year cannot be served.
    if abs(start.year - end.year) > 1:
        raise ValueError(
            f"Start ({start.year}) and end ({end.year}) "
            "dates must be at most 1 year apart."
        )

    start_year = start.strftime("%Y")
    end_year = end.strftime("%Y")

    # Copy into a fresh list so the filesystem's return value is never
    # mutated in place; glob order is not important here.
    candidates = list(fs2.glob(make_glob_from_year(start_year)))
    if end_year != start_year:
        candidates.extend(fs2.glob(make_glob_from_year(end_year)))

    # If going backward in time, swap so that lower <= upper.
    lower, upper = (start, end) if start <= end else (end, start)

    # Floor the lower bound to the start of its day. With a day-resolution
    # file name the parsed date is midnight, so an unfloored bound such as
    # 12:00 would exclude the file for the very day the range begins on.
    # NOTE(review): assumes a file named for a day holds that day's data --
    # confirm against how the single-file JSONs are produced.
    lower = lower.replace(hour=0, minute=0, second=0, microsecond=0)

    return [
        pth
        for pth in candidates
        if lower <= datetime.strptime(Path(pth).stem, filename_date_format) <= upper
    ]
132+
133+
134+ def make_ciofs_kerchunk (start : datetime , end : datetime , name : str ) -> dict :
100135 """_summary_
101136
102137 Parameters
103138 ----------
104- start, end : str
105- Should be something like "2004_0001" for YYYY_0DDD where DDD is dayofyear
106- to match the files in the directory, which are by year and day of year
107- ("ciofs_fresh" or "ciofs") or "YYYY-MM-DD" for "aws_ciofs"
139+ start, end : datetime
140+ Start and end time of the simulation.
108141
109142 Returns
110143 -------
@@ -123,50 +156,21 @@ def make_ciofs_kerchunk(start: str, end: str, name: str) -> dict:
123156
124157 fs2 = fsspec .filesystem ("" ) # local file system to save final jsons to
125158
126- if name in ["CIOFS" , "CIOFSFRESH" ]:
127-
159+ if name == "CIOFSOP" :
128160 # base for matching
129161 def base_str (a_time : str ) -> str :
130- return f"{ output_dir_single_files } /{ a_time } _*.json"
131-
132- date_format = "%Y_0%j"
162+ return f"{ output_dir_single_files } /ciofs_{ a_time } -*.json"
133163
134- elif name == "CIOFSOP" :
164+ date_format = "ciofs_%Y-%m-%d"
135165
166+ else : # name is "CIOFS" or "CIOFSFRESH"
136167 # base for matching
137168 def base_str (a_time : str ) -> str :
138- return f"{ output_dir_single_files } /ciofs_{ a_time } -*.json"
139-
140- date_format = "ciofs_%Y-%m-%d"
141- else :
142- raise ValueError (f"Name { name } not recognized" )
169+ return f"{ output_dir_single_files } /{ a_time } _*.json"
143170
144- # only glob start and end year files, order isn't important
145- json_list = fs2 .glob (base_str (start [:4 ]))
146- if end [:4 ] != start [:4 ]:
147- json_list += fs2 .glob (base_str (end [:4 ]))
148-
149- # forward in time
150- if end > start :
151- json_list = [
152- j
153- for j in json_list
154- if datetime .strptime (Path (j ).stem , date_format ).isoformat () >= start
155- and datetime .strptime (Path (j ).stem , date_format ).isoformat () <= end
156- ]
157- # backward in time
158- elif end < start :
159- json_list = [
160- j
161- for j in json_list
162- if datetime .strptime (Path (j ).stem , date_format ).isoformat () <= start
163- and datetime .strptime (Path (j ).stem , date_format ).isoformat () >= end
164- ]
171+ date_format = "%Y_0%j"
165172
166- if json_list == []:
167- raise ValueError (
168- f"No files found in { output_dir_single_files } for { start } to { end } "
169- )
173+ json_list = find_json_files_in_date_range (fs2 , base_str , start , end , date_format )
170174
171175 # Multi-file JSONs
172176 # This code uses the output generated above to create a single ensemble dataset,
@@ -280,13 +284,13 @@ def postprocess(out: dict) -> dict:
280284 return out
281285
282286
283- def make_nwgoa_kerchunk (start : str , end : str , name : str = "NWGOA" ) -> dict :
287+ def make_nwgoa_kerchunk (start : datetime , end : datetime , name : str = "NWGOA" ) -> dict :
284288 """_summary_
285289
286290 Parameters
287291 ----------
288- start, end : str
289- Should be something like "1999-01-02" for YYYY-MM-DD
292+ start, end : datetime
293+ Start and end time of the simulation.
290294
291295 Returns
292296 -------
@@ -308,33 +312,7 @@ def base_str(a_time: str) -> str:
308312
309313 date_format = "nwgoa_%Y-%m-%d"
310314
311- # only glob start and end year files, order isn't important
312- json_list = fs2 .glob (base_str (start [:4 ]))
313-
314- if end [:4 ] != start [:4 ]:
315- json_list += fs2 .glob (base_str (end [:4 ]))
316-
317- # forward in time
318- if end > start :
319- json_list = [
320- j
321- for j in json_list
322- if datetime .strptime (Path (j ).stem , date_format ).isoformat () >= start
323- and datetime .strptime (Path (j ).stem , date_format ).isoformat () <= end
324- ]
325- # backward in time
326- elif end < start :
327- json_list = [
328- j
329- for j in json_list
330- if datetime .strptime (Path (j ).stem , date_format ).isoformat () <= start
331- and datetime .strptime (Path (j ).stem , date_format ).isoformat () >= end
332- ]
333-
334- if json_list == []:
335- raise ValueError (
336- f"No files found in { output_dir_single_files } for { start } to { end } "
337- )
315+ json_list = find_json_files_in_date_range (fs2 , base_str , start , end , date_format )
338316
339317 # account for double compression
340318 # Look at individual variables in the files to see what needs to be changed with
0 commit comments