22
22
import shutil
23
23
24
24
import pandas as pd
25
+ import xarray as xr
25
26
26
27
from disdrodb import __root_path__
27
- from disdrodb .api .path import define_metadata_dir , get_disdrodb_path
28
- from disdrodb .l0 .l0_reader import get_station_reader_function
28
+ from disdrodb .api .io import available_stations
29
+ from disdrodb .api .path import define_campaign_dir , define_station_dir
30
+ from disdrodb .l0 .l0_processing import run_l0a_station
31
+ from disdrodb .metadata import read_station_metadata
29
32
from disdrodb .utils .directories import list_files
30
33
31
34
# Root of the minimal DISDRODB test archive used to validate the readers.
TEST_BASE_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data", "check_readers", "DISDRODB")
32
35
33
36
34
- def _get_list_test_data_sources () -> list :
35
- """Get list of test data sources.
36
-
37
- Returns
38
- -------
39
- list
40
- List of test data sources.
41
- """
42
-
43
- data_sources = os .listdir (os .path .join (TEST_BASE_DIR , "Raw" ))
44
- return data_sources
45
-
46
-
47
- def _get_list_test_campaigns (data_source : str ) -> list :
48
- """Get list of test campaigns for a given data source.
49
-
50
- Parameters
51
- ----------
52
- data_source : str
53
- Data source.
54
-
55
- Returns
56
- -------
57
- list
58
- List of test campaigns.
59
-
60
- """
61
- campaign_names = os .listdir (os .path .join (TEST_BASE_DIR , "Raw" , data_source ))
62
- return campaign_names
63
-
64
-
65
- def _get_list_test_stations (data_source : str , campaign_name : str ) -> list :
66
- """Get list of test stations for a given data source and campaign.
37
def _check_identical_netcdf_files(file1: str, file2: str) -> None:
    """Check that two L0B netCDF files are identical.

    Parameters
    ----------
    file1 : str
        Path to the first file.
    file2 : str
        Path to the second file.

    Raises
    ------
    AssertionError
        If the two datasets are not identical.
    """
    # Context managers guarantee the underlying file handles are closed
    # even when the comparison below raises.
    with xr.open_dataset(file1) as ds1, xr.open_dataset(file2) as ds2:
        # Remove attributes that depend on processing time
        ds1.attrs.pop("disdrodb_processing_date", None)
        ds2.attrs.pop("disdrodb_processing_date", None)
        # Assert equality
        xr.testing.assert_identical(ds1, ds2)
94
-
95
- def _is_parquet_files_identical (file1 : str , file2 : str ) -> bool :
58
def _check_identical_parquet_files(file1: str, file2: str) -> None:
    """Check that two L0A Parquet files are identical.

    Parameters
    ----------
    file1 : str
        Path to the first file.
    file2 : str
        Path to the second file.

    Raises
    ------
    ValueError
        If the contents of the two files differ.
    """
    df1 = pd.read_parquet(file1)
    df2 = pd.read_parquet(file2)
    if not df1.equals(df2):
        raise ValueError("The two Parquet files differ.")
115
73
116
74
117
- def _run_reader_on_test_data (data_source : str , campaign_name : str ) -> None :
118
- """Run reader over the test data sample.
75
def _check_station_reader_results(
    base_dir,
    data_source,
    campaign_name,
    station_name,
):
    """Run the station reader and compare its products against the ground truth.

    Parameters
    ----------
    base_dir : str
        Base directory of the test DISDRODB archive.
    data_source : str
        Data source name.
    campaign_name : str
        Campaign name.
    station_name : str
        Station name.

    Raises
    ------
    ValueError
        If the number of produced files differs from the ground truth,
        or if any produced file differs from its ground truth counterpart.
    """
    # Honor the base_dir argument (it was previously ignored in favor of
    # the hard-coded TEST_BASE_DIR; callers pass TEST_BASE_DIR anyway).
    raw_dir = define_campaign_dir(
        base_dir=base_dir,
        product="RAW",
        data_source=data_source,
        campaign_name=campaign_name,
    )

    # Run the reader (produces L0A, and L0B for netCDF-based readers)
    run_l0a_station(
        base_dir=base_dir,
        data_source=data_source,
        campaign_name=campaign_name,
        station_name=station_name,
        force=True,
        verbose=False,
        debugging_mode=False,
        parallel=False,
    )

    metadata = read_station_metadata(
        base_dir=base_dir,
        product="L0A",
        data_source=data_source,
        campaign_name=campaign_name,
        station_name=station_name,
    )
    raw_data_format = metadata["raw_data_format"]
    # netCDF raw data map to L0B netCDF products; text data to L0A Parquet.
    if raw_data_format == "netcdf":
        glob_pattern = "*.nc"
        check_identical_files = _check_identical_netcdf_files
        product = "L0B"
    else:  # raw_data_format == "txt"
        glob_pattern = "*.parquet"
        check_identical_files = _check_identical_parquet_files
        product = "L0A"

    ground_truth_station_dir = os.path.join(raw_dir, "ground_truth", station_name)
    processed_station_dir = define_station_dir(
        base_dir=base_dir,
        product=product,
        data_source=data_source,
        campaign_name=campaign_name,
        station_name=station_name,
    )

    # Retrieve files (sorted so ground truth and processed files pair up)
    ground_truth_files = sorted(list_files(ground_truth_station_dir, glob_pattern=glob_pattern, recursive=True))
    processed_files = sorted(list_files(processed_station_dir, glob_pattern=glob_pattern, recursive=True))

    # Check same number of files
    n_ground_truth = len(ground_truth_files)
    n_processed = len(processed_files)
    if n_ground_truth != n_processed:
        raise ValueError(f"{n_ground_truth} ground truth files but only {n_processed} are produced.")

    # Compare equality of files
    for ground_truth_filepath, processed_filepath in zip(ground_truth_files, processed_files):
        try:
            check_identical_files(ground_truth_filepath, processed_filepath)
        except Exception as err:
            # Chain the underlying comparison failure for debuggability.
            raise ValueError(
                f"Reader validation has failed for '{data_source}' '{campaign_name}' '{station_name}'",
            ) from err
163
141
164
142
165
143
def check_all_readers() -> None:
    """Run all readers against the test data samples and validate their outputs.

    Raises
    ------
    ValueError
        If the reader validation has failed.
    """
    list_stations_info = available_stations(
        product="RAW",
        data_sources=None,
        campaign_names=None,
        return_tuple=True,
        base_dir=TEST_BASE_DIR,
    )

    # Stop at the first failing station, but always clean up afterwards.
    # NOTE: the failure is re-raised (previously it was silently swallowed,
    # contradicting the documented behavior).
    try:
        for data_source, campaign_name, station_name in list_stations_info:
            _check_station_reader_results(
                base_dir=TEST_BASE_DIR,
                data_source=data_source,
                campaign_name=campaign_name,
                station_name=station_name,
            )
    finally:
        # Remove Processed directory if exists
        processed_dir = os.path.join(TEST_BASE_DIR, "Processed")
        if os.path.exists(processed_dir):
            shutil.rmtree(processed_dir)
0 commit comments