From f079285936c6c9adea3550c8ae701701d89764a3 Mon Sep 17 00:00:00 2001 From: Joel Heaps Date: Sat, 21 Jun 2025 16:30:33 -0500 Subject: [PATCH 1/5] Implement keyword arguments for met_db_load.main and update tests accordingly Includes a fix for an erroneous reference to 'self.logger' when printing an exception. --- METdbLoad/test/test_met_db_load.py | 165 +++++++++++++---------------- METdbLoad/test/test_run_sql.py | 20 ++-- METdbLoad/ush/met_db_load.py | 63 +++++------ 3 files changed, 115 insertions(+), 133 deletions(-) diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py index d7088d6f..f78569e2 100644 --- a/METdbLoad/test/test_met_db_load.py +++ b/METdbLoad/test/test_met_db_load.py @@ -137,20 +137,17 @@ def test_met_db_table_counts( "mtd_header_db_check": "true", } - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, met_data_dir, met_tool, load_flags=load_flags - ) - ), - "index": "true", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) - - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + } + + load_main(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) @@ -206,22 +203,19 @@ def test_met_db_table_dups( "mtd_header_db_check": "true", "force_dup_file": "false", } - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, met_data_dir, met_tool, load_flags=load_flags - ) - ), - "index": "true", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) - - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + } + + load_main(**test_args) # load again to check duplicates aren't loaded in db - load_main(test_args) + load_main(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) @@ -277,22 +271,19 @@ def test_met_db_table_dups_allowed( "mtd_header_db_check": "true", "force_dup_file": "true", } - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, met_data_dir, met_tool, load_flags=load_flags - ) - ), - "index": "true", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) - - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, load_flags=load_flags + ) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + } + + load_main(**test_args) # load again to add duplicates - load_main(test_args) + load_main(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count * 2) @@ -304,21 +295,18 @@ def test_met_db_indexes( tmp_path, ): # set up to "apply_indexes" - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, - POINT_STAT_DATA_DIR, - "point_stat", - {"apply_indexes": "true"}, - ) - ), - "index": "false", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, + POINT_STAT_DATA_DIR, + "point_stat", + {"apply_indexes": "true"}, + ) + ), + "index": False, + "tmpdir": [str(tmp_path)], + } # file_id and stat_header are already indexed idx_cnt = testRunSql.cur.execute("SHOW INDEX FROM line_data_fho") @@ -326,7 +314,7 @@ def test_met_db_indexes( # sys.exit is called after processing indexes with pytest.raises(SystemExit): - load_main(test_args) + load_main(**test_args) # check extra indicies have been created idx_cnt = testRunSql.cur.execute("SHOW INDEX FROM line_data_fho") @@ -338,7 +326,7 @@ def test_met_db_indexes( # check sys.exit called on error with pytest.raises(SystemExit): with patch.object(RunSql, "apply_indexes", side_effect=KeyError): - load_main(test_args) + load_main(**test_args) @pytest.mark.parametrize( @@ -395,20 +383,17 @@ def test_local_in_file( ): """check we get the same result when local_file is on or off""" - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, met_data_dir, met_tool, local_infile=local_infile - ) - ), - "index": "false", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) - - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, met_data_dir, met_tool, local_infile=local_infile + ) + ), + "index": False, + "tmpdir": [str(tmp_path)], + } + + load_main(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) @@ -434,23 +419,19 @@ def test_empty_files(tmp_path): "VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE\n" ) - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file( - tmp_path, - met_data_dir, - "mtd", - ) - ), - "index": "false", - "tmpdir": [str(tmp_path)], - "loglevel": None, - } - ) - - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file( + tmp_path, + met_data_dir, + "mtd", + ) + ), + "index": False, + "tmpdir": [str(tmp_path)] + } + load_main(**test_args) def test_print_version(): mock_logger = MagicMock() @@ -459,7 +440,7 @@ def test_print_version(): assert mock_logger.info.call_args[0][0].startswith("METdbload Version:") with pytest.raises(SystemExit): - with patch("os.path.dirname", side_effect=TypeError): + with patch("METdbLoad.ush.met_db_load.PACKAGE_NAME", "metdataio_bogus_package_name"): print_version(mock_logger) assert mock_logger.error.call_count == 2 diff --git a/METdbLoad/test/test_run_sql.py b/METdbLoad/test/test_run_sql.py index 0b985d29..981ddf5d 100644 --- a/METdbLoad/test/test_run_sql.py +++ b/METdbLoad/test/test_run_sql.py @@ -14,17 +14,15 @@ def populate_some_data( tmp_path, met_data_dir=GRID_STAT_DATA_DIR, met_tool="grid_stat", load_flags={} ): - test_args = dict_to_args( - { - "xmlfile": str( - get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags) - ), - "index": "true", - "tmpdir": [str(tmp_path)], - "loglevel": "DEBUG" - } - ) - load_main(test_args) + test_args = { + "xmlfile": str( + get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags) + ), + "index": "true", + "tmpdir": [str(tmp_path)], + "loglevel": "DEBUG" + } + load_main(**test_args) def test_get_file_name(tmp_path, emptyDB, testRunSql, mock_logger): diff --git a/METdbLoad/ush/met_db_load.py b/METdbLoad/ush/met_db_load.py index 5524226d..424689ce 100644 --- a/METdbLoad/ush/met_db_load.py +++ b/METdbLoad/ush/met_db_load.py @@ -22,6 +22,7 @@ import time from datetime import datetime from datetime import timedelta +from importlib.metadata import version, PackageNotFoundError import sys import os import getpass @@ -41,32 +42,31 @@ from METreformat.util import get_common_logger -def main(args): +DEFAULT_TMP_DIR = [os.getenv('HOME')] +PACKAGE_NAME = "metdataio" # Used for version lookup + + +def main(xmlfile, index, tmpdir=DEFAULT_TMP_DIR, loglevel=None): """ Main program to load files into the METdataio/METviewer database Returns: N/A """ try: - # use the current date/time for logger begin_time = str(datetime.now()) - # setup a logger for this module - cli_loglevel = False - if args.loglevel: - loglevel = args.loglevel - cli_loglevel = True - else: - loglevel = DEFAULT_LOGLEVEL - # Get the common logger + # Setup a logger for this module + loglevel_provided = True if loglevel else False + loglevel = loglevel or DEFAULT_LOGLEVEL # Use DEFAULT_LOGLEVEL if loglevel is 'None' + logger = get_common_logger(loglevel, 'stdout') - if cli_loglevel: - logger.info(f"Loglevel set to {loglevel} from command line.") - else: + if not loglevel_provided: logger.info( - f"Loglevel not supplied. Setting to default: {loglevel}. This may be overwritten by XML loadfile.") + "Loglevel not provided. Using default loglevel of %s. (This may be overridden by XML loadfile.)", loglevel) + else: + logger.info("Loglevel set to %s.", loglevel) # Print the METdbload version from the docs folder print_version(logger) @@ -86,10 +86,10 @@ def main(args): # Read the XML file # try: - logger.debug("XML filename is %s", args.xmlfile) + logger.debug("XML filename is %s", xmlfile) # instantiate a load_spec XML file - xml_loadfile = XmlLoadFile(args.xmlfile, logger=logger) + xml_loadfile = XmlLoadFile(xmlfile, logger=logger) # read in the XML file and get the information out of its tags xml_loadfile.read_xml() @@ -103,7 +103,7 @@ def main(args): # Verify the tmp file # try: - tmp_dir = args.tmpdir[0] + tmp_dir = tmpdir[0] if not os.path.isdir(tmp_dir): logger.error( "*** Error occurred in Main accessing tmp dir %s ***", tmp_dir) @@ -116,13 +116,13 @@ def main(args): # If XML tag verbose is set to True, change logger to debug level unless already # supplied via the cli. - if xml_loadfile.flags["verbose"] and not cli_loglevel: + if xml_loadfile.flags["verbose"] and not loglevel == DEFAULT_LOGLEVEL: logger.setLevel("DEBUG") # # If argument -index is used, only process the index # - if args.index and xml_loadfile.flags["apply_indexes"]: + if index and xml_loadfile.flags["apply_indexes"]: try: if xml_loadfile.connection['db_management_system'] in CN.RELATIONAL: sql_run = RunSql() @@ -360,7 +360,8 @@ def main(args): logger.info("--- *** --- End METdbLoad --- *** ---") except (RuntimeError, TypeError, NameError, KeyError, AttributeError): - self.logger.error( + logger = get_common_logger(DEFAULT_LOGLEVEL, 'stdout') + logger.error( "*** %s occurred in main function of met_db_load ***", sys.exc_info()[0]) sys.exit("*** Error loading data") @@ -371,16 +372,12 @@ def print_version(logger): N/A """ try: - base_dir = os.path.dirname(os.path.realpath(__file__)) - version_file = "{}/../../docs/version".format(base_dir) - file = open(version_file, mode='r') - code_version = str.strip(file.read()) - logger.info("METdbload Version: %s", code_version) + logger.info("METdbload Version: %s", version(PACKAGE_NAME)) - except (RuntimeError, TypeError, NameError, KeyError): + except (PackageNotFoundError): logger.error("*** %s occurred in print_version ***", sys.exc_info()[0]) logger.error( - "*** %s occurred in Main printing version ***", sys.exc_info()[0]) + "*** %s occurred in Main printing version. %s package was not found. ***", sys.exc_info()[0], PACKAGE_NAME) sys.exit("*** Error in print version") @@ -440,12 +437,11 @@ def parse_args(): try: parser = argparse.ArgumentParser() # Allow user to choose dir for tmp files - default to user home - tmp_dir = [os.getenv('HOME')] parser.add_argument( "xmlfile", help="Please provide required xml load_spec filename") parser.add_argument("-index", action="store_true", help="Only process index, do not load data") - parser.add_argument("tmpdir", nargs='*', default=tmp_dir, + parser.add_argument("tmpdir", nargs='*', default=DEFAULT_TMP_DIR, help="Optional - when different directory wanted for tmp file") parser.add_argument("--loglevel", default=None, type=str, choices={"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}, help="Optional - specify log level. One of: DEBUG, INFO, WARNING, ERROR, CRITICAL.") @@ -457,4 +453,11 @@ def parse_args(): if __name__ == '__main__': - main(parse_args()) + args = parse_args() + + main( + xmlfile=args.xmlfile, + index=args.index, + tmpdir=args.tmpdir, + loglevel=args.loglevel, + ) From 0c6744138a08516addd8358fd87285c03995eb8d Mon Sep 17 00:00:00 2001 From: Joel Heaps Date: Sat, 21 Jun 2025 16:30:46 -0500 Subject: [PATCH 2/5] Add test outputs to .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 9dce9d26..b370ef7a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,7 @@ mpr_df_orig.txt # other METplus components /METcalcpy + +# Test outputs +stdout/ +tcmpr_reformatted.txt From 69b2671ed6f8fc81b9e0ee361f9836f36da6321e Mon Sep 17 00:00:00 2001 From: Joel Heaps Date: Sat, 21 Jun 2025 16:31:00 -0500 Subject: [PATCH 3/5] Add missing dependencies to requirements.txt --- requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 07d531f2..72c4755a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,5 @@ pytest>=8.3.4 python-dateutil>=2.9.0.post0 PyYAML>=6.0.2 xarray>=2025.1.2 - - - +metcalcpy>=2.1 +netcdf4>=1.7.2 From ec9705f3c373ec0775135b08f5da74bf2eaa11b5 Mon Sep 17 00:00:00 2001 From: Joel Heaps Date: Sat, 21 Jun 2025 16:48:48 -0500 Subject: [PATCH 4/5] Fix "index only" test case --- METdbLoad/test/test_met_db_load.py | 2 +- METdbLoad/ush/met_db_load.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py index f78569e2..3b94ca2e 100644 --- a/METdbLoad/test/test_met_db_load.py +++ b/METdbLoad/test/test_met_db_load.py @@ -304,7 +304,7 @@ def test_met_db_indexes( {"apply_indexes": "true"}, ) ), - "index": False, + "index": True, "tmpdir": [str(tmp_path)], } diff --git a/METdbLoad/ush/met_db_load.py b/METdbLoad/ush/met_db_load.py index 424689ce..cce04369 100644 --- a/METdbLoad/ush/met_db_load.py +++ b/METdbLoad/ush/met_db_load.py @@ -46,7 +46,7 @@ PACKAGE_NAME = "metdataio" # Used for version lookup -def main(xmlfile, index, tmpdir=DEFAULT_TMP_DIR, loglevel=None): +def main(xmlfile, index=False, tmpdir=DEFAULT_TMP_DIR, loglevel=None): """ Main program to load files into the METdataio/METviewer database Returns: N/A From 0ab72912a58d4c3dd9a0a9defdddb543cd769070 Mon Sep 17 00:00:00 2001 From: Joel Heaps Date: Sun, 22 Jun 2025 20:35:47 -0500 Subject: [PATCH 5/5] Rename main function to load_met_data in met_db_load module --- METdbLoad/test/test_met_db_load.py | 22 +++++++++++----------- METdbLoad/test/test_run_sql.py | 4 ++-- METdbLoad/ush/met_db_load.py | 14 +++++++++++--- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py index 3b94ca2e..09439512 100644 --- a/METdbLoad/test/test_met_db_load.py +++ b/METdbLoad/test/test_met_db_load.py @@ -1,7 +1,7 @@ import pytest import sys from unittest.mock import patch, MagicMock -from METdbLoad.ush.met_db_load import main as load_main +from METdbLoad.ush.met_db_load import load_met_data from METdbLoad.ush.met_db_load import purge_files, parse_args, next_set, print_version from METdbLoad.ush.run_sql import RunSql from METdbLoad.test.utils import dict_to_args @@ -147,7 +147,7 @@ def test_met_db_table_counts( "tmpdir": [str(tmp_path)], } - load_main(**test_args) + load_met_data(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) @@ -213,9 +213,9 @@ def test_met_db_table_dups( "tmpdir": [str(tmp_path)], } - load_main(**test_args) + load_met_data(**test_args) # load again to check duplicates aren't loaded in db - load_main(**test_args) + load_met_data(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) @@ -281,9 +281,9 @@ def test_met_db_table_dups_allowed( "tmpdir": [str(tmp_path)], } - load_main(**test_args) + load_met_data(**test_args) # load again to add duplicates - load_main(**test_args) + load_met_data(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count * 2) @@ -314,7 +314,7 @@ def test_met_db_indexes( # sys.exit is called after processing indexes with pytest.raises(SystemExit): - load_main(**test_args) + load_met_data(**test_args) # check extra indicies have been created idx_cnt = testRunSql.cur.execute("SHOW INDEX FROM line_data_fho") @@ -326,7 +326,7 @@ def test_met_db_indexes( # check sys.exit called on error with pytest.raises(SystemExit): with patch.object(RunSql, "apply_indexes", side_effect=KeyError): - load_main(**test_args) + load_met_data(**test_args) @pytest.mark.parametrize( @@ -393,14 +393,14 @@ def test_local_in_file( "tmpdir": [str(tmp_path)], } - load_main(**test_args) + load_met_data(**test_args) for table, expected_count in expected_counts.items(): assert_count_rows(testRunSql.cur, table, expected_count) def test_empty_files(tmp_path): - """Junk files shouldn't cause an error when running load_main""" + """Junk files shouldn't cause an error when running load_met_data""" met_data_dir = tmp_path / "empty_files_test" met_data_dir.mkdir() @@ -431,7 +431,7 @@ def test_empty_files(tmp_path): "tmpdir": [str(tmp_path)] } - load_main(**test_args) + load_met_data(**test_args) def test_print_version(): mock_logger = MagicMock() diff --git a/METdbLoad/test/test_run_sql.py b/METdbLoad/test/test_run_sql.py index 981ddf5d..0df2948d 100644 --- a/METdbLoad/test/test_run_sql.py +++ b/METdbLoad/test/test_run_sql.py @@ -2,7 +2,7 @@ from unittest import mock from pymysql import OperationalError -from METdbLoad.ush.met_db_load import main as load_main +from METdbLoad.ush.met_db_load import load_met_data from METdbLoad.ush import constants as CN from METdbLoad.test.utils import dict_to_args from METdataio.METdbLoad.test.utils import ( @@ -22,7 +22,7 @@ def populate_some_data( "tmpdir": [str(tmp_path)], "loglevel": "DEBUG" } - load_main(**test_args) + load_met_data(**test_args) def test_get_file_name(tmp_path, emptyDB, testRunSql, mock_logger): diff --git a/METdbLoad/ush/met_db_load.py b/METdbLoad/ush/met_db_load.py index cce04369..f4a62dd2 100644 --- a/METdbLoad/ush/met_db_load.py +++ b/METdbLoad/ush/met_db_load.py @@ -46,8 +46,16 @@ PACKAGE_NAME = "metdataio" # Used for version lookup -def main(xmlfile, index=False, tmpdir=DEFAULT_TMP_DIR, loglevel=None): - """ Main program to load files into the METdataio/METviewer database +def load_met_data(xmlfile, index=False, tmpdir=DEFAULT_TMP_DIR, loglevel=None): + """ + Loads MET data into database based on the configuration in the given XML file. + Args: + xmlfile: Configuration file specifying what data should be loaded and + how it should be processed + index: Boolean indicating whether only indexes should be loaded, omitting + the data itself. + tmpdir: A directory to store temporary files in + loglevel: Minimum severity for shown log statements Returns: N/A """ @@ -455,7 +463,7 @@ def parse_args(): if __name__ == '__main__': args = parse_args() - main( + load_met_data( xmlfile=args.xmlfile, index=args.index, tmpdir=args.tmpdir,