From 6a7757482c30e6d1f61b57efb3a62d63ea6a288e Mon Sep 17 00:00:00 2001 From: Lee de Mora Date: Tue, 9 Aug 2022 15:51:26 +0100 Subject: [PATCH 1/3] Improved interface for single job analyses --- bgcval2/analysis_compare.py | 8 +- bgcval2/analysis_p2p.py | 134 ++++++++++++++++++++++----------- bgcval2/bgcval2_make_report.py | 103 +++++++++++++------------ setup.py | 4 +- 4 files changed, 152 insertions(+), 97 deletions(-) diff --git a/bgcval2/analysis_compare.py b/bgcval2/analysis_compare.py index c0f6f165..7cff92fd 100755 --- a/bgcval2/analysis_compare.py +++ b/bgcval2/analysis_compare.py @@ -4245,7 +4245,7 @@ def load_yml_and_run(compare_yml, config_user): print(jobID, 'suite:', suites[jobID]) for jobID in jobs: - # even if you don't want to download, we run this + # even if you don't want to download, we run this # as it clears up the path and ensures recently downloed data is # correctly symlinked. download_from_mass(jobID, doMoo=do_mass_download) @@ -4338,15 +4338,15 @@ def main(): compare_ymls = args.compare_yml for compare_yml in compare_ymls: - print(f"analysis_timeseries: Comparison config file {compare_yml}") + print(f"analysis_compare: Comparison config file {compare_yml}") if not os.path.isfile(compare_yml): - print(f"analysis_timeseries: Could not find comparison config file {compare_yml}") + print(f"analysis_compare: Could not find comparison config file {compare_yml}") sys.exit(1) load_yml_and_run(compare_yml, config_user) - print("Finished... ") + print("Finished analysis_compare... ") if __name__ == "__main__": diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py index 04ea1c8e..5506ec6a 100755 --- a/bgcval2/analysis_p2p.py +++ b/bgcval2/analysis_p2p.py @@ -41,6 +41,7 @@ from netCDF4 import Dataset import numpy as np import sys +import argparse ##### #Specific local code: @@ -85,7 +86,7 @@ p2pKeys_annual = [ 'T', - 'S', #'MLD', + 'S', #'MLD', 'Chl_CCI', 'N', 'Si', @@ -113,7 +114,7 @@ p2pKeys_physics = [ 'T', - 'S', #'MLD', + 'S', #'MLD', 'ZonalCurrent', 'MeridionalCurrent', 'VerticalCurrent' @@ -139,7 +140,7 @@ def analysis_p2p( noTargets=True, ): """ - + """ ##### # Switches: @@ -1499,44 +1500,93 @@ def single_p2p(jobID, key, year): print("Error: %s" % sys.exc_info()[0]) -def run(): +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + + parser.add_argument('-j', + '--jobID', + nargs='+', + type=str, + default=None, + help='One or more UKESM Job IDs (automatically generated by cycl/rose suite).', + required=True, + ) + parser.add_argument('-k', + '--keys', + default=['level2', ], + nargs='+', + type=str, + help=''.join(['Runtime keys - each key links to a pre-determined list of variables to analyse. ', + 'Keys are: ', ', '.join( accepted_keys)]), + required=False, + ) + parser.add_argument('-y', + '--years', + default=['best',], + nargs='+', + type=str, + help=''.join(['Years to analysis. Default is "best". ']) + required=False, + ) + + parser.add_argument('-c', + '--config-file', + default=os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'default-bgcval2-config.yml'), + help='User configuration file (for paths).', + required=False) + + args = parser.parse_args() + + return args + + + +def main(): from ._version import __version__ print(f'BGCVal2: {__version__}') - if "--help" in argv or len(argv) == 1: - print("Running with no arguments. Exiting.") - if "--help" in argv: - print("Read the documentation.") - sys.exit(0) - try: - jobID = argv[1] - except: - jobID = 'u-ab749' - - try: - year = argv[2] - except: - year = '2007' - - if 'debug' in argv[1:]: - analysisSuite = 'debug' - elif 'level2' in argv[1:]: - analysisSuite = 'level2' - elif 'physics' in argv[1:]: - analysisSuite = 'physics' - else: - analysisSuite = 'annual' - - analysis_p2p( - models=[ - 'NEMO', - 'MEDUSA', - ], - jobID=jobID, - years=[ - year, - ], #'2075','2076', - modelGrid='eORCA1', - annual=True, - noPlots=False, - analysisSuite=analysisSuite, - ) + args = get_args() + + jobIDs = args.jobID + keys = args.keys + years = args.years + + accepted_keys = ['debug', 'physics', 'leve2', 'annual'] + for jobID, year, suite in itertools.product(jobIDs, years, keys): + if year == 'best': + best_year = False + for divby in [100, 50, 25, 10, 5, 1]: + print("analysis_p2p:\t find best year", divby, year,best_year ) + if best_year: + continue + best_year = findLastFinishedYear(jobID, + dividby=divby,) # numberfiles=numberfiles) + if best_year == False: + continue + else: + year = best_year + + print('analysis_p2p:', jobID, year, suite) + single_p2p(jobID, suite, year) + # analysis_p2p( + # models=[ + # 'NEMO', + # 'MEDUSA', + # ], + # jobID=jobID, + # years=[ + # year, + # ], + # modelGrid='eORCA1', + # annual=True, + # noPlots=False, + # analysisSuite=suite, + # ) + print("Finished p2p... ") + + +if __name__ == "__main__": + main() diff --git a/bgcval2/bgcval2_make_report.py b/bgcval2/bgcval2_make_report.py index 5ffd076e..fba407fe 100755 --- a/bgcval2/bgcval2_make_report.py +++ b/bgcval2/bgcval2_make_report.py @@ -74,13 +74,13 @@ def addImageToHtml(fn, imagesfold, reportdir, debug=True): relfn = newfn.replace(reportdir, './') if debug: - print("addImageToHtml:\tfn:", fn, - "\n\timagesfold:", imagesfold, + print("addImageToHtml:\tfn:", fn, + "\n\timagesfold:", imagesfold, "\n\treportdir:", reportdir, "\n\tnewfn:", newfn, "\n\trelfn:", relfn) - + if not os.path.exists(newfn): if debug: print("addImageToHtml:\tcopytree", fn, newfn) basedir = folder(os.path.dirname(newfn)) @@ -102,7 +102,7 @@ def addImageToHtml(fn, imagesfold, reportdir, debug=True): if debug: print("addImageToHtml:\tremoving old file", fn) os.remove(newfn) shutil.copy2(fn, newfn) - if debug: + if debug: print("addImageToHtml:\t copy2", fn, newfn) return relfn @@ -1056,7 +1056,6 @@ def newImageLocation(fn): FileOrder=FileOrder) if level3OMZ: - l3omzFields = [ 'ExtentMaps' # 'O2', @@ -1375,7 +1374,7 @@ def comparehtml5Maker( #### # Copy all necceasiry objects and templates to the report location: print("Copying html and js assets to", reportdir) - #html5Assets_dir = + #html5Assets_dir = copytree(os.path.join(paths.bgcval2_repo,'bgcval2/html5/html5Assets'), reportdir) indexhtmlfn = reportdir + "index.html" @@ -1732,34 +1731,46 @@ def get_args(): formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-c', '--config-file', - default=os.path.join(os.getcwd(), - 'config-user.yml'), - help='User configuration file', + default=os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'default-bgcval2-config.yml'), + help='User configuration file (for paths).', required=False) - parser.add_argument('-i', - '--job-id', + parser.add_argument('-j', + '--jobID', + nargs='+', + type=str, default=None, - help='Job ID', - required=True) + help='One or more UKESM Job IDs (automatically generated by cycl/rose suite).', + required=True, + ) parser.add_argument('-y', '--year', - default=None, + default='*', help='Year', required=False) + parser.add_argument('-k', + '--keys', + default=['kmf', 'level1',], + nargs='+', + type=str, + help=''.join(['Runtime keys - each key links to a pre-determined list of variables to analyse. ',]), + #'Keys are: ', ', '.join( accepted_keys)]), + required=False, + ) parser.add_argument('-a', '--clean', action='store_true', - help='Clean or not', + help='Delete previous report and replace it.', required=False) parser.add_argument('-p', '--physics', action='store_true', - help='Physics or not', + help='Physics only switch.', required=False) parser.add_argument('-r', '--report', - default=None, - help='Report repo', + default='reports/', + help='Output directory to host the report.', required=False) args = parser.parse_args() @@ -1773,37 +1784,28 @@ def main(): print(f'BGCVal2: {__version__}') args = get_args() - jobID = args.job_id - if args.config_file: - config_user = os.path.join(os.getcwd(), args.config_file) - print(f"analysis_timeseries: Using user config file {config_user}") - else: - config_user = os.path.join(os.getcwd(), "bgcval2-config-user.yml") - print(f"analysis_timeseries: Using user default file {config_user}") + jobID = args.job_id + config_user = args.config_file + clean = args.clean + physics = args.physics + year = args.year + if not os.path.isfile(config_user): print(f"analysis_timeseries: Could not find configuration file {config_user}") config_user = None - #defaults: - clean = False - physicsOnly = False - year = '*' - reportdir = folder('reports/' + jobID) - if args.year: try: year = int(args.year) except ValueError: print("analysis_timeseries: Invalid input for year - must be an integer, got {args.year}") - if args.clean: - clean = True - print("analysis_timeseries: Running with Clean option!") - if args.physics: - physicsOnly = True - print("analysis_timeseries: Running with Physics option!") - if args.report: - reportdir = os.path.abspath(args.report) + + if clean: + print("analysis_timeseries: Running with Clean option.") + + if physics: + print("analysis_timeseries: Running with Physics only option.") # get runtime configuration; not implemented yet if config_user: @@ -1814,16 +1816,19 @@ def main(): # filter paths dict into an object that's usable below paths = paths_setter(paths_dict) - html5Maker( - jobID=jobID, - reportdir=reportdir, - year=year, - clean=clean, - physicsOnly=physicsOnly, - paths=paths, - config_user=config_user - ) - + for jobID in jobIDs: + reportdir = os.path.abspath(folder([args.report, jobID])) + + html5Maker( + jobID=jobID, + reportdir=reportdir, + year=year, + clean=clean, + physicsOnly=physics, + paths=paths, + config_user=config_user + ) + if __name__ == "__main__": main() diff --git a/setup.py b/setup.py index 482668b3..3e644a13 100755 --- a/setup.py +++ b/setup.py @@ -196,10 +196,10 @@ def read_authors(filename): 'analysis_level3_dms = bgcval2.analysis_level3_dms:main', 'analysis_level3_omz = bgcval2.analysis_level3_omz:main', 'analysis_level3_sameYear = bgcval2.analysis_level3_sameYear:main', - 'analysis_p2p = bgcval2.analysis_p2p:run', + 'analysis_p2p = bgcval2.analysis_p2p:main', 'analysis_timeseries = bgcval2.analysis_timeseries:main', 'bgcval = bgcval2.bgcval:run', - 'download_from_mass = bgcval2.download_from_mass:main', + 'download_from_mass = bgcval2.download_from_mass:main', 'bgcval2_make_report = bgcval2.bgcval2_make_report:main', ], }, From 8d4768d6308989f263496ed57bccbde191aa618d Mon Sep 17 00:00:00 2001 From: Lee de Mora Date: Tue, 9 Aug 2022 15:53:57 +0100 Subject: [PATCH 2/3] Debugging help messages --- bgcval2/analysis_p2p.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py index 5506ec6a..4a859209 100755 --- a/bgcval2/analysis_p2p.py +++ b/bgcval2/analysis_p2p.py @@ -32,7 +32,6 @@ ##### #Standard Python modules: -from sys import argv, exit from os.path import exists from calendar import month_name from socket import gethostname @@ -41,6 +40,7 @@ from netCDF4 import Dataset import numpy as np import sys +import os import argparse ##### @@ -1505,6 +1505,7 @@ def get_args(): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + accepted_keys = ['debug', 'physics', 'leve2', 'annual'] parser.add_argument('-j', '--jobID', @@ -1528,7 +1529,7 @@ def get_args(): default=['best',], nargs='+', type=str, - help=''.join(['Years to analysis. Default is "best". ']) + help=''.join(['Years to analysis. Default is "best". ']), required=False, ) From b4ce2ef819bc2cb166b60ac8f7997ea369d45b1a Mon Sep 17 00:00:00 2001 From: Lee de Mora Date: Mon, 15 Aug 2022 11:09:45 +0100 Subject: [PATCH 3/3] working on single job analysis --- bgcval2/UKESMpython.py | 2 +- bgcval2/analysis_p2p.py | 40 +++++++++++++++++++--------------- bgcval2/analysis_timeseries.py | 2 +- bgcval2/bgcval2_make_report.py | 2 +- bgcval2/download_from_mass.py | 34 ++++++++++++++++++++--------- 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/bgcval2/UKESMpython.py b/bgcval2/UKESMpython.py index f497c517..4405e6ba 100644 --- a/bgcval2/UKESMpython.py +++ b/bgcval2/UKESMpython.py @@ -72,7 +72,7 @@ def folder(name): It also accepts lists of strings. """ if type(name) == type(['a', 'b', 'c']): - name = join(name, '/') + name = os.path.join(name) if name[-1] != '/': name = name + '/' if exists(name) is False: diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py index 4a859209..6ee165f0 100755 --- a/bgcval2/analysis_p2p.py +++ b/bgcval2/analysis_p2p.py @@ -32,7 +32,6 @@ ##### #Standard Python modules: -from os.path import exists from calendar import month_name from socket import gethostname from getpass import getuser @@ -42,6 +41,7 @@ import sys import os import argparse +import itertools ##### #Specific local code: @@ -51,6 +51,7 @@ from .p2p.patternAnalyses import InterAnnualPatterns, BGCvsPhysics from .bgcvaltools.pftnames import months from .p2p.shelveToDictionary import shelveToDictionary +from .download_from_mass import findLastFinishedYear ##### # User defined set of paths pointing towards the datasets. @@ -1557,35 +1558,40 @@ def main(): accepted_keys = ['debug', 'physics', 'leve2', 'annual'] for jobID, year, suite in itertools.product(jobIDs, years, keys): + print('p2p:', jobID, year, suite) if year == 'best': best_year = False for divby in [100, 50, 25, 10, 5, 1]: - print("analysis_p2p:\t find best year", divby, year,best_year ) if best_year: continue + print("analysis_p2p:\t find best year", divby, year,best_year ) best_year = findLastFinishedYear(jobID, - dividby=divby,) # numberfiles=numberfiles) + dividby=divby, debug= False) # numberfiles=numberfiles) if best_year == False: continue else: year = best_year + analysis_p2p( + models=[ + 'NEMO', + 'MEDUSA', + ], + jobID=jobID, + years=[ + year, + ], #'2075','2076', + modelGrid='eORCA1', + annual=True, + noPlots=False, + analysisSuite=[suite, + ], + ) + + print('analysis_p2p:', jobID, year, suite) single_p2p(jobID, suite, year) - # analysis_p2p( - # models=[ - # 'NEMO', - # 'MEDUSA', - # ], - # jobID=jobID, - # years=[ - # year, - # ], - # modelGrid='eORCA1', - # annual=True, - # noPlots=False, - # analysisSuite=suite, - # ) + print("Finished p2p... ") diff --git a/bgcval2/analysis_timeseries.py b/bgcval2/analysis_timeseries.py index 30ed9261..30a4d2cb 100755 --- a/bgcval2/analysis_timeseries.py +++ b/bgcval2/analysis_timeseries.py @@ -4991,7 +4991,7 @@ def main(): "Will proceed with defaults.") config_user = None - for jobID, suite in itertools.product(keys, jobIDs): + for jobID, suite in itertools.product(jobIDs, keys): analysis_timeseries( jobID=jobID, analysisSuite=suite, diff --git a/bgcval2/bgcval2_make_report.py b/bgcval2/bgcval2_make_report.py index fba407fe..0c4c698e 100755 --- a/bgcval2/bgcval2_make_report.py +++ b/bgcval2/bgcval2_make_report.py @@ -1785,7 +1785,7 @@ def main(): args = get_args() - jobID = args.job_id + jobIDs = args.jobID config_user = args.config_file clean = args.clean physics = args.physics diff --git a/bgcval2/download_from_mass.py b/bgcval2/download_from_mass.py index 0d6cf425..2484e284 100755 --- a/bgcval2/download_from_mass.py +++ b/bgcval2/download_from_mass.py @@ -31,13 +31,14 @@ ##### # Load Standard Python modules: import argparse - +import sys from sys import stdout import subprocess from socket import gethostname import os from glob import glob from re import findall +import numpy as np ##### # Load specific local code: @@ -136,7 +137,7 @@ def rebaseSymlinks(fn, dryrun=True, debug=False): os.symlink(realpath, fn) -def findLastFinishedYear(jobID, dividby=1, numberfiles=6): +def findLastFinishedYear(jobID, dividby=1, numberfiles=None,debug=False,): """ :param jobID: The job ID, as elsewhere. :param dividby: Outputs every "dividby" years. @@ -163,16 +164,22 @@ def findLastFinishedYear(jobID, dividby=1, numberfiles=6): for fn in files: yr = getYearFromFile(fn) - print("download_from_mass:\tgetYearFromFile:", fn, yr) + if debug: + print("download_from_mass:\tgetYearFromFile:", fn, yr) try: fnDict[yr] += 1 except: fnDict[yr] = 1 + if not numberfiles: + numberfiles = np.max([v for v in fnDict.values()]) + if debug:print('numberfiles:', numberfiles) + years = sorted(fnDict.keys()) years.reverse() - print("download_from_mass:\t",years, fnDict) + if debug: + print("download_from_mass:\t",years, fnDict) if len(years) == 0: print("download_from_mass:\tfindLastFinishedYear:\tNo files found.\t") @@ -181,13 +188,14 @@ def findLastFinishedYear(jobID, dividby=1, numberfiles=6): if len(years) < dividby: print("download_from_mass:\tfindLastFinishedYear:\tLess than", dividby, "years of model run, returning first year:", years[-1]) - return years[0] + return False for y in years: - if int(y) % dividby != 0: continue + if int(y) % dividby == 0: + if debug: print('Found year:', y) + return y - print(y, ':', fnDict[y]) - if fnDict[y] >= numberfiles: return y + #if fnDict[y] >= numberfiles: return y print( "No correct year, there's probably a problem here findLastFinishedYear(", @@ -445,9 +453,15 @@ def download_from_mass(jobID, doMoo=True): else: print("download_from_mass:\trunning the command:", bashCommand) stdout.flush() - process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) - output = process.communicate()[0] + try: + process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE) + output = process.communicate()[0] + except FileNotFoundError: + print('ERROR: FileNotFoundError: are you running this on a mass-connected machine like mass-cli1?') + print('ERROR: If not, try --dry-run') + sys.exit(1) + print('output', output) # moo get: if len(output.split('\n')) > 6000: failed = 0