diff --git a/README.md b/README.md
index 48917ced..63f216fb 100644
--- a/README.md
+++ b/README.md
@@ -44,22 +44,11 @@ conda activate bgcval2
 pip install -e .[develop]
 ```
 
-Running the tool
-================
-
-The tool has a number of executables one can invoke individually, e.g.:
-
-```
-analysis_timeseries u-bc179 level1
-analysis_p2p u-bc179 level2 2010
-```
-Once these have completed, a summary HTML page can be generated with the command:
-
+Test that the tool has been installed correctly with:
 ```
-bgcval2_make_report u-bc179 2010
+analysis_compare -h
 ```
-This produces an html5 mobile-friendly website which can be opened using your
-browser of choice.
+which should print the module information and instructions on how to run the tool.
 
 ### Available executables
 
@@ -73,6 +62,149 @@ Executable name | What it does | Command
 `analysis_compare`  | runs comparison of multiple single jobs | analysis_compare
 
+
+Running the tool to compare multiple jobs
+=========================================
+
+The time development of several models can be compared
+and summarized in a single comparison report html.
+This report can be generated with a single command, based on a simple yml file input:
+
+```
+analysis_compare --compare_yml comparison_recipe.yml
+```
+
+Example input yaml files exist in the `input_yml` directory.
+However, there are a few key values:
+
+
+In this yml file, the structure is:
+```
+---
+name:
+do_analysis_timeseries:
+do_mass_download:
+master_suites:
+
+jobs:
+   :
+      description:
+      colour: red
+      thickness: 0.7
+      linestyle: '-'
+      shifttime: 0.
+      suite: physics
+   :
+      description:
+      ...
+```
+
+These values are:
+ - `name`:
+   - The name of the analysis.
+   - This will be the path of the output report.
+ - `do_analysis_timeseries`:
+   - A Boolean value to run or skip the single model timeseries.
+   - Set to False if the single model analysis has already completed.
+ - `do_mass_download`:
+   - A boolean value to run the mass download.
+   - This is not currently possible as we can only download mass files from mass-cli1 on jasmin.
+   - See below for details on how to download jobs data.
+ - `master_suites`:
+   - A list of the type of analysis report to produce.
+   - Options are: `physics`, `bio`, `debug`.
+   - Default is `['physics', 'bio',]`.
+ - `jobs`:
+   - A list of jobIDs, and some options on how they will appear in the final report.
+   - The options are:
+     - `description`:
+       - A description of the job, which helps people differentiate the jobs in the final report.
+     - `colour`:
+       - The colour of this job's line in the report plots.
+       - Default colour is a randomly generated hex colour.
+     - `thickness`:
+       - The thickness of this job's line in the report plots.
+       - Default is `0.7`.
+     - `linestyle`:
+       - The linestyle of this job's line in the report plots.
+       - Accepts typical matplotlib line styles: `'solid', 'dashed', 'dotted', 'dashdot', '-', ':', etc.`
+       - Default is `'-'`.
+     - `shifttime`:
+       - A number in whole years by which the job's line is shifted.
+       - This is useful if jobs start from different initial years in spin up, for instance.
+       - Default is `0.` (no time shift).
+     - `suite`:
+       - An analysis suite to run the analysis_timeseries.
+       - See `analysis_timeseries` for more details.
+
+
+A sample yaml exists in `input_yml/comparison_analysis_template.yml`,
+which can be adapted to additional analyses.
+
+
+
+Downloading data using MASS
+===========================
+
+Data can be downloaded and prepared for analysis using the `download_from_mass` bgcval2 tool,
+with the command:
+```
+download_from_mass -j jobID
+```
+where `jobID` is one or more jobIDs.
+
+This script will only work on JASMIN's `mass-cli1` machine,
+which is set up to interact with the Met Office Storage System MASS.
+
+The optional flag `--dry-run` skips the download part of the script,
+and generates a script in the `bgcval2/mass_scripts` directory.
+From there, users can ssh to `mass-cli1` and execute this script:
+
+```
+# from login1.jasmin.ac.uk, ssh to the mass machine:
+ssh -X mass-cli1
+
+# run script with:
+source /path/to/bgcval2/is/here/bgcval2/mass_scripts/jobID.sh
+```
+
+Alternatively, the whole `download_from_mass` tool could be executed on the `mass-cli1` machine.
+
+Several different keys can be included in the download if monthly data is required.
+However, it's not recommended to include monthly data at this stage, as the code
+both to download it and to run the monthly analysis is not currently tested.
+
+This tool downloads the data, but also includes several functions which create symbolic links
+in the data's directory in order to accommodate incompatible changes in NEMO's output formatting.
+
+
+Running the tool for a single job
+=================================
+
+The multi-job analysis described above can only do timeseries analysis.
+To run an in-depth analysis of a single job, the following command can be run:
+
+```
+bgcval2 -j jobID
+```
+
+This will run a time series analysis, a point to point analysis, and
+publish the reports into a single job html5 report.
+
+
+Alternatively, these tasks can be invoked individually, e.g.:
+
+```
+analysis_timeseries --jobID u-bc179 --keys kmf level1
+analysis_p2p u-bc179 level2 2010
+bgcval2_make_report u-bc179 2010
+
+```
+This produces an html5 mobile-friendly website which can be opened using your
+browser of choice.
+
+
+
 Time series analysis
 --------------------
 
@@ -80,8 +212,9 @@ This is an analysis that investigates the time development
 of specific marine physics and Biogeochemistry fields in the given model,
 and then compares them against historical observations.
-The command to run it is `analysis_timeseries jobID key`, where jobID is a mass
-job id, such a `u-ab123`, and the key is a pre-defined key, which generates a
+The command to run it is `analysis_timeseries --jobID jobID --keys key`,
+where jobID is one or more mass jobIDs, such as `u-ab123`.
+The key is one or more pre-defined keys, which generate a
 list of variables.
 
 Key | What it is | Description
@@ -100,6 +233,8 @@ Note that there may be some overlap between the contents of these keys.
 Point to point analysis
 -----------------------
 
+WORK IN PROGRESS.
+
 This is an analysis that compares a specific year of the model
 against several climatological datasets, and produces comparison
 maps, histograms and scatter plots.
@@ -121,6 +256,8 @@ Note that there may be some overlap between the contents of these keys.
 Single Model report
 -------------------
 
+WORK IN PROGRESS.
+
 Once an analysis has run, either time series or point to point,
 a report can be generated from this output, using the command:
 ```
@@ -130,81 +267,6 @@ This gnerated an HTML5 mobile-friendlyt report, summarising the output of a
 single model run.
 
 
-Multi-model comaprison report
------------------------------
-
-Once several models have been analysed using the time series analysis,
-their time development can be compared using the `analysis_compare` command:
-```
-analysis_compare recipe.yml
-```
-
-The comparison reports are generated from a user defined yaml recipe.
-
-In this yml file, the key structure is:
-```
-name:
-do_analysis_timeseries:
-jobs:
-   :
-      description:
-   :
-      description:
-```
-Where the `name` is a short unique string describing the analysis script
-and the ultimately the name given here will become part of the path
-of the final output comparison report.
- -The `do_analysis_timeseries` bool lets `analysis_compare` send jobs to -`analysis_timeseries`, allowing the user to run the entire suite in one -command, instead of individually running the `analysis_timeseries` then -the `analysis_compare` part afterwards. - -The `jobs` is a dict of job IDs, that describe how each job will appear in the -final report. - -The optional arguments for each job are: - - colour: a colour hex, or html recognised string (default is a randomly generated hex colour.) - - thickness: line thickness for matplotlib (default (`0.7`) - - linestyle: line style for matplotlib (default: `'-'`) - - suite: suite to send to `analysis_timeseries` if `do_analysis_timeseries` is true. - -A sample yaml exists in `input_yml/comparison_analysis_template.yml`, -which can be adapted to additional analysis. - -Download data from MASS ------------------------ - -It's straightforward to download data from the Met Office -Storage System, MASS. -The bgcval2 tool `download_data_from mass` sets up the command and -outputs a script which you can run on Jasmin's mass-cli1 machine. - -Note that the only place on CEDA-JASMIN you can download data -is the dedicated virtual machine, mass-cli1.jasmin.ac.uk. - -The recommended process is to generate the download script on an interactive node, -like sci1 with the command: -``` -download_from_mass jobID noMoo -``` -Which will then create a script in the directory `mass_scripts`. -The runtime flag `noMoo` stops the script from attempted to execute the script. 
- -From there, the user must log into the mass machine, and execute the script: -``` -#from login1.jasmin.ac.uk: -ssh -X mas-cli1 -cd bgcval2 -source mass_script/*.sh -``` - -Note that these scripts can also be automatically generated by -the `analysis_compare` command by including the -``` -do_mass_download: True -``` - Documentation ============= diff --git a/bgcval2/_runtime_config.py b/bgcval2/_runtime_config.py index 97ae7c61..075949dc 100644 --- a/bgcval2/_runtime_config.py +++ b/bgcval2/_runtime_config.py @@ -64,9 +64,10 @@ def _establish_hostname(): hostname = "github-actions" # for testing on GA machine else: host = gethostname() - print("Got host name: ", host) - raise ValueError(f"Unidentified hostname {host}" - f"Run at either JASMIN, MONSOON or PML.") + print(f"Got host name: {host}") + raise ValueError(f"Unidentified hostname {host} - " + f"Run at either JASMIN, MONSOON or PML please." + " Exiting now, please log in one of the above clusters.") return hostname diff --git a/bgcval2/analysis_compare.py b/bgcval2/analysis_compare.py index dd5d2922..c0f6f165 100755 --- a/bgcval2/analysis_compare.py +++ b/bgcval2/analysis_compare.py @@ -26,26 +26,28 @@ """ .. module:: analysis_compare :platform: Unix - :synopsis: A script to produce an intercomparison of multiple runs the time series analyses. + :synopsis: A tool that generates an intercomparison of multiple UKESM jobs time series analyses. .. moduleauthor:: Lee de Mora +.. moduleauthor:: Valeriu Predoi """ +import argparse import matplotlib # Force matplotlib to not use any Xwindows backend. 
matplotlib.use('Agg') ##### # Load Standard Python modules: -from sys import argv, exit -from os.path import exists from calendar import month_name from socket import gethostname from netCDF4 import Dataset from glob import glob from scipy.interpolate import interp1d import numpy as np -import os, sys, fnmatch +import os +import sys +import fnmatch from getpass import getuser from collections import defaultdict import yaml @@ -133,9 +135,9 @@ def apply_shifttimes(mdata, jobID, shifttimes): for t in sorted(mdata.keys()): t1 = t + float(shifttimes[jobID]) times.append(t1) - datas.append(mdata[t]) - return times, datas - + datas.append(mdata[t]) + return times, datas + def timeseries_compare(jobs, @@ -144,7 +146,7 @@ def timeseries_compare(jobs, bio=False, debug=False, analysisname='', - shifttimes={}, + shifttimes={}, jobDescriptions={}, lineThicknesses=defaultdict(lambda: 1), linestyles=defaultdict(lambda: '-'), @@ -153,16 +155,16 @@ def timeseries_compare(jobs, """ timeseries_compare: Suite of tools to take pre-analyses time series model data - then compile into single plots, then publish it to an html + then compile into single plots, then publish it to an html document. """ ### strategy here is a simple wrapper. # It's a little cheat-y, as I'm copying straight from analysis_timeseries.py - + jobs = sorted(jobs) #jobs = sorted(colours.keys()) - + for ensemble in list(ensembles.keys()): # ensembles names can not be the same as jobIDs jobs.remove(ensemble) @@ -4004,7 +4006,7 @@ def areatotal(nc, keys): for filename in fnmatch.filter(filenames, '*.png'): AllImages.append(os.path.join(root, filename)) print('AllImages:','fors', root, dirnames, filenames, filename) - + if ensembles != {}: jobs = list(ensembles.keys()) @@ -4142,21 +4144,26 @@ def CompareTwoRuns(jobIDA, def load_comparison_yml(master_compare_yml_fn): """ Load the config yaml. - TAkes an file path string + Takes a file path string Returns: Details dict. 
""" with open(master_compare_yml_fn, 'r') as openfile: dictionary = yaml.safe_load(openfile) - details = {} + if not dictionary or not isinstance(dictionary, dict): + print(f"Configuration file {master_compare_yml_fn} " + "is either empty or corrupt, please check its contents") + sys.exit(1) + + details = {} details['name'] = dictionary.get('name', False) - details['jobs'] = dictionary.get('jobs', False) + details['jobs'] = dictionary.get('jobs', False) if not details['name']: print('Please provide a name for your analysis. In your yaml, this is:') print('name: MyAnalysisName') - exit(0) + sys.exit(0) if not details['jobs']: print('Please provide at least one JobID for your analysis. In your yaml, this is:') print('jobs: ') @@ -4166,96 +4173,90 @@ def load_comparison_yml(master_compare_yml_fn): print(' thickness: 0.7') print(" linestyle: '-'") print(' shifttime: 0.') - exit(0) + sys.exit(0) - details['do_analysis_timeseries'] = dictionary.get('do_analysis_timeseries', False) + details['do_analysis_timeseries'] = dictionary.get('do_analysis_timeseries', False) details['do_mass_download'] = dictionary.get('do_mass_download', False) - + details['master_suites'] = dictionary.get('master_suites', []) - - default_thickness = 0.75 - default_linestyle = '-' + + default_thickness = 0.7 + default_linestyle = 'solid' default_suite = 'kmf' - + thicknesses = {} linestyles = {} colours = {} suites = {} descriptions = {} shifttimes = {} # number of years to shift time axis. 
- + for jobID, job_dict in details['jobs'].items(): if job_dict.get('colour', False): colours[jobID] = job_dict['colour'] else: - colours[jobID] = ''.join(['#', "%06x" % random.randint(0, 0xFFFFFF)]) + colours[jobID] = ''.join(['#', "%06x" % random.randint(0, 0xFFFFFF)]) print('WARNING: No colour provided, setting to random hex colour:', colours[jobID]) - + descriptions[jobID] = job_dict.get('description', '') thicknesses[jobID] = job_dict.get('thickness', default_thickness) linestyles[jobID] = job_dict.get('linestyle', default_linestyle) shifttimes[jobID] = float(job_dict.get('shifttime', 0.)) suites[jobID] = job_dict.get('suite', default_suite) - + details['colours'] = colours details['descriptions'] = descriptions details['thicknesses'] = thicknesses - details['linestyles'] = linestyles - details['shifttimes'] = shifttimes + details['linestyles'] = linestyles + details['shifttimes'] = shifttimes details['suites'] = suites return details - -def main(): - if "--help" in argv or len(argv) == 1: - print("Running with no arguments. Exiting.") - if "--help" in argv: - print("Read the documentation.") - exit(0) - - config_user=None - if "bgcval2-config-user.yml" in argv[1:]: - config_user = "bgcval2-config-user.yml" - print(f"analysis_timeseries: Using user config file {config_user}") - - details = load_comparison_yml(argv[1]) - + +def load_yml_and_run(compare_yml, config_user): + """ + Loads the comparison yaml file and run compare_yml. 
+ + """ + # Below here is analysis + details = load_comparison_yml(comp_config) + jobs = details['jobs'] - analysis_name = details['name'] + analysis_name = details['name'] do_analysis_timeseries = details['do_analysis_timeseries'] do_mass_download = details['do_mass_download'] - master_suites = details['master_suites'] - + master_suites = details['master_suites'] + colours = details['colours'] thicknesses = details['thicknesses'] linestyles = details['linestyles'] descriptions = details['descriptions'] shifttimes = details['shifttimes'] - suites = details['suites'] - + suites = details['suites'] + print('---------------------') print('timeseries_compare:', analysis_name) print('job ids:', jobs.keys()) for jobID in jobs: print(jobID, 'description:',descriptions[jobID]) - print(jobID, 'colour:',colours[jobID]) + print(jobID, 'colour:',colours[jobID]) print(jobID, 'line thickness & style:',thicknesses[jobID], linestyles[jobID]) print(jobID, 'Shift time by', shifttimes[jobID]) print(jobID, 'suite:', suites[jobID]) for jobID in jobs: - # even if you don't want to download, it's good to run this - # as it clears up the path and ensures recently downloed data is + # even if you don't want to download, we run this + # as it clears up the path and ensures recently downloed data is # correctly symlinked. 
download_from_mass(jobID, doMoo=do_mass_download) - + if do_analysis_timeseries: for jobID in jobs: analysis_timeseries( jobID=jobID, analysisSuite=suites[jobID], config_user=config_user - ) + ) # Master suite leys: if not master_suites: @@ -4298,9 +4299,55 @@ def main(): linestyles=linestyles, config_user=config_user ) - + + +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + + parser.add_argument('-y', + '--compare_yml', + nargs='+', + type=str, + help='One or more Comparison Analysis configuration file, for examples see bgcval2 input_yml directory.', + required=True, + ) + + parser.add_argument('-c', + '--config-file', + default=os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'default-bgcval2-config.yml'), + help='User configuration file (for paths).', + required=False) + + args = parser.parse_args() + + return args + + +def main(): + """Run the main routine.""" + args = get_args() + + # This has a sensible default value. + config_user=args.config_file + + # This shouldn't fail as it's a required argument. + compare_ymls = args.compare_yml + + for compare_yml in compare_ymls: + print(f"analysis_timeseries: Comparison config file {compare_yml}") + + if not os.path.isfile(compare_yml): + print(f"analysis_timeseries: Could not find comparison config file {compare_yml}") + sys.exit(1) + + load_yml_and_run(compare_yml, config_user) + print("Finished... ") -if __name__ == "__main__": +if __name__ == "__main__": main() diff --git a/bgcval2/analysis_timeseries.py b/bgcval2/analysis_timeseries.py index 6560e0f6..30ed9261 100755 --- a/bgcval2/analysis_timeseries.py +++ b/bgcval2/analysis_timeseries.py @@ -31,13 +31,14 @@ .. 
moduleauthor:: Lee de Mora """ +import argparse import matplotlib as mpl mpl.use('Agg') ##### # Load Standard Python modules: -from sys import argv, exit +from sys import exit from os.path import exists from calendar import month_name from socket import gethostname @@ -46,6 +47,7 @@ import numpy as np import os, sys from getpass import getuser +import itertools ##### # Load specific local code: @@ -289,7 +291,7 @@ def analysis_timeseries( print('analysisSuite:',analysisSuite) print('regions:', regions) print('clean:', clean, 'annual:',annual, 'strictFileCheck:', strictFileCheck) - print('config_user:', config_user) + print('config_user:', config_user) # get runtime configuration if config_user: @@ -317,7 +319,7 @@ def analysis_timeseries( analysisKeys = [] if analysisSuite.lower() in [ - 'keymetricsfirst', + 'keymetricsfirst', 'kmf', ]: analysisKeys.extend(keymetricsfirstKeys) @@ -4927,52 +4929,75 @@ def singleTimeSeries( # print "Error: %s" % sys.exc_info()[0] +def get_args(): + """Parse command line arguments.""" + accepted_keys = ['kmf', 'physics','bgc', 'debug', 'spinup', 'salinity', 'fast', 'level1', 'level3', ] + + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-j', + '--jobID', + nargs='+', + type=str, + default=None, + help='One or more UKESM Job IDs (automatically generated by the cylc/rose suite).', + required=True, + ) + + parser.add_argument('-k', + '--keys', + default=['kmf', 'level1',], + nargs='+', + type=str, + help=''.join(['Runtime keys - each key links to a pre-determined list of variables to analyse. 
', + 'Keys are: ', ', '.join( accepted_keys)]), + required=False, + ) + + parser.add_argument('-c', + '--config-file', + default=os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'default-bgcval2-config.yml'), + help='User configuration file (for paths).', + required=False) + + args = parser.parse_args() + return args + + def main(): from ._version import __version__ print(f'BGCVal2: {__version__}') - if "--help" in argv or len(argv) == 1: - print("Running with no arguments. Exiting.") - if "--help" in argv: - print("Read the documentation.") - sys.exit(0) - try: - jobID = argv[1] - except: - jobID = "u-ab749" - - if 'debug' in argv[1:]: - suite = 'debug' - #elif 'all' in argv[1:]: suite = 'all' - elif 'spinup' in argv[1:]: - suite = 'spinup' - elif 'salinity' in argv[1:]: - suite = 'salinity' - elif 'level1' in argv[1:]: - suite = 'level1' - elif 'fast' in argv[1:]: - suite = 'fast' - elif 'level3' in argv[1:]: - suite = 'level3' - elif 'physics' in argv[1:]: - suite = 'physics' - elif 'bgc' in argv[1:]: - suite = 'bgc' - elif 'kmf' in argv[1:] or 'keymetricsfirst' in argv[1:]: - suite = 'keymetricsfirst' - else: - suite = 'level1' - config_user = None - if "bgcval2-config-user.yml" in argv[1:]: - config_user = "bgcval2-config-user.yml" + args = get_args() + jobIDs = args.jobID + keys = args.keys + print('Running analysis_imerseries.\tjobID:', jobIDs, '\tkeys:', keys) + + accepted_keys = ['kmf', 'physics','bgc', 'debug', 'spinup', 'salinity', 'fast', 'level1', 'level3', ] + good_keys = True + for key in keys: + if key not in accepted_keys: + print('Key Argument [',key,'] nor recognised. Accepted keys are:', accepted_keys) + good_keys= False + if not good_keys: + sys.exit(1) + + if os.path.isfile(args.config_file): + config_user = args.config_file print(f"analysis_timeseries: Using user config file {config_user}") + else: + print(f"analysis_timeseries: Could not find configuration file {config_user}." 
+ "Will proceed with defaults.") + config_user = None + + for jobID, suite in itertools.product(keys, jobIDs): + analysis_timeseries( + jobID=jobID, + analysisSuite=suite, + config_user=config_user + ) - analysis_timeseries( - jobID=jobID, - analysisSuite=suite, - config_user=config_user - ) #clean=1) - #if suite == 'all': - #analysis_timeseries(jobID =jobID,analysisSuite='FullDepth', z_component = 'FullDepth',)#clean=1) if __name__ == "__main__": diff --git a/bgcval2/bgcval.py b/bgcval2/bgcval.py index e73d671f..0e5cb57a 100755 --- a/bgcval2/bgcval.py +++ b/bgcval2/bgcval.py @@ -31,11 +31,11 @@ .. moduleauthor:: Lee de Mora """ +import argparse import matplotlib # Force matplotlib to not use any Xwindows backend. matplotlib.use('Agg') import sys -from sys import argv, exit from multiprocessing import Pool from .download_from_mass import findLastFinishedYear @@ -245,36 +245,55 @@ def theWholePackage(jobID, year=False, suite='level1'): clean=True, physicsOnly=physicsOnly) + +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-i', + '--job-id', + default=None, + help='Job ID', + required=True) + parser.add_argument('-y', + '--year', + default=None, + help='Year', + required=False) + parser.add_argument('-p', + '--physics', + action='store_true', + help='Physics or not', + required=False) + + args = parser.parse_args() + + return args + + def run(): from ._version import __version__ print(f'BGCVal2: {__version__}') - if "--help" in argv or len(argv) == 1: - print("Running with no arguments. 
Exiting.") - if "--help" in argv: - print("Read the documentation.") - sys.exit(0) - try: - jobID = argv[1] - except: - print("Please provide a job ID") - exit() - #if 'ReportOnly' in argv[:]:ReportOnly=True - #else: ReportOnly = False - if 'physics' in argv[:]: + args = get_args() + jobID = args.job_id + + if args.physics: + print("bgcval: Running with Physics option! Number files 6") physicsOnly = True numberfiles = 4 else: + print("bgcval: Running without Physics option! Number files 4") physicsOnly = False numberfiles = 6 year = False - for ar in argv: + if args.year: try: - ar = int(ar) - except: - continue - year = str(ar) + year = str(int(args.year)) + except ValueError: + print("analysis_timeseries: Invalid input for year - must be an integer, got {args.year}") for divby in [100, 50, 25, 10, 5, 1]: print("main", divby, year) if year: continue diff --git a/bgcval2/bgcval2_make_report.py b/bgcval2/bgcval2_make_report.py index 9c94ecd8..5ffd076e 100755 --- a/bgcval2/bgcval2_make_report.py +++ b/bgcval2/bgcval2_make_report.py @@ -30,10 +30,10 @@ ##### # Load Standard Python modules: +import argparse import sys from glob import glob -from sys import argv import os import shutil @@ -1725,21 +1725,65 @@ def newImageLocation(fn): print("-------------\nSuccess\ntest with:\nfirefox", indexhtmlfn) +def get_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-c', + '--config-file', + default=os.path.join(os.getcwd(), + 'config-user.yml'), + help='User configuration file', + required=False) + parser.add_argument('-i', + '--job-id', + default=None, + help='Job ID', + required=True) + parser.add_argument('-y', + '--year', + default=None, + help='Year', + required=False) + parser.add_argument('-a', + '--clean', + action='store_true', + help='Clean or not', + required=False) + parser.add_argument('-p', + '--physics', + action='store_true', + 
help='Physics or not', + required=False) + parser.add_argument('-r', + '--report', + default=None, + help='Report repo', + required=False) + + args = parser.parse_args() + + return args + + def main(): """Run the html maker for a single job ID.""" from ._version import __version__ print(f'BGCVal2: {__version__}') - if "--help" in argv or len(argv) == 1: - print("Running with no arguments. Exiting.") - if "--help" in argv: - print("Read the documentation.") - sys.exit(0) - try: - jobID = argv[1] - except: - print("Please provide a jobID next time") - exit() + args = get_args() + jobID = args.job_id + + if args.config_file: + config_user = os.path.join(os.getcwd(), args.config_file) + print(f"analysis_timeseries: Using user config file {config_user}") + else: + config_user = os.path.join(os.getcwd(), "bgcval2-config-user.yml") + print(f"analysis_timeseries: Using user default file {config_user}") + if not os.path.isfile(config_user): + print(f"analysis_timeseries: Could not find configuration file {config_user}") + config_user = None #defaults: clean = False @@ -1747,31 +1791,21 @@ def main(): year = '*' reportdir = folder('reports/' + jobID) - for i, arg in enumerate(argv): - if i <= 1: continue - + if args.year: try: - y = int(arg) - year = y - continue - except: - pass - - if arg == 'clean': - clean = True - continue - - if arg == 'physics': - physicsOnly = True - continue - - reportdir = arg - - # get runtime configuration - config_user = None - if "bgcval2-config-user.yml" in argv[1:]: - config_user = "bgcval2-config-user.yml" - print(f"bgcval2_make_report: Using user config file {config_user}") + year = int(args.year) + except ValueError: + print("analysis_timeseries: Invalid input for year - must be an integer, got {args.year}") + if args.clean: + clean = True + print("analysis_timeseries: Running with Clean option!") + if args.physics: + physicsOnly = True + print("analysis_timeseries: Running with Physics option!") + if args.report: + reportdir = 
os.path.abspath(args.report) + + # get runtime configuration; not implemented yet if config_user: paths_dict, config_user = get_run_configuration(config_user) else: diff --git a/bgcval2/download_from_mass.py b/bgcval2/download_from_mass.py index 03f87544..0d6cf425 100755 --- a/bgcval2/download_from_mass.py +++ b/bgcval2/download_from_mass.py @@ -30,7 +30,9 @@ """ ##### # Load Standard Python modules: -from sys import argv, stdout +import argparse + +from sys import stdout import subprocess from socket import gethostname import os @@ -50,7 +52,7 @@ ./download_from_mass.py jobID This tool will only work on machines that have mass enabled. - + """ @@ -73,7 +75,7 @@ def folder(name): def mnStr(month): - """ + """ :param month: An int between 1 and 100. Returns a 2 digit number string with a leading zero, if needed. """ @@ -82,8 +84,8 @@ def mnStr(month): def getYearFromFile(fn): - """ - Takes a file anem, and looks for 8 consequetive numbers, then removes those that are months, and returns the year. + """ + Takes a file name, and looks for 8 consecutive numbers, then removes those that are months, and returns the year. """ a = findall(r'\d\d\d\d\d\d\d\d', fn) a.reverse() # prefer second year. @@ -100,14 +102,14 @@ def getYearFromFile(fn): def rebaseSymlinks(fn, dryrun=True, debug=False): - """ + """ :param fn: A full path to a filename. It should be a symbolic link. :param dryrun: A boolean switch to do a trial run of this function. - This function reduces a chain of symbolic links down to one. It takes a full path, + This function reduces a chain of symbolic links down to one. It takes a full path, checks whether it is a sym link, then checks whether the real path is the target of the link. If not, it replaces the target with the real path. 
- + """ ##### @@ -116,7 +118,8 @@ def rebaseSymlinks(fn, dryrun=True, debug=False): # print "rebaseSymlinks:\tfile does not exist.",fn # return if not os.path.islink(fn): - if debug: print("download_from_mass:\trebaseSymlinks:\tfile is not a symlink.", fn) + if debug: + print("download_from_mass:\trebaseSymlinks:\tfile is not a symlink.", fn) return ##### @@ -126,7 +129,8 @@ def rebaseSymlinks(fn, dryrun=True, debug=False): if realpath == linkpath: return - print("download_from_mass:\trebaseSymlinks:\tdeleting and re-linking ", fn, '-->', realpath) + if debug: + print("download_from_mass:\trebaseSymlinks:\tdeleting and re-linking ", fn, '-->', realpath) if dryrun: return os.remove(fn) os.symlink(realpath, fn) @@ -140,7 +144,7 @@ def findLastFinishedYear(jobID, dividby=1, numberfiles=6): This tool find the best year to have a close look at the model, by searching through the files and guessing which years are finished. - + """ if jobID == '': return @@ -210,12 +214,12 @@ def downloadField(jobID, :param dryrun: does not download files, just prints. :param extension: Nemo style file extension :param name: Name of the analysis group, used for the folder. - + This tool takes the jobID, the field name, and using the known structure of universally similar MASS and the local filesystem structure from paths.py, downloads the monthly jobID data for the field requested to the local file structure. - - This tool will only work on machines that have mass enabled. - + + This tool will only work on machines that have connection to MASS enabled. + """ if jobID == '': return @@ -376,12 +380,12 @@ def medusaMonthlyexport(jobID, dryrun=False): def download_from_mass(jobID, doMoo=True): """ :param jobID: The job ID - + This tool takes the jobID, and using the known structure of universally similar MASS and the local filesystem structure from paths.py, downloads the jobID data to the local file structure. - + This tool will only work on machines that have mass enabled. 
- + """ if jobID == '': return @@ -431,7 +435,7 @@ def download_from_mass(jobID, doMoo=True): header_lines.append('# moo passwd -r # if mass password is expired\n') download_script_txt = ''.join(header_lines) - # moo ls: + # moo ls: bashCommand = "moo ls moose:/crum/" + jobID + "/ony.nc.file/*.nc" download_script_txt = ''.join([download_script_txt, bashCommand, '\n']) @@ -482,16 +486,17 @@ def download_from_mass(jobID, doMoo=True): outfile.write(download_script_txt) outfile.close() - fixFilePaths(outputFold, jobID) - deleteBadLinksAndZeroSize(outputFold, jobID) + fixFilePaths(outputFold, jobID, debug=False,) + deleteBadLinksAndZeroSize(outputFold, jobID, debug=False,) def fixFilePaths(outputFold, jobID, debug=False): ##### # The coupled model looses the first two characters of the name in the netcdf file. fns = glob(outputFold + "/*" + jobID[2:] + "*.nc") - print("download_from_mass:\tfixFilePaths:\tLooking for", - outputFold + "/" + jobID[2:] + "*.nc") + if debug: + print("download_from_mass:\tfixFilePaths:\tLooking for", + outputFold + "/" + jobID[2:] + "*.nc") fns.extend( glob(outputFold + '/MetOffice*')) # Because ocean assess might use the lisence? @@ -506,22 +511,23 @@ def fixFilePaths(outputFold, jobID, debug=False): correctfn) continue if correctfn == fn: continue - print("download_from_mass:\tfixFilePaths:\tFixing file prefix", fn, - '-->', correctfn) + if debug: + print("download_from_mass:\tfixFilePaths:\tFixing file prefix", fn, + '-->', correctfn) try: os.symlink(fn, correctfn) except: - print("Unable to make link:", correctfn) + if debug: + print("Unable to make link:", correctfn) continue -# print "download_from_mass:\tfixFilePaths:\t", correctfn -##### -# Some runs have nemo/medusa as a preface to the file name. + ##### + # Some runs have nemo/medusa as a preface to the file name. 
for pref in ['nemo_', 'medusa_']: - #nemo_u-ai886o_1y_26291201-26301201_grid-V.nc fns = glob(outputFold + "/" + pref + jobID + "*.nc") - print("download_from_mass:\tfixFilePaths:\tLooking for new prefix:", - pref, outputFold + "/" + pref + jobID + "*.nc") + if debug: + print("download_from_mass:\tfixFilePaths:\tLooking for new prefix:", + pref, outputFold + "/" + pref + jobID + "*.nc") for fn in sorted(fns): ##### correctfn = os.path.dirname(fn) + '/' + os.path.basename( @@ -532,14 +538,16 @@ def fixFilePaths(outputFold, jobID, debug=False): "download_from_mass:\tfixFilePaths:\tcorrect path exists.", correctfn) continue - print("download_from_mass:\tfixFilePaths:\tFixing file prefix", - pref, - end=' ') + if debug: + print("download_from_mass:\tfixFilePaths:\tFixing file prefix", + pref, + end=' ') os.symlink(fn, correctfn) - print("download_from_mass:\tfixFilePaths:\t", correctfn) + if debug: + print("download_from_mass:\tfixFilePaths:\t", correctfn) -##### -# Some runs have nemo/medusa as a preface to the file name. + ##### + # Some runs have nemo/medusa as a preface to the file name. 
suffDict = { 'grid-T': 'grid_T', 'grid-U': 'grid_U', @@ -552,8 +560,9 @@ def fixFilePaths(outputFold, jobID, debug=False): for badsuff, suff in list(suffDict.items()): #nemo_u-ai886o_1y_26291201-26301201_grid-V.nc fns = glob(outputFold + "/" + jobID + "*" + badsuff + ".nc") - print("download_from_mass:\tfixFilePaths:\tLooking for new suff:", - badsuff, outputFold + "/" + jobID + "*" + badsuff + ".nc") + if debug: + print("download_from_mass:\tfixFilePaths:\tLooking for new suff:", + badsuff, outputFold + "/" + jobID + "*" + badsuff + ".nc") for fn in sorted(fns): ##### correctfn = os.path.dirname(fn) + '/' + os.path.basename( @@ -564,70 +573,62 @@ def fixFilePaths(outputFold, jobID, debug=False): "download_from_mass:\tfixFilePaths:\tcorrect path exists.", correctfn) continue - print("download_from_mass:\tfixFilePaths:\tFixing file suffix", - badsuff, - '->', - suff, - end=' ') + if debug: + print("download_from_mass:\tfixFilePaths:\tFixing file suffix", + badsuff, + '->', + suff, + end=' ') if correctfn == fn: continue try: os.symlink(fn, correctfn) except: continue - print("download_from_mass:\tfixFilePaths:\t", correctfn) + if debug: + print("download_from_mass:\tfixFilePaths:\t", correctfn) ##### # This code looks at symoblic links and points them at their ultimate source, removing the long link chains. for fn in glob(outputFold + '/*'): - rebaseSymlinks(fn, dryrun=False) + rebaseSymlinks(fn, dryrun=False, debug=False) -def deleteBadLinksAndZeroSize(outputFold, jobID): +def deleteBadLinksAndZeroSize(outputFold, jobID, debug=True): bashCommand1 = "find " + outputFold + "/. -size 0 -print -delete" bashCommand2 = "find -L " + outputFold + "/. 
-type l -delete -print" - print("deleteBadLinksAndZeroSize:\t", bashCommand1) + if debug: print("deleteBadLinksAndZeroSize:\t", bashCommand1) process1 = subprocess.Popen(bashCommand1.split(), stdout=subprocess.PIPE) output1 = process1.communicate()[0] - print("deleteBadLinksAndZeroSize:\t", bashCommand2) + if debug: print("deleteBadLinksAndZeroSize:\t", bashCommand2) process2 = subprocess.Popen(bashCommand2.split(), stdout=subprocess.PIPE) output2 = process2.communicate()[0] + def pop_keys(keys, remove_keys): for k in remove_keys: - keys.remove(k) - return keys + if k in keys: + keys.remove(k) + return keys -def main(): - try: - jobID = argv[1] - except: - print("Please provide a jobID") - sys.exit(0) - try: - keys = argv[2:] - except: - keys = [] +def perform_download(jobID, keys, doMoo): + """ + Single model download. + """ ##### # Default behaviour is to download annual files - if 'noMoo' in keys or 'dryrun' in keys or '--dry-run' in keys: - doMoo=False - dryrun = True - keys = pop_keys(keys, ['noMoo', 'dryrun', '--dry-run']) - else: - doMoo=True - dryrun=False - - if not keys: + if not keys or 'annual' in keys: download_from_mass(jobID, doMoo=doMoo) + keys = pop_keys(keys, ['annual', ]) + dryrun = not doMoo ##### # Monthly Ice files if 'ice' in keys or 'soicecov' in keys: @@ -669,5 +670,56 @@ def main(): if keys: downloadField(jobID, keys, timeslice='m', dryrun=dryrun) + +def get_args(): + """Parse command line arguments.""" + + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('-j', + '--jobID', + nargs='+', type=str, + help='One or more JobIDs to download', + required=True) + parser.add_argument('-k', + '--keys', + default=['annual', ], + nargs='+', type=str, + help='Download keys - default options are: annual (which downloads all the annual files), ' + 'or chl, mld, ice, export, which downkoads monthly files for these fields. 
' + 'Note that monthly files download is unstable and slow.', + required=False) + parser.add_argument('-d', + '--dry-run', + action='store_true', + help='Dry run: Do not download any files.', + ) + + args = parser.parse_args() + + return args + + +def main(): + """Run the main routine.""" + args = get_args() + + jobIDs = args.jobID + keys = args.keys + dryrun = args.dry_run + doMoo = not dryrun + + if keys in [None, '', [],]: + keys = [] + if keys: + keys = [str(k) for k in keys] + + print(f"Running with job_ids: {jobIDs} and keys {keys}") + + for jobID in jobIDs: + perform_download(jobID, keys, doMoo) + + if __name__ == "__main__": main() diff --git a/input_yml/comparison_analysis_template.yml b/input_yml/comparison_analysis_template.yml index 08fce127..16b9c110 100644 --- a/input_yml/comparison_analysis_template.yml +++ b/input_yml/comparison_analysis_template.yml @@ -1,29 +1,29 @@ --- -# GC5 N96 ORCA1 spinup analysis name: Template job # Run the single Job Analysis (analysis_timeseries) do_analysis_timeseries: False # if True, it calls `analysis_timeseries jobID suite` using details provided here. -do_mass_download: True +do_mass_download: False # if True, it calls bgcval2/download_from_mass.py and attempts to download the jobs data. # Job ID's suites as named by Rose/Cylc jobs: - u-aa001: + u-aa001: # Not a real jobID! description: 'Job number 1' colour: red thickness: 1. linestyle: '-' shifttime: 0. - suite: physics - u-aa002: + suite: kmf level1 + + u-aa002: # Not a real Job ID description: 'Job number 2' colour: blue thickness: 1.0 linestyle: ':' shifttime: 0. - suite: physics + suite: kmf level1 diff --git a/tests/integration/test_command_line.py b/tests/integration/test_command_line.py new file mode 100644 index 00000000..a5031493 --- /dev/null +++ b/tests/integration/test_command_line.py @@ -0,0 +1,154 @@ +"""Tests for BGCVal2 CLI. 
+ +Includes a context manager to temporarily modify sys.argv +""" +import contextlib +import copy +import functools +import sys +from unittest.mock import patch +from io import StringIO + +import pytest + +from bgcval2 import ( + analysis_timeseries, + bgcval, + download_from_mass, + bgcval2_make_report, + analysis_compare +) +from bgcval2.analysis_timeseries import main as analysis_timeseries_main +from bgcval2.download_from_mass import main as download_from_mass_main +from bgcval2.bgcval import run as bgcval_main +from bgcval2.bgcval2_make_report import main as bgcval2_make_report_main +from bgcval2.analysis_compare import main as analysis_compare_main + + +def wrapper(f): + @functools.wraps(f) + def empty(*args, **kwargs): # noqa + if kwargs: + raise ValueError(f'Parameters not supported: {kwargs}') + return True + + return empty + + +@contextlib.contextmanager +def arguments(*args): + backup = sys.argv + sys.argv = list(args) + yield + sys.argv = backup + + +def test_setargs(): + original = copy.deepcopy(sys.argv) + with arguments('testing', 'working', 'with', 'sys.argv'): + assert sys.argv == ['testing', 'working', 'with', 'sys.argv'] + assert sys.argv == original + + +@contextlib.contextmanager +def capture_sys_output(): + capture_out, capture_err = StringIO(), StringIO() + current_out, current_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = capture_out, capture_err + yield capture_out, capture_err + finally: + sys.stdout, sys.stderr = current_out, current_err + + +@patch('bgcval2.analysis_timeseries.main', new=wrapper(analysis_timeseries)) +def test_run_analysis_timeseries_command(): + """Test run command.""" + with arguments('analysis_timeseries', '--help'): + with pytest.raises(SystemExit) as pytest_wrapped_e: + analysis_timeseries_main() + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 0 + err = "analysis_timeseries: error: the following arguments " \ + "are required: -j/--jobID" + with 
arguments('analysis_timeseries'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + analysis_timeseries_main() + assert err in str(stderr.getvalue()) + err = "--jobID: expected at least one argument" + with arguments('analysis_timeseries', '--jobID', '--keys'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + analysis_timeseries_main() + assert err in str(stderr.getvalue()) + + +@patch('bgcval2.bgcval.run', new=wrapper(bgcval)) +def test_run_bgcval_command(): + """Test run command.""" + with arguments('bgcval', '--help'): + with pytest.raises(SystemExit) as pytest_wrapped_e: + bgcval_main() + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 0 + err = "the following arguments are required: -i/--job-id\n" + with arguments('bgcval'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + bgcval_main() + assert err in str(stderr.getvalue()) + + +@patch('bgcval2.download_from_mass.main', new=wrapper(download_from_mass)) +def test_download_from_mass_command(): + """Test run command.""" + with arguments('download_from_mass', '--help'): + with pytest.raises(SystemExit) as pytest_wrapped_e: + download_from_mass_main() + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 0 + err = "the following arguments are required: -j/--jobID" + with arguments('download_from_mass'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + download_from_mass_main() + assert err in str(stderr.getvalue()) + + +@patch('bgcval2.analysis_compare.main', new=wrapper(analysis_compare)) +def test_analysis_compare_command(): + """Test run command.""" + with arguments('analysis_compare', '--help'): + with pytest.raises(SystemExit) as pytest_wrapped_e: + analysis_compare_main() + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 0 + err = "the following arguments 
are required: -y/--compare_yml" + with arguments('analysis_compare'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + analysis_compare_main() + assert err in str(stderr.getvalue()) + + +@patch('bgcval2.bgcval2_make_report.main', new=wrapper(bgcval2_make_report)) +def test_bgcval2_make_report_command(): + """Test run command.""" + with arguments('analysis_compare', '--help'): + with pytest.raises(SystemExit) as pytest_wrapped_e: + bgcval2_make_report_main() + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 0 + err = "the following arguments are required: -i/--job-id" + with arguments('bgcval2_make_report'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + bgcval2_make_report_main() + assert err in str(stderr.getvalue()) + err = "argument -r/--report: expected one argument" + with arguments('bgcval2_make_report', '--job-id DUM', '--report'): + with pytest.raises(SystemExit) as cm, capture_sys_output() \ + as (stdout, stderr): + bgcval2_make_report_main() + assert err in str(stderr.getvalue()) diff --git a/tests/integration/test_run.py b/tests/integration/test_run.py deleted file mode 100644 index 70ffca7e..00000000 --- a/tests/integration/test_run.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Tests for BGCVal2 CLI. 
- -Includes a context manager to temporarily modify sys.argv -""" -import contextlib -import copy -import functools -import sys -from unittest.mock import patch - -import pytest - -from bgcval2 import analysis_timeseries, bgcval -from bgcval2.analysis_timeseries import main - - -def wrapper(f): - @functools.wraps(f) - def empty(*args, **kwargs): # noqa - if kwargs: - raise ValueError(f'Parameters not supported: {kwargs}') - return True - - return empty - - -@contextlib.contextmanager -def arguments(*args): - backup = sys.argv - sys.argv = list(args) - yield - sys.argv = backup - - -def test_setargs(): - original = copy.deepcopy(sys.argv) - with arguments('testing', 'working', 'with', 'sys.argv'): - assert sys.argv == ['testing', 'working', 'with', 'sys.argv'] - assert sys.argv == original - - -@patch('bgcval2.analysis_timeseries.main', new=wrapper(analysis_timeseries)) -def test_run_analysis_timeseries(): - """Test run command.""" - with arguments('analysis_timeseries', '--help'): - with pytest.raises(SystemExit) as pytest_wrapped_e: - main() - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 0 - - -@patch('bgcval2.bgcval.run', new=wrapper(bgcval)) -def test_run_bgcval(): - """Test run command.""" - with arguments('bgcval', '--help'): - with pytest.raises(SystemExit) as pytest_wrapped_e: - main() - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 0