From 6a7757482c30e6d1f61b57efb3a62d63ea6a288e Mon Sep 17 00:00:00 2001
From: Lee de Mora <ledm@pml.ac.uk>
Date: Tue, 9 Aug 2022 15:51:26 +0100
Subject: [PATCH 1/3] Improved interface for single job analyses

---
 bgcval2/analysis_compare.py    |   8 +-
 bgcval2/analysis_p2p.py        | 134 ++++++++++++++++++++++-----------
 bgcval2/bgcval2_make_report.py | 103 +++++++++++++------------
 setup.py                       |   4 +-
 4 files changed, 152 insertions(+), 97 deletions(-)

diff --git a/bgcval2/analysis_compare.py b/bgcval2/analysis_compare.py
index c0f6f165..7cff92fd 100755
--- a/bgcval2/analysis_compare.py
+++ b/bgcval2/analysis_compare.py
@@ -4245,7 +4245,7 @@ def load_yml_and_run(compare_yml, config_user):
         print(jobID, 'suite:', suites[jobID])
 
     for jobID in jobs:
-        # even if you don't want to download, we run this 
+        # even if you don't want to download, we run this
         # as it clears up the path and ensures recently downloed data is
         # correctly symlinked.
         download_from_mass(jobID, doMoo=do_mass_download)
@@ -4338,15 +4338,15 @@ def main():
     compare_ymls = args.compare_yml
 
     for compare_yml in compare_ymls:
-        print(f"analysis_timeseries: Comparison config file {compare_yml}")
+        print(f"analysis_compare: Comparison config file {compare_yml}")
 
         if not os.path.isfile(compare_yml):
-            print(f"analysis_timeseries: Could not find comparison config file {compare_yml}")
+            print(f"analysis_compare: Could not find comparison config file {compare_yml}")
             sys.exit(1)
 
         load_yml_and_run(compare_yml, config_user)
 
-    print("Finished... ")
+    print("Finished analysis_compare... ")
 
 
 if __name__ == "__main__":
diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py
index 04ea1c8e..5506ec6a 100755
--- a/bgcval2/analysis_p2p.py
+++ b/bgcval2/analysis_p2p.py
@@ -41,6 +41,7 @@
 from netCDF4 import Dataset
 import numpy as np
 import sys
+import argparse
 
 #####
 #Specific local code:
@@ -85,7 +86,7 @@
 
 p2pKeys_annual = [
     'T',
-    'S',  #'MLD', 
+    'S',  #'MLD',
     'Chl_CCI',
     'N',
     'Si',
@@ -113,7 +114,7 @@
 
 p2pKeys_physics = [
     'T',
-    'S',  #'MLD', 
+    'S',  #'MLD',
     'ZonalCurrent',
     'MeridionalCurrent',
     'VerticalCurrent'
@@ -139,7 +140,7 @@ def analysis_p2p(
     noTargets=True,
 ):
     """
-	
+
 	"""
     #####
     # Switches:
@@ -1499,44 +1500,93 @@ def single_p2p(jobID, key, year):
         print("Error: %s" % sys.exc_info()[0])
 
 
-def run():
+def get_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-j',
+                        '--jobID',
+                        nargs='+',
+                        type=str,
+                        default=None,
+                        help='One or more UKESM Job IDs (automatically generated by cycl/rose suite).',
+                        required=True,
+                        )
+    parser.add_argument('-k',
+                        '--keys',
+                        default=['level2', ],
+                        nargs='+',
+                        type=str,
+                        help=''.join(['Runtime keys - each key links to a pre-determined list of variables to analyse. ',
+                                      'Keys are: ', ', '.join( accepted_keys)]),
+                        required=False,
+                        )
+    parser.add_argument('-y',
+                        '--years',
+                        default=['best',],
+                        nargs='+',
+                        type=str,
+                        help=''.join(['Years to analysis. Default is "best". '])
+                        required=False,
+                        )
+
+    parser.add_argument('-c',
+                        '--config-file',
+                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                             'default-bgcval2-config.yml'),
+                        help='User configuration file (for paths).',
+                        required=False)
+
+    args = parser.parse_args()
+
+    return args
+
+
+
+def main():
     from ._version import __version__
     print(f'BGCVal2: {__version__}')
-    if "--help" in argv or len(argv) == 1:
-        print("Running with no arguments. Exiting.")
-        if "--help" in argv:
-            print("Read the documentation.")
-        sys.exit(0)
-    try:
-        jobID = argv[1]
-    except:
-        jobID = 'u-ab749'
-
-    try:
-        year = argv[2]
-    except:
-        year = '2007'
-
-    if 'debug' in argv[1:]:
-        analysisSuite = 'debug'
-    elif 'level2' in argv[1:]:
-        analysisSuite = 'level2'
-    elif 'physics' in argv[1:]:
-        analysisSuite = 'physics'
-    else:
-        analysisSuite = 'annual'
-
-    analysis_p2p(
-        models=[
-            'NEMO',
-            'MEDUSA',
-        ],
-        jobID=jobID,
-        years=[
-            year,
-        ],  #'2075','2076',
-        modelGrid='eORCA1',
-        annual=True,
-        noPlots=False,
-        analysisSuite=analysisSuite,
-    )
+    args = get_args()
+
+    jobIDs = args.jobID
+    keys = args.keys
+    years = args.years
+
+    accepted_keys = ['debug', 'physics', 'leve2', 'annual']
+    for jobID, year, suite in itertools.product(jobIDs, years, keys):
+        if year == 'best':
+            best_year = False
+            for divby in [100, 50, 25, 10, 5, 1]:
+                print("analysis_p2p:\t find best year", divby, year,best_year )
+                if best_year:
+                    continue
+                best_year = findLastFinishedYear(jobID,
+                                            dividby=divby,) #  numberfiles=numberfiles)
+            if best_year == False:
+                continue
+            else:
+                year = best_year
+
+        print('analysis_p2p:', jobID, year, suite)
+        single_p2p(jobID, suite, year)
+        # analysis_p2p(
+        #     models=[
+        #         'NEMO',
+        #         'MEDUSA',
+        #     ],
+        #     jobID=jobID,
+        #     years=[
+        #         year,
+        #     ],
+        #     modelGrid='eORCA1',
+        #     annual=True,
+        #     noPlots=False,
+        #     analysisSuite=suite,
+        # )
+    print("Finished p2p... ")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bgcval2/bgcval2_make_report.py b/bgcval2/bgcval2_make_report.py
index 5ffd076e..fba407fe 100755
--- a/bgcval2/bgcval2_make_report.py
+++ b/bgcval2/bgcval2_make_report.py
@@ -74,13 +74,13 @@ def addImageToHtml(fn, imagesfold, reportdir, debug=True):
     relfn = newfn.replace(reportdir, './')
 
     if debug:
-        print("addImageToHtml:\tfn:", fn, 
-            "\n\timagesfold:", imagesfold, 
+        print("addImageToHtml:\tfn:", fn,
+            "\n\timagesfold:", imagesfold,
             "\n\treportdir:", reportdir,
             "\n\tnewfn:", newfn,
             "\n\trelfn:", relfn)
 
-    
+
     if not os.path.exists(newfn):
         if debug: print("addImageToHtml:\tcopytree", fn, newfn)
         basedir = folder(os.path.dirname(newfn))
@@ -102,7 +102,7 @@ def addImageToHtml(fn, imagesfold, reportdir, debug=True):
             if debug: print("addImageToHtml:\tremoving old file", fn)
             os.remove(newfn)
             shutil.copy2(fn, newfn)
-            if debug: 
+            if debug:
                 print("addImageToHtml:\t copy2", fn, newfn)
     return relfn
 
@@ -1056,7 +1056,6 @@ def newImageLocation(fn):
             FileOrder=FileOrder)
 
     if level3OMZ:
-
         l3omzFields = [
             'ExtentMaps'
             #			  'O2',
@@ -1375,7 +1374,7 @@ def comparehtml5Maker(
     ####
     # Copy all necceasiry objects and templates to the report location:
     print("Copying html and js assets to", reportdir)
-    #html5Assets_dir = 
+    #html5Assets_dir =
 
     copytree(os.path.join(paths.bgcval2_repo,'bgcval2/html5/html5Assets'), reportdir)
     indexhtmlfn = reportdir + "index.html"
@@ -1732,34 +1731,46 @@ def get_args():
         formatter_class=argparse.RawDescriptionHelpFormatter)
     parser.add_argument('-c',
                         '--config-file',
-                        default=os.path.join(os.getcwd(),
-                                             'config-user.yml'),
-                        help='User configuration file',
+                        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                             'default-bgcval2-config.yml'),
+                        help='User configuration file (for paths).',
                         required=False)
-    parser.add_argument('-i',
-                        '--job-id',
+    parser.add_argument('-j',
+                        '--jobID',
+                        nargs='+',
+                        type=str,
                         default=None,
-                        help='Job ID',
-                        required=True)
+                        help='One or more UKESM Job IDs (automatically generated by cycl/rose suite).',
+                        required=True,
+                        )
     parser.add_argument('-y',
                         '--year',
-                        default=None,
+                        default='*',
                         help='Year',
                         required=False)
+    parser.add_argument('-k',
+                        '--keys',
+                        default=['kmf', 'level1',],
+                        nargs='+',
+                        type=str,
+                        help=''.join(['Runtime keys - each key links to a pre-determined list of variables to analyse. ',]),
+                                      #'Keys are: ', ', '.join( accepted_keys)]),
+                        required=False,
+                        )
     parser.add_argument('-a',
                         '--clean',
                         action='store_true',
-                        help='Clean or not',
+                        help='Delete previous report and replace it.',
                         required=False)
     parser.add_argument('-p',
                         '--physics',
                         action='store_true',
-                        help='Physics or not',
+                        help='Physics only switch.',
                         required=False)
     parser.add_argument('-r',
                         '--report',
-                        default=None,
-                        help='Report repo',
+                        default='reports/',
+                        help='Output directory to host the report.',
                         required=False)
 
     args = parser.parse_args()
@@ -1773,37 +1784,28 @@ def main():
     print(f'BGCVal2: {__version__}')
 
     args = get_args()
-    jobID = args.job_id 
 
-    if args.config_file:
-        config_user = os.path.join(os.getcwd(), args.config_file)
-        print(f"analysis_timeseries: Using user config file {config_user}")
-    else:
-        config_user = os.path.join(os.getcwd(), "bgcval2-config-user.yml")
-        print(f"analysis_timeseries: Using user default file {config_user}")
+    jobID = args.job_id
+    config_user = args.config_file
+    clean = args.clean
+    physics = args.physics
+    year = args.year
+
     if not os.path.isfile(config_user):
         print(f"analysis_timeseries: Could not find configuration file {config_user}")
         config_user = None
 
-    #defaults:
-    clean = False
-    physicsOnly = False
-    year = '*'
-    reportdir = folder('reports/' + jobID)
-
     if args.year:
         try:
             year = int(args.year)
         except ValueError:
             print("analysis_timeseries: Invalid input for year - must be an integer, got {args.year}")
-    if args.clean:
-        clean = True
-        print("analysis_timeseries: Running with Clean option!")
-    if args.physics:
-        physicsOnly = True
-        print("analysis_timeseries: Running with Physics option!")
-    if args.report:
-        reportdir = os.path.abspath(args.report)
+
+    if clean:
+        print("analysis_timeseries: Running with Clean option.")
+
+    if physics:
+        print("analysis_timeseries: Running with Physics only option.")
 
     # get runtime configuration; not implemented yet
     if config_user:
@@ -1814,16 +1816,19 @@ def main():
     # filter paths dict into an object that's usable below
     paths = paths_setter(paths_dict)
 
-    html5Maker(
-        jobID=jobID,
-        reportdir=reportdir,
-        year=year,
-        clean=clean,
-        physicsOnly=physicsOnly,
-        paths=paths,
-        config_user=config_user
-    )
-
+    for jobID in jobIDs:
+        reportdir =  os.path.abspath(folder([args.report, jobID]))
+
+        html5Maker(
+            jobID=jobID,
+            reportdir=reportdir,
+            year=year,
+            clean=clean,
+            physicsOnly=physics,
+            paths=paths,
+            config_user=config_user
+        )
+        
 
 if __name__ == "__main__":
     main()
diff --git a/setup.py b/setup.py
index 482668b3..3e644a13 100755
--- a/setup.py
+++ b/setup.py
@@ -196,10 +196,10 @@ def read_authors(filename):
             'analysis_level3_dms = bgcval2.analysis_level3_dms:main',
             'analysis_level3_omz = bgcval2.analysis_level3_omz:main',
             'analysis_level3_sameYear = bgcval2.analysis_level3_sameYear:main',
-            'analysis_p2p = bgcval2.analysis_p2p:run',
+            'analysis_p2p = bgcval2.analysis_p2p:main',
             'analysis_timeseries = bgcval2.analysis_timeseries:main',
             'bgcval = bgcval2.bgcval:run',
-            'download_from_mass = bgcval2.download_from_mass:main',   
+            'download_from_mass = bgcval2.download_from_mass:main',
             'bgcval2_make_report = bgcval2.bgcval2_make_report:main',
         ],
     },

From 8d4768d6308989f263496ed57bccbde191aa618d Mon Sep 17 00:00:00 2001
From: Lee de Mora <ledm@pml.ac.uk>
Date: Tue, 9 Aug 2022 15:53:57 +0100
Subject: [PATCH 2/3] Debugging help messages

---
 bgcval2/analysis_p2p.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py
index 5506ec6a..4a859209 100755
--- a/bgcval2/analysis_p2p.py
+++ b/bgcval2/analysis_p2p.py
@@ -32,7 +32,6 @@
 
 #####
 #Standard Python modules:
-from sys import argv, exit
 from os.path import exists
 from calendar import month_name
 from socket import gethostname
@@ -41,6 +40,7 @@
 from netCDF4 import Dataset
 import numpy as np
 import sys
+import os
 import argparse
 
 #####
@@ -1505,6 +1505,7 @@ def get_args():
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
+    accepted_keys = ['debug', 'physics', 'leve2', 'annual']
 
     parser.add_argument('-j',
                         '--jobID',
@@ -1528,7 +1529,7 @@ def get_args():
                         default=['best',],
                         nargs='+',
                         type=str,
-                        help=''.join(['Years to analysis. Default is "best". '])
+                        help=''.join(['Years to analysis. Default is "best". ']),
                         required=False,
                         )
 

From b4ce2ef819bc2cb166b60ac8f7997ea369d45b1a Mon Sep 17 00:00:00 2001
From: Lee de Mora <ledm@pml.ac.uk>
Date: Mon, 15 Aug 2022 11:09:45 +0100
Subject: [PATCH 3/3] working on single job analysis

---
 bgcval2/UKESMpython.py         |  2 +-
 bgcval2/analysis_p2p.py        | 40 +++++++++++++++++++---------------
 bgcval2/analysis_timeseries.py |  2 +-
 bgcval2/bgcval2_make_report.py |  2 +-
 bgcval2/download_from_mass.py  | 34 ++++++++++++++++++++---------
 5 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/bgcval2/UKESMpython.py b/bgcval2/UKESMpython.py
index f497c517..4405e6ba 100644
--- a/bgcval2/UKESMpython.py
+++ b/bgcval2/UKESMpython.py
@@ -72,7 +72,7 @@ def folder(name):
 	    It also accepts lists of strings.
 	"""
     if type(name) == type(['a', 'b', 'c']):
-        name = join(name, '/')
+        name = os.path.join(name)
     if name[-1] != '/':
         name = name + '/'
     if exists(name) is False:
diff --git a/bgcval2/analysis_p2p.py b/bgcval2/analysis_p2p.py
index 4a859209..6ee165f0 100755
--- a/bgcval2/analysis_p2p.py
+++ b/bgcval2/analysis_p2p.py
@@ -32,7 +32,6 @@
 
 #####
 #Standard Python modules:
-from os.path import exists
 from calendar import month_name
 from socket import gethostname
 from getpass import getuser
@@ -42,6 +41,7 @@
 import sys
 import os
 import argparse
+import itertools
 
 #####
 #Specific local code:
@@ -51,6 +51,7 @@
 from .p2p.patternAnalyses import InterAnnualPatterns, BGCvsPhysics
 from .bgcvaltools.pftnames import months
 from .p2p.shelveToDictionary import shelveToDictionary
+from .download_from_mass import findLastFinishedYear
 
 #####
 # User defined set of paths pointing towards the datasets.
@@ -1557,35 +1558,40 @@ def main():
 
     accepted_keys = ['debug', 'physics', 'leve2', 'annual']
     for jobID, year, suite in itertools.product(jobIDs, years, keys):
+        print('p2p:', jobID, year, suite)
         if year == 'best':
             best_year = False
             for divby in [100, 50, 25, 10, 5, 1]:
-                print("analysis_p2p:\t find best year", divby, year,best_year )
                 if best_year:
                     continue
+                print("analysis_p2p:\t find best year", divby, year,best_year )
                 best_year = findLastFinishedYear(jobID,
-                                            dividby=divby,) #  numberfiles=numberfiles)
+                                            dividby=divby, debug= False) #  numberfiles=numberfiles)
             if best_year == False:
                 continue
             else:
                 year = best_year
 
+        analysis_p2p(
+            models=[
+                'NEMO',
+                'MEDUSA',
+            ],
+            jobID=jobID,
+            years=[
+                year,
+            ],  #'2075','2076',
+            modelGrid='eORCA1',
+            annual=True,
+            noPlots=False,
+            analysisSuite=[suite,
+            ],
+        )
+
+ 
         print('analysis_p2p:', jobID, year, suite)
         single_p2p(jobID, suite, year)
-        # analysis_p2p(
-        #     models=[
-        #         'NEMO',
-        #         'MEDUSA',
-        #     ],
-        #     jobID=jobID,
-        #     years=[
-        #         year,
-        #     ],
-        #     modelGrid='eORCA1',
-        #     annual=True,
-        #     noPlots=False,
-        #     analysisSuite=suite,
-        # )
+
     print("Finished p2p... ")
 
 
diff --git a/bgcval2/analysis_timeseries.py b/bgcval2/analysis_timeseries.py
index 30ed9261..30a4d2cb 100755
--- a/bgcval2/analysis_timeseries.py
+++ b/bgcval2/analysis_timeseries.py
@@ -4991,7 +4991,7 @@ def main():
               "Will proceed with defaults.")
         config_user = None
 
-    for jobID, suite in itertools.product(keys, jobIDs):
+    for jobID, suite in itertools.product(jobIDs, keys):
         analysis_timeseries(
             jobID=jobID,
             analysisSuite=suite,
diff --git a/bgcval2/bgcval2_make_report.py b/bgcval2/bgcval2_make_report.py
index fba407fe..0c4c698e 100755
--- a/bgcval2/bgcval2_make_report.py
+++ b/bgcval2/bgcval2_make_report.py
@@ -1785,7 +1785,7 @@ def main():
 
     args = get_args()
 
-    jobID = args.job_id
+    jobIDs = args.jobID
     config_user = args.config_file
     clean = args.clean
     physics = args.physics
diff --git a/bgcval2/download_from_mass.py b/bgcval2/download_from_mass.py
index 0d6cf425..2484e284 100755
--- a/bgcval2/download_from_mass.py
+++ b/bgcval2/download_from_mass.py
@@ -31,13 +31,14 @@
 #####
 # Load Standard Python modules:
 import argparse
-
+import sys
 from sys import stdout
 import subprocess
 from socket import gethostname
 import os
 from glob import glob
 from re import findall
+import numpy as np
 
 #####
 # Load specific local code:
@@ -136,7 +137,7 @@ def rebaseSymlinks(fn, dryrun=True, debug=False):
     os.symlink(realpath, fn)
 
 
-def findLastFinishedYear(jobID, dividby=1, numberfiles=6):
+def findLastFinishedYear(jobID, dividby=1, numberfiles=None,debug=False,):
     """
 	:param jobID: The job ID, as elsewhere.
 	:param 	dividby: Outputs every "dividby" years.
@@ -163,16 +164,22 @@ def findLastFinishedYear(jobID, dividby=1, numberfiles=6):
 
     for fn in files:
         yr = getYearFromFile(fn)
-        print("download_from_mass:\tgetYearFromFile:", fn, yr)
+        if debug:
+            print("download_from_mass:\tgetYearFromFile:", fn, yr)
         try:
             fnDict[yr] += 1
         except:
             fnDict[yr] = 1
 
+    if not numberfiles:
+        numberfiles = np.max([v for v in fnDict.values()])
+        if debug:print('numberfiles:', numberfiles)
+
     years = sorted(fnDict.keys())
     years.reverse()
 
-    print("download_from_mass:\t",years, fnDict)
+    if debug:
+        print("download_from_mass:\t",years, fnDict)
 
     if len(years) == 0:
         print("download_from_mass:\tfindLastFinishedYear:\tNo files found.\t")
@@ -181,13 +188,14 @@ def findLastFinishedYear(jobID, dividby=1, numberfiles=6):
     if len(years) < dividby:
         print("download_from_mass:\tfindLastFinishedYear:\tLess than", dividby,
               "years of model run, returning first year:", years[-1])
-        return years[0]
+        return False
 
     for y in years:
-        if int(y) % dividby != 0: continue
+        if int(y) % dividby == 0: 
+            if debug: print('Found year:', y)
+            return y
 
-        print(y, ':', fnDict[y])
-        if fnDict[y] >= numberfiles: return y
+        #if fnDict[y] >= numberfiles: return y
 
     print(
         "No correct year, there's probably a problem here findLastFinishedYear(",
@@ -445,9 +453,15 @@ def download_from_mass(jobID, doMoo=True):
     else:
         print("download_from_mass:\trunning the command:", bashCommand)
         stdout.flush()
-        process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
-        output = process.communicate()[0]
+        try:
+            process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
+            output = process.communicate()[0]
+        except FileNotFoundError:
+            print('ERROR: FileNotFoundError: are you running this on a mass-connected machine like mass-cli1?')
+            print('ERROR: If not, try --dry-run')
+            sys.exit(1)
 
+    print('output', output)
     # moo get:
     if len(output.split('\n')) > 6000:
         failed = 0