From 7ebe450d252bf57b24e119c9606cb43932f80ce1 Mon Sep 17 00:00:00 2001 From: georgette Date: Sun, 15 Feb 2015 18:32:34 -0800 Subject: [PATCH] removed files.py & process.py after moving relevant functions into mccSearch.py --- code/files.py | 696 ------------------------ code/mccSearch.py | 113 +++- code/process.py | 1284 --------------------------------------------- 3 files changed, 100 insertions(+), 1993 deletions(-) delete mode 100644 code/files.py delete mode 100644 code/process.py diff --git a/code/files.py b/code/files.py deleted file mode 100644 index 82b508a..0000000 --- a/code/files.py +++ /dev/null @@ -1,696 +0,0 @@ -""" -Module for handling data input files. Requires PyNIO and Numpy be -installed. - -This module can easily open NetCDF, HDF and Grib files. Search the PyNIO -documentation for a complete list of supported formats. -""" - -from os import path - -try: - import Nio -except ImportError: - import nio as Nio - -import numpy as np -import numpy.ma as ma -import sys -import process #KDW added -#from toolkit import process -#from utils import fortranfile -#from utils import misc - - -VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'], - 'latitude': ['latitude', 'lat', 'lats', 'latitudes'], - 'longitude': ['longitude', 'lon', 'lons', 'longitudes'] - } - - -def findunique(seq): - keys = {} - for e in seq: - keys[e] = 1 - return keys.keys() - -def getVariableByType(filename, variableType): - """ - Function that will try to return the variable from a file based on a provided - parameter type. - - Input:: - filename - the file to inspect - variableType - time | latitude | longitude - - Output:: - variable name OR list of all variables in the file if a single variable - name match cannot be found. - """ - try: - f = Nio.open_file(filename) - except: - #print 'PyNio had an issue opening the filename (%s) you provided' % filename - print "NIOError:", sys.exc_info()[0] - raise - - variableKeys = f.variables.keys() - f.close() - variableKeys = [variable.lower() for variable in variableKeys] - variableMatch = VARIABLE_NAMES[variableType] - - commonVariables = list(set(variableKeys).intersection(variableMatch)) - - if len(commonVariables) == 1: - return str(commonVariables[0]) - - else: - return variableKeys - -def getVariableRange(filename, variableName): - """ - Function to return the min and max values of the given variable in - the supplied filename. - - Input:: - filename - absolute path to a file - variableName - variable whose min and max values should be returned - - Output:: - variableRange - tuple of order (variableMin, variableMax) - """ - try: - f = Nio.open_file(filename) - except: - #print 'PyNio had an issue opening the filename (%s) you provided' % filename - print "NIOError:", sys.exc_info()[0] - raise - - varArray = f.variables[variableName][:] - return (varArray.min(), varArray.max()) - - -def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName): - ''' - Read in data from a list of model files into a single data structure - - Input: - filelist - list of filenames (including path) - myvar - string containing name of variable to load in (as it appears in file) - Output: - lat, lon - 2D array of latitude and longitude values - timestore - list of times - t2store - numpy array containing data from all files - - NB. 
originally written specific for WRF netCDF output files - modified to make more general (Feb 2011) - - Peter Lean July 2010 - ''' - - filelist.sort() - filename = filelist[0] - # Crash nicely if 'filelist' is zero length - """TODO: Throw Error instead via try Except""" - if len(filelist) == 0: - print 'Error: no files have been passed to read_data_from_file_list()' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file - # (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = Nio.open_file(filename) - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. # convert to -180,180 if necessary - - """TODO: Guard against case where latsraw and lonsraw are not the same dim?""" - - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - - timesraw = tmp.variables[timeVarName] - ntimes = len(timesraw) - - print 'Lats and lons read in for first file in filelist', ntimes - - # Create a single empty masked array to store model data from all files - t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :]))) - timestore = ma.zeros((ntimes * len(filelist))) - - # Now load in the data for real - # NB. no need to reload in the latitudes and longitudes -assume invariant - i = 0 - timesaccu = 0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - - for ifile in filelist: - - #print 'Loading data from file: ',filelist[i] - f = Nio.open_file(ifile) - t2raw = f.variables[myvar][:] - timesraw = f.variables[timeVarName] - time = timesraw[:] - ntimes = len(time) - print 'file= ', i, 'ntimes= ', ntimes, filelist[i] - print '\nt2raw shape: ', t2raw.shape - - # Flatten dimensions which needn't exist, i.e. level - # e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - # Code requires data to have dimensions, (time,lat,lon) - # i.e. remove level dimensions - # Remove 1d axis from the t2raw array - # Example: t2raw.shape == (365, 180, 360 1) - # After the squeeze you will be left with (365, 180, 360) instead - t2tmp = t2raw.squeeze() - # Nb. if this happens to be data for a single time only, then we just flattened it by accident - # lets put it back... - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - - print "\n timesaccu is: ", timesaccu, time,timesaccu + np.arange(ntimes) - - t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :] - timestore[timesaccu + np.arange(ntimes)] = time - timesaccu = timesaccu + ntimes - print "\n ***timesaccu is: ", timesaccu - f.close() - i += 1 - - print '\nData read in successfully with dimensions: ', t2store.shape, timesaccu - - # TODO: search for duplicated entries (same time) and remove duplicates. - # Check to see if number of unique times == number of times, if so then no problem - - if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())): - print '\nWARNING: Possible duplicated times' - - # Decode model times into python datetime objects. 
Note: timestore becomes a list (no more an array) here - timestore, _ = process.getModelTimes(filename, timeVarName) - - data_dict = {} - data_dict['lats'] = lat - data_dict['lons'] = lon - data_dict['times'] = timestore - data_dict['data'] = t2store - #return lat, lon, timestore, t2store - return data_dict - -def select_var_from_file(myfile, fmt='not set'): - ''' - Routine to act as user interface to allow users to select variable of interest from a file. - - Input: - myfile - filename - fmt - (optional) specify fileformat for PyNIO if filename suffix is non-standard - - Output: - myvar - variable name in file - - Peter Lean September 2010 - ''' - - print fmt - - if fmt == 'not set': - f = Nio.open_file(myfile) - - if fmt != 'not set': - f = Nio.open_file(myfile, format=fmt) - - keylist = f.variables.keys() - - i = 0 - for v in keylist: - print '[', i, '] ', f.variables[v].long_name, ' (', v, ')' - i += 1 - - user_selection = raw_input('Please select variable : [0 -' + str(i - 1) + '] ') - - myvar = keylist[int(user_selection)] - - return myvar - -def select_var_from_wrf_file(myfile): - ''' - Routine to act as user interface to allow users to select variable of interest from a wrf netCDF file. - - Input: - myfile - filename - - Output: - mywrfvar - variable name in wrf file - - Peter Lean September 2010 - ''' - - f = Nio.open_file(myfile, format='nc') - - keylist = f.variables.keys() - - i = 0 - for v in keylist: - try: - print '[', i, '] ', f.variables[v].description, ' (', v, ')' - except: - print '' - - i += 1 - - user_selection = raw_input('Please select WRF variable : [0 -' + str(i - 1) + '] ') - - mywrfvar = keylist[int(user_selection)] - - return mywrfvar - -def read_lolaT_from_file(filename, latVarName, lonVarName, timeVarName, file_type): - """ - Function that will return lat, lon, and time arrays - - Input:: - filename - the file to inspect - latVarName - name of the Latitude Variable - lonVarName - name of the Longitude Variable - timeVarName - name of the Time Variable - fileType = type of file we are trying to parse - - Output:: - lat - MESH GRID of Latitude values with shape (nx, ny) - lon - MESH GRID of Longitude values with shape (nx, ny) - timestore - Python list of Datetime objects - - MESHGRID docs: http://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html - - """ - - tmp = Nio.open_file(filename, format=file_type) - lonsraw = tmp.variables[lonVarName][:] - latsraw = tmp.variables[latVarName][:] - lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. # convert to -180,180 if necessary - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - timestore, _ = process.getModelTimes(filename, timeVarName) - print ' read_lolaT_from_file: Lats, lons and times read in for the model domain' - return lat, lon, timestore - -def read_data_from_one_file(ifile, myvar, timeVarName, lat, file_type): - ################################################################################## - # Read in data from one file at a time - # Input: filelist - list of filenames (including path) - # myvar - string containing name of variable to load in (as it appears in file) - # Output: lat, lon - 2D array of latitude and longitude values - # times - list of times - # t2store - numpy array containing data from all files - # Modified from read_data_from_file_list to read data from multiple models into a 4-D array - # 1. The code now processes model data that completely covers the 20-yr period. 
Thus, - # all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes - # is fixed here. - # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be - # truncated to the 240 mons using the ntimes determined from the first file. - ################################################################################## - f = Nio.open_file(ifile) - try: - varUnit = f.variables[myvar].units.upper() - except: - varUnit = raw_input('Enter the model variable unit: \n> ').upper() - t2raw = f.variables[myvar][:] - t2tmp = t2raw.squeeze() - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - t2tmp = t2tmp - f.close() - print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit - timestore = process.decode_model_timesK(ifile, timeVarName, file_type) - return timestore, t2tmp, varUnit - -def findTimeVariable(filename): - """ - Function to find what the time variable is called in a model file. - Input:: - filename - file to crack open and check for a time variable - Output:: - timeName - name of the input file's time variable - variableNameList - list of variable names from the input filename - """ - try: - f = Nio.open_file(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Time variable" % filename) - raise - - variableNameList = f.variables.keys() - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian']) - - # Use the sets to find the intersection where variable names are in possibleNames - timeNameSet = possibleTimeNames.intersection(varNameListLowerCase) - - if len(timeNameSet) == 0: - print("Unable to autodetect the Time Variable Name in the '%s'" % filename) - timeName = misc.askUserForVariableName(variableNameList, targetName ="Time") - - else: - timeName = timeNameSet.pop() - - return timeName, variableNameList - - -def findLatLonVarFromFile(filename): - """ - Function to find what the latitude and longitude variables are called in a model file. 
- - Input:: - -filename - Output:: - -latVarName - -lonVarName - -latMin - -latMax - -lonMin - -lonMax - """ - try: - f = Nio.open_file(filename, mode='r') - except: - print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename) - raise - - variableNameList = f.variables.keys() - # convert all variable names into lower case - varNameListLowerCase = [x.lower() for x in variableNameList] - - # Use "set" types for finding common variable name from in the file and from the list of possibilities - possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes']) - possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes']) - - # Use the sets to find the intersection where variable names are in possibleNames - latNameSet = possibleLatNames.intersection(varNameListLowerCase) - lonNameSet = possibleLonNames.intersection(varNameListLowerCase) - - if len(latNameSet) == 0 or len(lonNameSet) == 0: - print("Unable to autodetect Latitude and/or Longitude values in the file") - latName = misc.askUserForVariableName(variableNameList, targetName ="Latitude") - lonName = misc.askUserForVariableName(variableNameList, targetName ="Longitude") - - else: - latName = latNameSet.pop() - lonName = lonNameSet.pop() - - lats = np.array(f.variables[latName][:]) - lons = np.array(f.variables[lonName][:]) - - latMin = lats.min() - latMax = lats.max() - - # Convert the lons from 0:360 into -180:180 - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latName, lonName, latMin, latMax, lonMin, lonMax - - -def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type): - ################################################################################## - # Read in data from a list of model files into a single data structure - # Input: filelist - list of filenames (including path) - # myvar - string containing name of variable to load in (as it appears in file) - # Output: lat, lon - 2D array of latitude and longitude values - # times - list of times - # t2store - numpy array containing data from all files - # Modified from read_data_from_file_list to read data from multiple models into a 4-D array - # 1. The code now processes model data that completely covers the 20-yr period. Thus, - # all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes - # is fixed here. - # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be - # truncated to the 240 mons using the ntimes determined from the first file. - ################################################################################## - filelist.sort() - nfiles = len(filelist) - # Crash nicely if 'filelist' is zero length - if nfiles == 0: - print 'Error: no files have been passed to read_data_from_file_list(): Exit' - sys.exit() - - # Open the first file in the list to: - # i) read in lats, lons - # ii) find out how many timesteps in the file (assume same ntimes in each file in list) - # -allows you to create an empty array to store variable data for all times - tmp = Nio.open_file(filelist[0], format=file_type) - latsraw = tmp.variables[latVarName][:] - lonsraw = tmp.variables[lonVarName][:] - lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. 
# convert to -180,180 if necessary - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw; lat = latsraw - - timesraw = tmp.variables[timeVarName] - ntimes = len(timesraw); nygrd = len(lat[:, 0]); nxgrd = len(lon[0, :]) - - print 'Lats and lons read in for first file in filelist' - - # Create a single empty masked array to store model data from all files - #t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd)) - t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd)) - #timestore=ma.zeros((ntimes*nfiles)) - - ## Now load in the data for real - ## NB. no need to reload in the latitudes and longitudes -assume invariant - #timesaccu=0 # a counter for number of times stored so far in t2store - # NB. this method allows for missing times in data files - # as no assumption made that same number of times in each file... - - i = 0 - for ifile in filelist: - #print 'Loading data from file: ',filelist[i] - f = Nio.open_file(ifile) - t2raw = f.variables[myvar][:] - timesraw = f.variables[timeVarName] - #time = timesraw[0:ntimes] - ntimes = len(timesraw) - #ntimes=len(time) - print 'file= ',i,' ntimes= ',ntimes,filelist[i] - ## Flatten dimensions which needn't exist, i.e. level - ## e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. - ## Code requires data to have dimensions, (time,lat,lon) - ## i.e. remove level dimensions - t2tmp = t2raw.squeeze() - ## Nb. if data happen to be for a single time, we flattened it by accident; lets put it back... - if t2tmp.ndim == 2: - t2tmp = np.expand_dims(t2tmp, 0) - #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:] - t2store[i, 0:(ntimes-1), :, :] = t2tmp[0:(ntimes-1), :, :] - #timestore[timesaccu+np.arange(ntimes)]=time - #timesaccu=timesaccu+ntimes - f.close() - i += 1 - - print 'Data read in successfully with dimensions: ', t2store.shape - - # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here - ifile = filelist[0] - timestore, _ = process.getModelTimes(ifile, timeVarName) - - return lat, lon, timestore, t2store - -def find_latlon_ranges(filelist, lat_var_name, lon_var_name): - # Function to return the latitude and longitude ranges of the data in a file, - # given the identifying variable names. - # - # Input: - # filelist - list of filenames (data is read in from first file only) - # lat_var_name - variable name of the 'latitude' variable - # lon_var_name - variable name of the 'longitude' variable - # - # Output: - # latMin, latMax, lonMin, lonMax - self explanatory - # - # Peter Lean March 2011 - - filename = filelist[0] - - try: - f = Nio.open_file(filename) - - lats = f.variables[lat_var_name][:] - latMin = lats.min() - latMax = lats.max() - - lons = f.variables[lon_var_name][:] - lons[lons > 180] = lons[lons > 180] - 360. - lonMin = lons.min() - lonMax = lons.max() - - return latMin, latMax, lonMin, lonMax - - except: - print 'Error: there was a problem with finding the latitude and longitude ranges in the file' - print ' Please check that you specified the filename, and variable names correctly.' 
- - sys.exit() - -def writeBN_lola(fileName, lons, lats): - # write a binary data file that include longitude (1-d) and latitude (1-d) values - - F = fortranfile.FortranFile(fileName, mode='w') - ngrdY = lons.shape[0]; ngrdX = lons.shape[1] - tmpDat = ma.zeros(ngrdX); tmpDat[:] = lons[0, :]; F.writeReals(tmpDat) - tmpDat = ma.zeros(ngrdY); tmpDat[:] = lats[:, 0]; F.writeReals(tmpDat) - # release temporary arrays - tmpDat = 0 - F.close() - -def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg): - #(fileName,maskOption,numOBSs,numMDLs,nT,ngrdX,ngrdY,numSubRgn,obsData,mdlData,obsRgnAvg,mdlRgnAvg): - # write spatially- and regionally regridded data into a binary data file - missing = -1.e26 - F = fortranfile.FortranFile(fileName, mode='w') - # construct a data array to replace mask flag with a missing value (missing=-1.e12) for printing - data = ma.zeros((nT, ngrdY, ngrdX)) - for m in np.arange(numOBSs): - data[:, :, :] = obsData[m, :, :, :]; msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: data[n, j, i] = missing - - # write observed data. allowed to write only one row at a time - tmpDat = ma.zeros(ngrdX) - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - # write model data (dep. on the number of models). - for m in np.arange(numMDLs): - data[:, :, :] = mdlData[m, :, :, :] - msk = data.mask - for n in np.arange(nT): - for j in np.arange(ngrdY): - for i in np.arange(ngrdX): - if msk[n, j, i]: - data[n, j, i] = missing - - for n in np.arange(nT): - for j in np.arange(ngrdY): - tmpDat[:] = data[n, j, :] - F.writeReals(tmpDat) - - data = 0 # release the array allocated for data - # write data in subregions - if numSubRgn > 0: - print 'Also included are the time series of the means over ', numSubRgn, ' areas from obs and model data' - tmpDat = ma.zeros(nT); print numSubRgn - for m in np.arange(numOBSs): - for n in np.arange(numSubRgn): - tmpDat[:] = obsRgnAvg[m, n, :] - F.writeReals(tmpDat) - for m in np.arange(numMDLs): - for n in np.arange(numSubRgn): - tmpDat[:] = mdlRgnAvg[m, n, :] - F.writeReals(tmpDat) - tmpDat = 0 # release the array allocated for tmpDat - F.close() - -def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions): - # write an output file of variables up to 3 dimensions - # fileName: the name of the output data file - # numSubRgn : the number of subregions - # lons[ngrdX]: longitude - # lats[ngrdY]: latitudes - # obsData[nT,ngrdY,ngrdX]: the obs time series of the entire model domain - # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdltime series of the entire model domain - # obsRgnAvg[numSubRgn,nT]: the obs time series for the all subregions - # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for the all subregions - dimO = obsData.shape[0] # the number of obs data - dimM = mdlData.shape[0] # the number of mdl data - dimT = mdlData.shape[1] # the number of time levels - dimY = mdlData.shape[2] # y-dimension - dimX = mdlData.shape[3] # x-dimension - dimR = obsRgnAvg.shape[1] # the number of subregions - f = Nio.open_file(fileName, mode='w', format='nc') - print mdlRgnAvg.shape, dimM, dimR, dimT - #create global attributes - f.globalAttName = '' - # create dimensions - print 'Creating Dimensions within the NetCDF Object...' 
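    # NB. aside for readers without PyNIO (which is no longer easy to install):
    # the file layout created below could be reproduced with the netCDF4-python
    # package using roughly equivalent calls. This is a sketch under that
    # assumption only, with 'output.nc' as a placeholder filename; it is not
    # part of this module:
    #
    #     import netCDF4
    #     f = netCDF4.Dataset('output.nc', mode='w')
    #     f.createDimension('time', dimT)
    #     f.createDimension('west_east', dimX)
    #     f.createDimension('south_north', dimY)
    #     lon = f.createVariable('lon', 'f8', ('south_north', 'west_east'))
    #     lon[:, :] = lons
    #
    # The PyNIO calls that follow create the same dimensions and variables.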
-    f.create_dimension('unity', 1)
-    f.create_dimension('time', dimT)
-    f.create_dimension('west_east', dimX)
-    f.create_dimension('south_north', dimY)
-    f.create_dimension('obs', dimO)
-    f.create_dimension('models', dimM)
-
-    # create the variable (real*4) to be written in the file
-    print 'Creating Variables...'
-    f.create_variable('lon', 'd', ('south_north', 'west_east'))
-    f.create_variable('lat', 'd', ('south_north', 'west_east'))
-    f.create_variable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east'))
-    f.create_variable('mDat', 'd', ('models', 'time', 'south_north', 'west_east'))
-
-    if subRegions:
-        f.create_dimension('regions', dimR)
-        f.create_variable('oRgn', 'd', ('obs', 'regions', 'time'))
-        f.create_variable('mRgn', 'd', ('models', 'regions', 'time'))
-        f.variables['oRgn'].varAttName = 'Observation time series: Subregions'
-        f.variables['mRgn'].varAttName = 'Model time series: Subregions'
-
-    loadDataIntoNetCDF(f, obsList, obsData, 'Observation')
-    print 'Loaded the Observations into the NetCDF'
-
-    loadDataIntoNetCDF(f, mdlList, mdlData, 'Model')
-
-    # create attributes and units for the variable
-    print 'Creating Attributes and Units...'
-    f.variables['lon'].varAttName = 'Longitudes'
-    f.variables['lon'].varUnit = 'degrees East'
-    f.variables['lat'].varAttName = 'Latitudes'
-    f.variables['lat'].varUnit = 'degrees North'
-    f.variables['oDat'].varAttName = 'Observation time series: entire domain'
-    f.variables['mDat'].varAttName = 'Model time series: entire domain'
-
-    # assign the values to the variable and write it
-    f.variables['lon'][:, :] = lons
-    f.variables['lat'][:, :] = lats
-    if subRegions:
-        f.variables['oRgn'][:, :, :] = obsRgnAvg
-        f.variables['mRgn'][:, :, :] = mdlRgnAvg
-
-    f.close()
-
-def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType):
-    """
-    Input::
-        fileObject - PyNIO file object data will be loaded into
-        datasets - List of dataset names
-        dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
-        dataType - String with value of either 'Model' or 'Observation'
-    Output::
-        No return value. PyNIO file object is updated in place
-    """
-    datasetCount = 0
-    for dataset in datasets:
-        if dataType.lower() == 'observation':
-            datasetName = dataset.replace(' ', '')
-        elif dataType.lower() == 'model':
-            datasetName = path.splitext(path.basename(dataset))[0]
-        print "Creating variable %s" % datasetName
-        fileObject.create_variable(datasetName, 'd', ('time', 'south_north', 'west_east'))
-        fileObject.variables[datasetName].varAttName = 'Observation time series: entire domain'
-        print 'Loading values into %s' % datasetName
-        fileObject.variables[datasetName].assign_value(dataArray[datasetCount,:,:,:])
-        datasetCount += 1
diff --git a/code/mccSearch.py b/code/mccSearch.py
index 59ec44b..6e246f6 100644
--- a/code/mccSearch.py
+++ b/code/mccSearch.py
@@ -31,10 +31,8 @@ import matplotlib.cm as cm
 import matplotlib.colors as mcolors
 from matplotlib.ticker import FuncFormatter, FormatStrFormatter
+from scipy.ndimage import map_coordinates
 
-#existing modules in services
-import files
-import process
 #----------------------- GLOBAL VARIABLES --------------------------
 # --------------------- User defined variables ---------------------
 #FYI the lat lon values are not necessarily inclusive of the points given. These are the limits
@@ -374,7 +372,7 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None):
         #---------regrid the TRMM data to the MERG dataset ----------------------------------
         #regrid using the do_regrid stuff from the Apache OCW
         regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
-        regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM, LONTRMM, LAT, LON, order=1, mdi= -999999999)
+        regriddedTRMM = do_regrid(precipRateMasked[0,:,:], LATTRMM, LONTRMM, LAT, LON, order=1, mdi= -999999999)
         #----------------------------------------------------------------------------------
         #
         #get the lat/lon info from cloudElement
@@ -666,7 +664,7 @@ def findPrecipRate(TRMMdirName, timelist):
         #---------regrid the TRMM data to the MERG dataset ----------------------------------
         #regrid using the do_regrid stuff from the Apache OCW
         regriddedTRMM = ma.zeros((0, nygrd, nxgrd))
-        regriddedTRMM = process.do_regrid(precipRateMasked[0,:,:], LATTRMM, LONTRMM, LAT, LON, order=1, mdi= -999999999)
+        regriddedTRMM = do_regrid(precipRateMasked[0,:,:], LATTRMM, LONTRMM, LAT, LON, order=1, mdi= -999999999)
         #----------------------------------------------------------------------------------
         #
         #get the lat/lon info from
@@ -2644,17 +2642,106 @@ def decodeTimeFromString(time_string):
     print 'Error decoding time string: string does not match a predefined time format'
     return 0
 
+# #******************************************************************
+# def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
+#     """
+#     This function has been moved to the ocw/dataset_processor module
+#     """
+#     from ocw import dataset_processor
+#     q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1)
+
+#     return q2
 #******************************************************************
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
-    """
-    This function has been moved to the ocw/dataset_processor module
-    """
-    from ocw import dataset_processor
-    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1)
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
+    '''
+    Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
+
+    Input::
+        q - the variable to be regridded
+        lat,lon - original co-ordinates corresponding to q values
+        lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto
+        order - (optional) interpolation order 1=bi-linear, 3=cubic spline
+        mdi - (optional) fill value for missing data (used in creation of masked array)
+
+    Output::
+        q2 - q regridded onto the new set of lat2,lon2
+
+    '''
+
+    nlat = q.shape[0]
+    nlon = q.shape[1]
+
+    nlat2 = lat2.shape[0]
+    nlon2 = lon2.shape[1]
+
+    # To make our lives easier down the road, let's
+    # turn these into arrays of x & y coords
+    loni = lon2.ravel()
+    lati = lat2.ravel()
+
+    loni = loni.copy() # NB. it won't run unless you do this...
+    lati = lati.copy()
+
+    # Now, we'll set points outside the boundaries to lie along an edge
+    loni[loni > lon.max()] = lon.max()
+    loni[loni < lon.min()] = lon.min()
+
+    # To deal with the "hard" break, we'll have to treat y differently,
+    # so we're just setting the min here...
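    # NB. worked example of the clamp-and-index step here (illustrative values
    # only, not part of the committed code): suppose the source grid has
    # nlon = 36 longitudes running 0..350 in 10-degree steps. A target
    # longitude of 355 is first clamped to lon.max() = 350 and then mapped to
    # the fractional index
    #     (nlon - 1) * (350 - 0) / (350 - 0) = 35.0   (the last column),
    # while a target longitude of 180 maps to 35 * 180 / 350 = 18.0.
    # Fractional results are exactly what map_coordinates expects as
    # interpolation coordinates.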
+    lati[lati > lat.max()] = lat.max()
+    lati[lati < lat.min()] = lat.min()
+
+    # We need to convert these to (float) indices
+    # (xi should range from 0 to (nx - 1), etc)
+    loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
+
+    # Deal with the "hard" break in the y-direction
+    lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
+
+    # Notes on dealing with MDI when regridding data.
+    # Method adopted here:
+    # Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
+    # Perform bilinear interpolation of data, and of mask.
+    # To be conservative, a new grid point which contained some missing data on the old grid is set to missing data.
+    # -this is achieved by looking for any non-zero interpolated mask values.
+    # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
+    # set values at MDI points to the mean data value so little gradient is visible - not ideal, but acceptable for now.
+
+    # Set values at MDI points to something similar to their surroundings so they don't produce large gradients when interpolating.
+    # Preserve the MDI mask by only changing the data part of the masked array object.
+    for shift in (-1, 1):
+        for axis in (0, 1):
+            q_shifted = np.roll(q, shift=shift, axis=axis)
+            idx = ~q_shifted.mask * q.mask
+            q.data[idx] = q_shifted[idx]
+
+    # Now we actually interpolate.
+    # map_coordinates does cubic interpolation by default;
+    # use "order=1" to perform bilinear interpolation instead...
+    q2 = map_coordinates(q, [lati, loni], order=order)
+    q2 = q2.reshape([nlat2, nlon2])
+
+    # Set values to missing data outside of the original domain
+    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
+                                                              lat2 <= lat.min()),
+                                                np.logical_or(lon2 <= lon.min(),
+                                                              lon2 >= lon.max())))
+
+    # Make a second map using nearest-neighbour interpolation - use this to determine locations with MDI and mask these
+    qmdi = np.zeros_like(q)
+    qmdi[q.mask == True] = 1.
+    qmdi[q.mask == False] = 0.
+    qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
+    qmdi_r = qmdi_r.reshape([nlat2, nlon2])
+    mdimask = (qmdi_r != 0.0)
+
+    # Combine the missing data mask with the outside-domain mask defined above.
+    q2.mask = np.logical_or(mdimask, q2.mask)
 
     return q2
-
-#******************************************************************
+
+#******************************************************************
 #
 # METRICS FUNCTIONS FOR MERG.PY
 # TODO: rewrite these metrics so that they use the data from the
diff --git a/code/process.py b/code/process.py
deleted file mode 100644
index 5a362ec..0000000
--- a/code/process.py
+++ /dev/null
@@ -1,1284 +0,0 @@
-"""Collection of functions that process numpy arrays of varying shapes.
-Functions can range from subsetting to regridding"""
-
-# Python Standard Library Imports
-import math
-import datetime
-import re
-import string
-import sys
-import time
-
-# 3rd Party Imports
-import Nio
-import numpy as np
-import numpy.ma as ma
-from scipy.ndimage import map_coordinates
-
-
-def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
-    '''
-    Extract a sub-region from a data array.
-
-    Example::
-        **e.g.** the user may load a global model file, but only want to examine data over North America
-        This function extracts a sub-domain from the original data.
-        The defined sub-region must be a regular lat/lon bounding box,
-        but the model data may be on a non-regular grid (e.g. rotated, or Gaussian grid layout).
-        Data are kept on the original model grid and a rectangular (in model-space) region
-        is extracted which contains the rectangular (in lat/lon space) user supplied region.
-
-    INPUT::
-        data - 3d masked data array
-        lats - 2d array of latitudes corresponding to data array
-        lons - 2d array of longitudes corresponding to data array
-        latmin, latmax, lonmin, lonmax - bounding box of required region to extract
-
-    OUTPUT::
-        data2 - subset of original data array containing only data from required subregion
-        lats2 - subset of original latitude data array
-        lons2 - subset of original longitude data array
-
-    '''
-
-    # Mask model data array to find grid points inside the supplied bounding box
-    whlat = (lats > latmin) & (lats < latmax)
-    whlon = (lons > lonmin) & (lons < lonmax)
-    wh = whlat & whlon
-
-    # Find required matrix indices describing the limits of the regular lat/lon bounding box
-    jmax = np.where(wh == True)[0].max()
-    jmin = np.where(wh == True)[0].min()
-    imax = np.where(wh == True)[1].max()
-    imin = np.where(wh == True)[1].min()
-
-    # Cut out the sub-region from the data array
-    data2 = ma.zeros((data.shape[0], jmax - jmin, imax - imin))
-    data2 = data[:, jmin:jmax, imin:imax]
-
-    # Cut out sub-region from lats,lons arrays
-    lats2 = lats[jmin:jmax, imin:imax]
-    lons2 = lons[jmin:jmax, imin:imax]
-
-    return data2, lats2, lons2
-
-def calc_area_mean(data, lats, lons, mymask='not set'):
-    '''
-    Calculate Area Average of data in a masked array
-
-    INPUT::
-        data: a masked array of data (NB. only data from one time expected to be passed at once)
-        lats: 2d array of regularly gridded latitudes
-        lons: 2d array of regularly gridded longitudes
-        mymask: (optional) defines spatial region to do averaging over
-
-    OUTPUT::
-        area_mean: a value for the mean inside the area
-
-    '''
-
-    # If no mask is passed in, then set the mask to cover the whole data domain
-    if(mymask == 'not set'):
-        mymask = np.empty(data.shape)
-        mymask[:] = False # NB. mask means (don't show), so False everywhere means use everything.
-
-    # Dimension check on lats, lons
-    # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
-    # This bit of code just converts to the required 2d array shape
-    if(len(lats.shape) == 3):
-        lats = lats[0, :, :]
-
-    if(len(lons.shape) == 3):
-        lons = lons[0, :, :]
-
-    if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)):
-        lons, lats = np.meshgrid(lons, lats)
-
-    # Calculate grid length (assuming regular lat/lon grid)
-    dlat = lats[1, 0] - lats[0, 0]
-    dlon = lons[0, 1] - lons[0, 0]
-
-    # Calculate weights for each grid box
-    myweights = calc_area_in_grid_box(lats, dlon, dlat)
-
-    # Create a new masked array covering just the user selected area (defined in mymask)
-    # NB. this preserves missing data points in the observations data
-    subdata = ma.masked_array(data, mask=mymask)
-
-    if(myweights.shape != subdata.shape):
-        myweights.resize(subdata.shape)
-        myweights[1:, :] = myweights[0, :]
-
-    # Calculate weighted mean using ma.average (which takes weights)
-    area_mean = ma.average(subdata, weights=myweights)
-
-    return area_mean
-
-def calc_area_in_grid_box(latitude, dlat, dlon):
-    '''
-    Calculate area of regular lat-lon grid box
-
-    INPUT::
-        latitude: latitude of grid box (degrees)
-        dlat: grid length in latitude direction (degrees)
-        dlon: grid length in longitude direction (degrees)
-
-    OUTPUT::
-        A: area of the grid box
-
-    '''
-
-    R = 6371000  # radius of Earth in metres
-    C = 2 * math.pi * R
-
-    latitude = np.radians(latitude)
-
-    A = (dlon * (C / 360.)) * (dlat * (C / 360.) * np.cos(latitude))
-
-    return A
-
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
-    '''
-    Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
-
-    Input::
-        q - the variable to be regridded
-        lat,lon - original co-ordinates corresponding to q values
-        lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto
-        order - (optional) interpolation order 1=bi-linear, 3=cubic spline
-        mdi - (optional) fill value for missing data (used in creation of masked array)
-
-    Output::
-        q2 - q regridded onto the new set of lat2,lon2
-
-    '''
-
-    nlat = q.shape[0]
-    nlon = q.shape[1]
-
-    nlat2 = lat2.shape[0]
-    nlon2 = lon2.shape[1]
-
-    # To make our lives easier down the road, let's
-    # turn these into arrays of x & y coords
-    loni = lon2.ravel()
-    lati = lat2.ravel()
-
-    loni = loni.copy() # NB. it won't run unless you do this...
-    lati = lati.copy()
-
-    # Now, we'll set points outside the boundaries to lie along an edge
-    loni[loni > lon.max()] = lon.max()
-    loni[loni < lon.min()] = lon.min()
-
-    # To deal with the "hard" break, we'll have to treat y differently,
-    # so we're just setting the min here...
-    lati[lati > lat.max()] = lat.max()
-    lati[lati < lat.min()] = lat.min()
-
-    # We need to convert these to (float) indices
-    # (xi should range from 0 to (nx - 1), etc)
-    loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
-
-    # Deal with the "hard" break in the y-direction
-    lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
-
-    # Notes on dealing with MDI when regridding data.
-    # Method adopted here:
-    # Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
-    # Perform bilinear interpolation of data, and of mask.
-    # To be conservative, a new grid point which contained some missing data on the old grid is set to missing data.
-    # -this is achieved by looking for any non-zero interpolated mask values.
-    # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
-    # set values at MDI points to the mean data value so little gradient is visible - not ideal, but acceptable for now.
-
-    # Set values at MDI points to something similar to their surroundings so they don't produce large gradients when interpolating.
-    # Preserve the MDI mask by only changing the data part of the masked array object.
-    for shift in (-1, 1):
-        for axis in (0, 1):
-            q_shifted = np.roll(q, shift=shift, axis=axis)
-            idx = ~q_shifted.mask * q.mask
-            q.data[idx] = q_shifted[idx]
-
-    # Now we actually interpolate.
-    # map_coordinates does cubic interpolation by default;
-    # use "order=1" to perform bilinear interpolation instead...
-    q2 = map_coordinates(q, [lati, loni], order=order)
-    q2 = q2.reshape([nlat2, nlon2])
-
-    # Set values to missing data outside of the original domain
-    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
-                                                              lat2 <= lat.min()),
-                                                np.logical_or(lon2 <= lon.min(),
-                                                              lon2 >= lon.max())))
-
-    # Make a second map using nearest-neighbour interpolation - use this to determine locations with MDI and mask these
-    qmdi = np.zeros_like(q)
-    qmdi[q.mask == True] = 1.
-    qmdi[q.mask == False] = 0.
-    qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
-    qmdi_r = qmdi_r.reshape([nlat2, nlon2])
-    mdimask = (qmdi_r != 0.0)
-
-    # Combine the missing data mask with the outside-domain mask defined above.
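    # NB. tiny worked example of the mask combination happening here
    # (illustrative values only, not part of the committed code):
    #     mdimask = [[False, True ],     # nearest-neighbour MDI test
    #                [False, False]]
    #     q2.mask = [[False, False],     # outside-original-domain test
    #                [True,  False]]
    #     logical_or -> [[False, True ],
    #                    [True,  False]]
    # i.e. a regridded point stays masked if either test flags it.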
-    q2.mask = np.logical_or(mdimask, q2.mask)
-
-    return q2
-
-def create_mask_using_threshold(masked_array, threshold=0.5):
-    '''
-    Routine to create a mask, depending on the proportion of times with missing data.
-    For each pixel, calculate the proportion of times that are missing data,
-    **if** the proportion is above a specified threshold value,
-    then mark the pixel as missing data.
-
-    Input::
-        masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2).
-        threshold - (optional) threshold proportion above which a pixel is marked as 'missing data'.
-        NB. default threshold = 50%
-
-    Output::
-        mymask - a numpy array describing the mask. NB. not a masked array, just the mask itself.
-
-    '''
-
-    # try, except used as some model files don't have a full mask, but a single bool
-    # the except catches this situation and deals with it appropriately.
-    try:
-        nT = masked_array.mask.shape[0]
-
-        # For each pixel, count how many times are masked.
-        nMasked = masked_array.mask[:, :, :].sum(axis=0)
-
-        # Define new mask as when a pixel has over a defined threshold ratio of masked data
-        # e.g. if the threshold is 75%, and there are 10 times,
-        # then a pixel will be masked if more than 5 times are masked.
-        mymask = nMasked > (nT * threshold)
-
-    except:
-        mymask = np.zeros_like(masked_array.data[0, :, :])
-
-    return mymask
-
-def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
-    '''
-    Routine to calculate averages on longer time units than the data exists on.
-
-    Example::
-        If the data is 6-hourly, calculate daily, or monthly means.
-
-    Input::
-        data - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit - string describing time unit to average onto.
-        *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'*
-
-    Output:
-        meanstorem - numpy masked array of data values averaged over the required time period
-        newTimesList - a list of python datetime objects representing the data in the new averaging period
-        *NB.* currently set to beginning of averaging period,
-        **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
-    '''
-
-    # First catch unknown values of time unit passed in by user
-    acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
-
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging'
-        print '       Please check your code.'
-        return
-
-    # Calculate arrays of years (2007,2007),
-    # yearsmonths (200701,200702),
-    # or yearmonthdays (20070101,20070102)
-    # -depending on user selected averaging period.
-
-    # Year list
-    if unit == 'annual':
-        print 'Calculating annual mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonth format list
-    if unit == 'monthly':
-        print 'Calculating monthly mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonthDay format list
-    if unit == 'daily':
-        print 'Calculating daily mean data'
-        timeunits = []
-
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \
-                             str("%02d" % dateList[i].day))
-
-        timeunits = np.array(timeunits, dtype=int)
-
-
-    # TODO: add pentad setting using Julian days?
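    # NB. one possible shape for the pentad case flagged in the TODO above --
    # an illustrative sketch only, not wired into the rest of this routine.
    # It labels each date by year plus the 5-day block of its Julian day,
    # giving 73 pentads per year (with a short 74th block in leap years):
    #
    #     if unit == 'pentad':
    #         print 'Calculating pentad mean data'
    #         timeunits = []
    #         for i in np.arange(len(dateList)):
    #             julianDay = dateList[i].timetuple().tm_yday   # 1..366
    #             pentad = (julianDay - 1) // 5                 # 0..73
    #             timeunits.append(str(dateList[i].year) + str("%02d" % pentad))
    #         timeunits = np.array(timeunits, dtype=int)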
- - - # Full list: a special case - if unit == 'full': - print 'Calculating means data over full time range' - timeunits = [] - - for i in np.arange(len(dateList)): - timeunits.append(999) # i.e. we just want the same value for all times. - - timeunits = np.array(timeunits, dtype=int) - - - - # empty list to store new times - newTimesList = [] - - # Decide whether or not you need to do any time averaging. - # i.e. if data are already on required time unit then just pass data through and - # calculate and return representative datetimes. - processing_required = True - if len(timeunits) == (len(np.unique(timeunits))): - processing_required = False - - # 1D data arrays, i.e. time series - if data.ndim == 1: - # Create array to store the resulting data - meanstore = np.zeros(len(np.unique(timeunits))) - - # Calculate the means across each unique time unit - i = 0 - for myunit in np.unique(timeunits): - if processing_required: - datam = ma.masked_array(data, timeunits != myunit) - meanstore[i] = ma.average(datam) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(myunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - i = i + 1 - - # 3D data arrays - if data.ndim == 3: - - #datamask = create_mask_using_threshold(data,threshold=0.75) - - # Create array to store the resulting data - meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]]) - - # Calculate the means across each unique time unit - i = 0 - datamask_store = [] - for myunit in np.unique(timeunits): - if processing_required: - - mask = np.zeros_like(data) - mask[timeunits != myunit, :, :] = 1.0 - - # Calculate missing data mask within each time unit... - datamask_at_this_timeunit = np.zeros_like(data) - datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75) - # Store results for masking later - datamask_store.append(datamask_at_this_timeunit[0]) - - # Calculate means for each pixel in this time unit, ignoring missing data (using masked array). 
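    # NB. small worked example of the masked average computed below
    # (illustrative values only, not part of the committed code): for one
    # pixel whose values across this time unit are [1.0, 2.0, --, 4.0],
    # with -- masked as missing,
    #     ma.average(...) = (1.0 + 2.0 + 4.0) / 3 = 2.33...
    # whereas a plain mean that treated the fill value as data would be badly
    # corrupted. The combined mask built here is what keeps those points out.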
- datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit)) - meanstore[i, :, :] = ma.average(datam, axis=0) - - # construct new times list - smyunit = str(myunit) - if len(smyunit) == 4: # YYYY - yyyy = int(smyunit[0:4]) - mm = 1 - dd = 1 - if len(smyunit) == 6: # YYYYMM - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = 1 - if len(smyunit) == 8: # YYYYMMDD - yyyy = int(smyunit[0:4]) - mm = int(smyunit[4:6]) - dd = int(smyunit[6:8]) - if len(smyunit) == 3: # Full time range - # Need to set an appropriate time representing the mid-point of the entire time span - dt = dateList[-1] - dateList[0] - halfway = dateList[0] + (dt / 2) - yyyy = int(halfway.year) - mm = int(halfway.month) - dd = int(halfway.day) - - newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0)) - - i += 1 - - if not processing_required: - meanstorem = data - - if processing_required: - # Create masked array (using missing data mask defined above) - datamask_store = np.array(datamask_store) - meanstorem = ma.masked_array(meanstore, datamask_store) - - return meanstorem, newTimesList - -def calc_running_accum_from_period_accum(data): - ''' - Routine to calculate running total accumulations from individual period accumulations. - :: - - e.g. 0,0,1,0,0,2,2,1,0,0 - -> 0,0,1,1,1,3,5,6,6,6 - - Input:: - data: numpy array with time in the first axis - - Output:: - running_acc: running accumulations - - ''' - - - running_acc = np.zeros_like(data) - - if(len(data.shape) == 1): - running_acc[0] = data[0] - - if(len(data.shape) > 1): - running_acc[0, :] = data[0, :] - - for i in np.arange(data.shape[0] - 1): - if(len(data.shape) == 1): - running_acc[i + 1] = running_acc[i] + data[i + 1] - - if(len(data.shape) > 1): - running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :] - - return running_acc - -def ignore_boundaries(data, rim=10): - ''' - Routine to mask the lateral boundary regions of model data to ignore them from calculations. 
- - Input:: - data - a masked array of model data - rim - (optional) number of grid points to ignore - - Output:: - data - data array with boundary region masked - - ''' - - nx = data.shape[1] - ny = data.shape[0] - - rimmask = np.zeros_like(data) - for j in np.arange(rim): - rimmask[j, 0:nx - 1] = 1.0 - - for j in ny - 1 - np.arange(rim): - rimmask[j, 0:nx - 1] = 1.0 - - for i in np.arange(rim): - rimmask[0:ny - 1, i] = 1.0 - - for i in nx - 1 - np.arange(rim): - rimmask[0:ny - 1, i] = 1.0 - - data = ma.masked_array(data, mask=rimmask) - - return data - -def normalizeDatetimes(datetimes, timestep): - """ - Input:: - datetimes - list of datetime objects that need to be normalized - timestep - string of value ('daily' | 'monthly') - Output:: - normalDatetimes - list of datetime objects that have been normalized - - Normalization Rules:: - Daily data will be forced to an hour value of 00:00:00 - Monthly data will be forced to the first of the month at midnight - """ - normalDatetimes = [] - if timestep.lower() == 'monthly': - for inputDatetime in datetimes: - if inputDatetime.day != 1: - # Clean the inputDatetime - inputDatetimeString = inputDatetime.strftime('%Y%m%d') - normalInputDatetimeString = inputDatetimeString[:6] + '01' - inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d') - - normalDatetimes.append(inputDatetime) - - elif timestep.lower() == 'daily': - for inputDatetime in datetimes: - if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0: - datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S') - normalDatetimeString = datetimeString[:8] + '000000' - inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S') - - normalDatetimes.append(inputDatetime) - - return normalDatetimes - -def getModelTimes(modelFile, timeVarName): - ''' - TODO: Do a better job handling dates here - Routine to convert from model times ('hours since 1900...', 'days since ...') - into a python datetime structure - - Input:: - modelFile - path to the model tile you want to extract the times list and modelTimeStep from - timeVarName - name of the time variable in the model file - - Output:: - times - list of python datetime objects describing model data times - modelTimeStep - 'hourly','daily','monthly','annual' - ''' - - f = Nio.open_file(modelFile) - xtimes = f.variables[timeVarName] - timeFormat = xtimes.attributes['units'] - - # search to check if 'since' appears in units - try: - sinceLoc = re.search('since', timeFormat).end() - - - #KDW the below block generates and error. 
But the print statement, indicates that sinceLoc found something - except AttributeError: - print 'Error decoding model times: time variable attributes do not contain "since"' - raise - - units = '' - TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years') - # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units - for unit in TIME_UNITS: - if re.search(unit, timeFormat): - units = unit - break - - # cut out base time (the bit following 'since') - base_time_string = string.lstrip(timeFormat[sinceLoc:]) - - # decode base time - base_time = decodeTimeFromString(base_time_string) - - times = [] - - - - for xtime in xtimes[:]: - - # Cast time as an int ***KDW remove this so fractional xtime can be read from MERG - xtime = int(xtime) - - if units == 'minutes': - dt = datetime.timedelta(minutes=xtime) - new_time = base_time + dt - elif units == 'hours': - dt = datetime.timedelta(hours=xtime) - new_time = base_time + dt - elif units == 'days': - dt = datetime.timedelta(days=xtime) - new_time = base_time + dt - elif units == 'months': - # NB. adding months in python is complicated as month length varies and hence ambiguous. - # Perform date arithmatic manually - # Assumption: the base_date will usually be the first of the month - # NB. this method will fail if the base time is on the 29th or higher day of month - # -as can't have, e.g. Feb 31st. - new_month = int(base_time.month + xtime % 12) - new_year = int(math.floor(base_time.year + xtime / 12.)) - new_time = datetime.datetime(new_year, new_month, base_time.day, base_time.hour, base_time.second, 0) - elif units == 'years': - dt = datetime.timedelta(years=xtime) - new_time = base_time + dt - - #print "xtime is:", xtime, "dt is: ", dt - - - - times.append(new_time) - - try: - timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12) - modelTimeStep = getModelTimeStep(units, timeStepLength) - - #KDW if timeStepLength is zero do not normalize times as this would create an empty list - if timeStepLength != 0: - times = normalizeDatetimes(times, modelTimeStep) - except: - raise - - return times, modelTimeStep - -def getModelTimeStep(units, stepSize): - # Time units are now determined. Determine the time intervals of input data (mdlTimeStep) - - if units == 'minutes': - if stepSize == 60: - modelTimeStep = 'hourly' - elif stepSize == 1440: - modelTimeStep = 'daily' - # 28 days through 31 days - elif 40320 <= stepSize <= 44640: - modelTimeStep = 'monthly' - # 365 days through 366 days - elif 525600 <= stepSize <= 527040: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'hours': - #need a check for fractional hrs and only one hr i.e. 
stepSize=0 - if stepSize == 0 or stepSize == 1: - modelTimeStep = 'hourly' - elif stepSize == 24: - modelTimeStep = 'daily' - elif 672 <= stepSize <= 744: - modelTimeStep = 'monthly' - elif 8760 <= stepSize <= 8784: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'days': - if stepSize == 1: - modelTimeStep = 'daily' - elif 28 <= stepSize <= 31: - modelTimeStep = 'monthly' - elif 365 <= stepSize <= 366: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'months': - if stepSize == 1: - modelTimeStep = 'monthly' - elif stepSize == 12: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - elif units == 'years': - if stepSize == 1: - modelTimeStep = 'annual' - else: - raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize) - - else: - errorMessage = 'the time unit ', units, ' is not currently handled in this version.' - raise Exception(errorMessage) - - return modelTimeStep - -def decodeTimeFromString(time_string): - ''' - Decodes string into a python datetime object - *Method:* tries a bunch of different time format possibilities and hopefully one of them will hit. - :: - - **Input:** time_string - a string that represents a date/time - - **Output:** mytime - a python datetime object - ''' - - # This will deal with times that use decimal seconds - if '.' in time_string: - time_string = time_string.split('.')[0] + '0' - else: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y/%m/%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y%m%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y:%m:%d %H:%M:%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y%m%d%H%M%S') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H:%M') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass -#KDW added - try: - mytime = time.strptime(time_string, '%Y-%m-%d') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - try: - mytime = time.strptime(time_string, '%Y-%m-%d %H') - mytime = datetime.datetime(*mytime[0:6]) - return mytime - - except ValueError: - pass - - - print 'Error decoding time string: string does not match a predefined time format' - return 0 - -def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons): - ''' - Wrapper routine for regridding. 
- - Either regrids model to obs grid, or obs to model grid, depending on user choice - - Inputs:: - regrid_choice - [0] = Regrid obs data onto model grid or - [1] = Regrid model data onto obs grid - - obsdata,wrfdata - data arrays - obslats,obslons - observation lat,lon arrays - wrflats,wrflons - model lat,lon arrays - - Output:: - rdata,rdata2 - regridded data - lats,lons - latitudes and longitudes of regridded data - - ''' - - # Regrid obs data onto model grid - if(regrid_choice == '0'): - - ndims = len(obsdata.shape) - if(ndims == 3): - newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]] - nT = obsdata.shape[0] - - if(ndims == 2): - newshape = [wrfdata.shape[0], wrfdata.shape[1]] - nT = 0 - - rdata = wrfdata - lats, lons = wrflats, wrflons - - # Create a new masked array with the required dimensions - tmp = np.zeros(newshape) - rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory - - rdata2[:] = 0.0 - - if(nT > 0): - for t in np.arange(nT): - rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - - if(nT == 0): - rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :]) - - - # Regrid model data onto obs grid - if(regrid_choice == '1'): - ndims = len(wrfdata.shape) - if(ndims == 3): - newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]] - nT = obsdata.shape[0] - - if(ndims == 2): - newshape = [obsdata.shape[0], obsdata.shape[1]] - nT = 0 - - rdata2 = obsdata - lats, lons = obslats, obslons - - tmp = np.zeros(newshape) - rdata = ma.MaskedArray(tmp, mask=(tmp == 0)) - tmp = 0 # free up some memory - - rdata[:] = 0.0 - - if(nT > 0): - for t in np.arange(nT): - rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - - if(nT == 0): - rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :]) - - - return rdata, rdata2, lats, lons - -def extract_sub_time_selection(allTimes, subTimes, data): - ''' - Routine to extract a sub-selection of times from a data array. - - Example:: - Suppose a data array has 30 time values for daily data for a whole month, - but you only want the data from the 5th-15th of the month. - - Input:: - allTimes - list of datetimes describing what times the data in the data array correspond to - subTimes - the times you want to extract data from - data - the data array - - Output:: - subdata - subselection of data array - - ''' - # Create new array to store the subselection - subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]]) - - # Loop over all Times and when it is a member of the required subselection, copy the data across - i = 0 # counter for allTimes - subi = 0 # counter for subTimes - for t in allTimes: - if(np.setmember1d(allTimes, subTimes)): - subdata[subi, :, :] = data[i, :, :] - subi += 1 - i += 1 - - return subdata - -def calc_average_on_new_time_unit_K(data, dateList, unit): - ''' - Routine to calculate averages on longer time units than the data exists on. - e.g. if the data is 6-hourly, calculate daily, or monthly means. - - Input: - data - data values - dateList - list of python datetime structures corresponding to data values - unit - string describing time unit to average onto - e.g. 
-def calc_average_on_new_time_unit_K(data, dateList, unit):
-    '''
-    Routine to calculate averages on longer time units than the data exists on,
-    e.g. if the data is 6-hourly, calculate daily or monthly means.
-
-    Input:
-        data - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit - string describing time unit to average onto,
-               e.g. 'monthly', 'daily', 'pentad', 'annual', 'decadal'
-    Output:
-        meanstorem - numpy masked array of data values meaned over the required time period
-        newTimesList - a list of python datetime objects representing the data in the new averaging period
-        NB. currently set to the beginning of the averaging period,
-        i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-    '''
-
-    # Check if the user-selected temporal grid is valid. If not, EXIT
-    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging: EXIT'
-        return -1, -1
-
-    # Calculate arrays of: annual timeseries: year (2007,2007),
-    #                      monthly time series: year-month (200701,200702),
-    #                      daily timeseries: year-month-day (20070101,20070102)
-    # depending on the user-selected averaging period.
-
-    # Year list
-    if unit=='annual':
-        timeunits = [str(d.year) for d in dateList]
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonth format list
-    if unit=='monthly':
-        timeunits = [str(d.year) + "%02d" % d.month for d in dateList]
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonthDay format list
-    if unit=='daily':
-        timeunits = [str(d.year) + "%02d" % d.month + "%02d" % d.day for d in dateList]
-        timeunits = np.array(timeunits, dtype=int)
-
-    # TODO: add pentad setting using Julian days?
-
-    # Full list: a special case
-    if unit == 'full':
-        # Calculating means over the entire time range, i.e. an annual-mean climatology,
-        # so we just want the same key (999) for all times.
-        timeunits = [999 for d in dateList]
-        timeunits = np.array(timeunits, dtype=int)
-
-    # empty list to store new times
-    newTimesList = []
-
-    # Decide whether or not you need to do any time averaging:
-    # if the data are already on the required time unit then just pass the data
-    # through and calculate and return representative datetimes.
-    processing_required = True
-    if len(timeunits)==(len(np.unique(timeunits))):
-        processing_required = False
-
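To make the grouping step concrete, here is a small sketch (dates invented) of the integer keys this produces for unit='monthly', and why np.unique gives one output slot per averaging period:

    import datetime
    import numpy as np

    dateList = [datetime.datetime(2007, 1, 15), datetime.datetime(2007, 1, 16),
                datetime.datetime(2007, 2, 1)]
    timeunits = np.array([str(d.year) + "%02d" % d.month for d in dateList], dtype=int)
    print timeunits              # -> [200701 200701 200702]
    print np.unique(timeunits)   # -> [200701 200702], one mean per month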
-    # 1D data arrays, i.e. time series
-    if data.ndim==1:
-        # Create array to store the resulting data
-        meanstore = np.zeros(len(np.unique(timeunits)))
-
-        # Calculate the means across each unique time unit
-        i=0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                datam = ma.masked_array(data, timeunits!=myunit)
-                meanstore[i] = ma.average(datam)
-
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            elif len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            elif len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            elif len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
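The 1D branch leans on numpy masked arrays: everything outside the current time unit is masked away before averaging. A minimal sketch with invented values:

    import numpy as np
    import numpy.ma as ma

    data = np.array([1.0, 2.0, 3.0, 4.0])
    timeunits = np.array([200701, 200701, 200702, 200702])

    # mask out everything that is NOT January 2007, then average what remains
    datam = ma.masked_array(data, timeunits != 200701)
    print ma.average(datam)   # -> 1.5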
-    # 3D data arrays
-    if data.ndim==3:
-        # datamask = create_mask_using_threshold(data,threshold=0.75)
-        # Create array to store the resulting data
-        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-
-        # Calculate the means across each unique time unit
-        i=0
-        datamask_store = []
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                mask = np.zeros_like(data)
-                mask[timeunits!=myunit,:,:] = 1.0
-                # Calculate missing data mask within each time unit...
-                datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits==myunit,:,:], threshold=0.75)
-                # Store results for masking later
-                datamask_store.append(datamask_at_this_timeunit[0])
-                # Calculate means for each pixel in this time unit, ignoring missing data (using masked arrays)
-                datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
-                meanstore[i,:,:] = ma.average(datam, axis=0)
-
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            elif len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            elif len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            elif len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
-    if not processing_required:
-        meanstorem = data
-    elif data.ndim == 3:
-        # Create masked array (using the missing data mask built above)
-        datamask_store = np.array(datamask_store)
-        meanstorem = ma.masked_array(meanstore, datamask_store)
-    else:
-        # 1D case: no per-pixel missing-data mask is built, so return the plain means
-        meanstorem = meanstore
-
-    return meanstorem, newTimesList
-
-def decode_model_timesK(ifile, timeVarName, file_type):
-    #################################################################################################
-    # Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure
-    # Input:
-    #    ifile - a model file
-    #    timeVarName - name of the time variable in the model file
-    # Output:
-    #    times - list of python datetime objects describing model data times
-    # Peter Lean February 2011
-    #################################################################################################
-    f = Nio.open_file(ifile, mode='r', options=None, format=file_type)
-    xtimes = f.variables[timeVarName]
-    timeFormat = xtimes.attributes['units']
-    # e.g. timeFormat = "days since 1979-01-01 00:00:00"
-
-    # check that 'since' appears in the units
-    try:
-        sinceLoc = re.search('since', timeFormat).end()
-    except AttributeError:
-        print 'Error decoding model times: time var attributes do not contain "since": Exit'
-        sys.exit()
-
-    # search for 'minutes','hours','days','months','years' to determine the units
-    units = ''
-    for candidate in ['minutes', 'hours', 'days', 'months', 'years']:
-        if re.search(candidate, timeFormat):
-            units = candidate
-
-    # cut out the base time (the bit following 'since')
-    base_time_string = timeFormat[sinceLoc:].lstrip()
-    # decode the base time
-    base_time = decodeTimeFromString(base_time_string)
-
-    # 9/25/2012: datetime.timedelta has a problem with the argument because xtimes is a NioVariable.
-    # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime.
-    times = []
-    for ttmp in xtimes[:]:
-        xtime = int(ttmp)
-        if(units=='minutes'):
-            new_time = base_time + datetime.timedelta(minutes=xtime)
-        if(units=='hours'):
-            new_time = base_time + datetime.timedelta(hours=xtime)
-        if(units=='days'):
-            new_time = base_time + datetime.timedelta(days=xtime)
-        if(units=='months'):
-            # NB. adding months in python is complicated, as month length varies and is hence ambiguous,
-            # so perform the date arithmetic manually.
-            # Assumption: the base date will usually be the first of the month.
-            # NB. this method will fail if the base time is on the 29th or a higher day of month,
-            # as e.g. Feb 31st does not exist.
-            new_month = int((base_time.month - 1 + xtime) % 12) + 1
-            new_year = int(base_time.year + (base_time.month - 1 + xtime) / 12)
-            new_time = datetime.datetime(new_year, new_month, base_time.day, base_time.hour, base_time.minute, base_time.second)
-        if(units=='years'):
-            # datetime.timedelta has no 'years' argument, so bump the year directly
-            new_time = datetime.datetime(base_time.year + xtime, base_time.month, base_time.day, base_time.hour, base_time.minute, base_time.second)
-        times.append(new_time)
-    return times
-
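Because the corrected month arithmetic above is easy to get wrong, here is a standalone sketch showing how 'months since' offsets map onto calendar dates; add_months is a hypothetical helper (not part of the module) and the dates are invented:

    import datetime

    def add_months(base_time, n):
        # a zero-based month index makes the wrap-around arithmetic simple
        idx = base_time.month - 1 + n
        return datetime.datetime(base_time.year + idx // 12, idx % 12 + 1, base_time.day)

    base = datetime.datetime(1979, 1, 1)
    print add_months(base, 11)   # -> 1979-12-01
    print add_months(base, 12)   # -> 1980-01-01
    print add_months(base, 23)   # -> 1980-12-01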
-def regrid_in_time(data, dateList, unit):
-    #################################################################################################
-    # Routine to calculate averages on longer time units than the data exists on,
-    # e.g. if the data is 6-hourly, calculate daily or monthly means.
-    # Input:
-    #    data - data values
-    #    dateList - list of python datetime structures corresponding to data values
-    #    unit - string describing time unit to average onto,
-    #           e.g. 'monthly', 'daily', 'pentad', 'annual', 'decadal'
-    # Output:
-    #    meanstorem - numpy masked array of data values meaned over the required time period
-    #    newTimesList - a list of python datetime objects representing the data in the new averaging period
-    #    NB. currently set to the beginning of the averaging period,
-    #    i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-    # ..............................
-    # Jinwon Kim, Sept 30, 2012
-    # Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below:
-    # instead of the masked arrays used by Peter, use the boolean index array wh to select the
-    # data within the averaging range.
-    #################################################################################################
-
-    print '*** Begin regrid_in_time ***'
-    # Check if the user-selected temporal grid is valid. If not, EXIT
-    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging: EXIT'
-        return -1, -1
-
-    # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]),
-    # daily (yyyymmdd, [20070101,20070102]).  "%02d" is similar to i2.2 in Fortran.
-    if unit=='annual':  # YYYY
-        timeunits = [str(d.year) for d in dateList]
-    elif unit=='monthly':  # YYYYMM
-        timeunits = [str(d.year) + "%02d" % d.month for d in dateList]
-    elif unit=='daily':  # YYYYMMDD
-        timeunits = [str(d.year) + "%02d" % d.month + "%02d" % d.day for d in dateList]
-    elif unit=='full':
-        # Full list: a special case. Calculating means over the entire time range,
-        # i.e. an annual-mean climatology, so we just want the same key for all times.
-        timeunits = [999 for d in dateList]
-    timeunits = np.array(timeunits, dtype=int)
-    print 'timeRegridOption= ', unit
-
-    # empty list to store time levels after temporal regridding
-    newTimesList = []
-
-    # Decide whether or not you need to do any time averaging:
-    # if the data are already on the required time unit then just pass the data
-    # through and calculate and return representative datetimes.
-    processing_required = True
-    if len(timeunits)==(len(np.unique(timeunits))):
-        processing_required = False
-    print 'processing_required= ', processing_required, ': input time steps= ', len(timeunits), ': regridded output time steps= ', len(np.unique(timeunits))
-    print 'data.ndim= ', data.ndim
-
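The change Jinwon Kim describes, selecting one averaging period with a boolean index array rather than masking the full array, can be seen in isolation in this sketch (shapes and keys invented):

    import numpy as np
    import numpy.ma as ma

    data = np.random.rand(6, 3, 4)               # (time, lat, lon)
    timeunits = np.array([200701]*3 + [200702]*3)

    wh = timeunits == 200701    # boolean index selecting one averaging period
    datam = data[wh, :, :]      # only the selected slab is carried forward
    print ma.average(datam, axis=0).shape        # -> (3, 4)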
-    if data.ndim==1:  # 1D data array, i.e. a time series
-        # Create array to store the temporally regridded data
-        meanstore = np.zeros(len(np.unique(timeunits)))
-        # Calculate the means across each unique time unit
-        i=0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                wh = timeunits==myunit
-                datam = data[wh]
-                meanstore[i] = ma.average(datam)
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            elif len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            elif len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            elif len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
-    elif data.ndim==3:  # 3D data array, i.e. a time series of 2D fields
-        # Create array to store the resulting data
-        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-        # Calculate the means across each unique time unit
-        i=0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                wh = timeunits==myunit
-                datam = data[wh,:,:]
-                meanstore[i,:,:] = ma.average(datam, axis=0)
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            elif len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            elif len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            elif len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
-    if not processing_required:
-        meanstorem = data
-    else:
-        meanstorem = meanstore
-
-    print '*** End regrid_in_time ***'
-    return meanstorem, newTimesList
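Putting the pieces together, a hypothetical end-to-end sketch of regrid_in_time averaging synthetic 6-hourly data onto a daily grid (the shapes and dates are illustrative only; no model file is read):

    import datetime
    import numpy as np

    # synthetic 6-hourly data for two days: 8 time steps of a 3x4 field
    dateList = [datetime.datetime(2007, 1, 1) + datetime.timedelta(hours=6*i) for i in range(8)]
    data = np.random.rand(8, 3, 4)

    dailyMeans, dailyTimes = regrid_in_time(data, dateList, 'daily')
    print dailyMeans.shape    # -> (2, 3, 4), one mean field per day
    print dailyTimes          # -> [2007-01-01 00:00, 2007-01-02 00:00]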