From d8a4325a6eeff9a9af93eb19787ede26836018a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Fri, 5 May 2023 22:05:22 +0200 Subject: [PATCH 1/8] fix to xml file (remove ctrl-M), cf CF-conventions/discuss/#229: https://github.com/cf-convention/discuss/issues/229 Standard names: hard to spot "typo" in `surface_upward_mass_flux_of_methane_due_to_emission_from_fires` --- etc/cf-standard-name-table.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/etc/cf-standard-name-table.xml b/etc/cf-standard-name-table.xml index 3b145ae86e..e40141043e 100644 --- a/etc/cf-standard-name-table.xml +++ b/etc/cf-standard-name-table.xml @@ -24594,8 +24594,7 @@ kg m-2 s-1 - Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards. -The surface called "surface" means the lower boundary of the atmosphere. "Upward" indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, "flux" implies per unit area, called "flux density" in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. The specification of a physical process by the phrase "due_to_" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. "Emission" means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). "Emission" is a process entirely distinct from "re-emission" which is used in some standard names. The term "fires" means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models. + Methane emitted from the surface, generated by biomass burning (fires). Positive direction upwards. 
The surface called "surface" means the lower boundary of the atmosphere. "Upward" indicates a vector component which is positive when directed upward (negative downward). In accordance with common usage in geophysical disciplines, "flux" implies per unit area, called "flux density" in physics. The chemical formula for methane is CH4. The mass is the total mass of the molecules. The specification of a physical process by the phrase "due_to_" process means that the quantity named is a single term in a sum of terms which together compose the general quantity named by omitting the phrase. "Emission" means emission from a primary source located anywhere within the atmosphere, including at the lower boundary (i.e. the surface of the earth). "Emission" is a process entirely distinct from "re-emission" which is used in some standard names. The term "fires" means all biomass fires, whether naturally occurring or ignited by humans. The precise conditions under which fires produce and consume methane can vary between models. From 3adf5df1c991c42d0f2cf90ec0e64e08bd0e2742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Wed, 10 May 2023 11:33:13 +0200 Subject: [PATCH 2/8] Reorganised generate_std_names.py: * new optional argument -d/--descr for including standard name descriptions * including the description of all standard names * including standard name table version and related info. 
* now contains several variables: - variables (starting with underscore) used in the processing - VERSION (dict) - CONVENTIONS_STRING (str) - STD_NAME (dict) - ALIASES (dict) - optionally DESCRIPTION (dict) harmonize quotation marks to single quotes in description string --- tools/generate_std_names.py | 107 +++++++++++++++++++++++++++--------- 1 file changed, 80 insertions(+), 27 deletions(-) diff --git a/tools/generate_std_names.py b/tools/generate_std_names.py index 08bacbe1e0..9fa1de6cf2 100644 --- a/tools/generate_std_names.py +++ b/tools/generate_std_names.py @@ -7,8 +7,9 @@ A script to convert the standard names information from the provided XML file into a Python dictionary format. -Takes two arguments: the first is the XML file to process and the second -is the name of the file to write the Python dictionary file into. +Takes two or three arguments: the first is the XML file to process and the second +is the name of the file to write the Python dictionary file into. The optional +third argument, '--descr', includes the standard name descriptions in the file. By default, Iris will use the source XML file: etc/cf-standard-name-table.xml @@ -20,23 +21,28 @@ """ import argparse -import pprint import xml.etree.ElementTree as ET -STD_VALUES_FILE_TEMPLATE = ''' +STD_NAME_TABLE_FILE_TEMPLATE = ''' # Copyright Iris contributors # # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -This file contains a dictionary of standard value names that are mapped -to another dictionary of other standard name attributes. Currently only -the `canonical_unit` exists in these attribute dictionaries. - This file is automatically generated. Do not edit this file by hand. +The file contains the following elements, formatted as python code: + * Information on the source standard name table version. 
+ * A dictionary of standard value names that are mapped + to another dictionary of other standard name attributes. + Currently only the `canonical_unit` exists in these attribute + dictionaries. + * A dictionary of aliased standard names that are mapped to the + current standard name. + * Optionally, a dictionary of standard names mapped to their descriptions. + The file will be generated during a standard build/installation:: python setup.py build @@ -49,10 +55,17 @@ Or for more control (e.g. to use an alternative XML file) via:: python tools/generate_std_names.py XML_FILE MODULE_FILE - """ +'''.lstrip() + + +def found_or_none(elem): + return elem.text if elem is not None else None + -STD_NAMES = '''.lstrip() +# Take care of inconsistent quotes in standard name descriptions. +def replace_quote(txt): + return txt.replace('"', "'") if txt is not None else None def process_name_table(tree, element_name, *child_elements): @@ -62,32 +75,69 @@ def process_name_table(tree, element_name, *child_elements): """ for elem in tree.iterfind(element_name): sub_section = {} - for child_elem in child_elements: - found_elem = elem.find(child_elem) - sub_section[child_elem] = found_elem.text if found_elem is not None else None - + sub_section[child_elem] = found_or_none(elem.find(child_elem)) yield {elem.get("id") : sub_section} -def to_dict(infile, outfile): - values = {} - aliases = {} - +def prettydict(outfile, varname, data): + """Pretty formatted output of the data (dict) assigned to the variable 'varname'.""" + outfile.write(f'{varname} = {{\n') + for k, v in dict(sorted(data.items())).items(): + outfile.write(f' "{k}": "{v}",\n') + outfile.write("}\n\n") + + +def decode_version(outfile, tree): + """Decode the version information in the xml header information.""" + version = {} + for elem in ["table_name", "version_number", "last_modified", "institution", "contact"]: + version[elem] = found_or_none(tree.find(elem)) + if version["table_name"] is None: + if 
(version["institution"] == "Centre for Environmental Data Analysis" + and version["contact"] == "support@ceda.ac.uk"): + version["table_name"] = "CF-StdNameTable" + else: + version["table_name"] = "USER-StdNameTable" + prettydict(outfile, "VERSION", version) + version_string = "-".join(version[k] for k in ["table_name", "version_number"]) + outfile.write(f'CONVENTIONS_STRING = "{version_string}"\n\n') + + +def write_useful_variables(outfile): + outfile.write( + '\n# The following three variables are used for processing the standard names information below\n' + '_ALTERNATIVE_MODES = ["accept", "warn", "replace"]\n' + '_DEFAULT = "warn"\n' + '_MODE = _DEFAULT\n\n' + ) + + +def decode_standard_name_table(infile, outfile, description=False): + """Process the different parts of the xml file.""" tree = ET.parse(infile) + outfile.write(STD_NAME_TABLE_FILE_TEMPLATE) + write_useful_variables(outfile) + decode_version(outfile, tree) + + data = {} for section in process_name_table(tree, 'entry', 'canonical_units'): - values.update(section) + data.update(section) + prettydict(outfile, "STD_NAMES", data) + data = {} for section in process_name_table(tree, 'alias', 'entry_id'): - aliases.update(section) - - for key, valued in aliases.items(): - values.update({ - key : {'canonical_units' : values.get(valued['entry_id']).get('canonical_units')} - }) + for k, v in section.items(): + data.update({k: v["entry_id"]}) + prettydict(outfile, "ALIASES", data) - outfile.write(STD_VALUES_FILE_TEMPLATE + pprint.pformat(values)) + if description: + data = {} + for section in process_name_table(tree, 'entry', 'description'): + for k, v in section.items(): + data.update({k: replace_quote(v["description"])}) + prettydict(outfile, "DESCRIPTIONS", data) if __name__ == "__main__": @@ -97,10 +147,13 @@ def to_dict(infile, outfile): help='Path to CF standard name XML') parser.add_argument('output', metavar='OUTPUT', help='Path to resulting Python code') + parser.add_argument('-d', '--descr', 
action="store_true", + help="Include standard name descriptions") args = parser.parse_args() encoding = {'encoding': 'utf-8'} with open(args.input, 'r', **encoding) as in_fh: with open(args.output, 'w', **encoding) as out_fh: - to_dict(in_fh, out_fh) + decode_standard_name_table(in_fh, out_fh, args.descr) + pass From e94d6543b3689a05d765f871c0bcd6c5d72a6bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Wed, 10 May 2023 11:31:24 +0200 Subject: [PATCH 3/8] added standard name handling functions in standard_name_table.py --- lib/iris/std_name_table.py | 124 +++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 lib/iris/std_name_table.py diff --git a/lib/iris/std_name_table.py b/lib/iris/std_name_table.py new file mode 100644 index 0000000000..1c405bc9b9 --- /dev/null +++ b/lib/iris/std_name_table.py @@ -0,0 +1,124 @@ +import warnings + +import iris.std_names + + +def get_convention(): + """Return the 'Conventions' string of the CF standard name table.""" + try: + convention = iris.std_names.CONVENTIONS_STRING + except AttributeError: + convention = None + return convention + + +def set_alias_processing(mode): + """ + Set how standard name aliases are handled. + + Arg: + + * mode `string` specifying handling: + 'accept' - aliases are handled as any other standard name, + 'warn' - as above, but a warning is issued, + 'replace' - aliased standard names are replaced with the current one. + """ + if not hasattr(iris.std_names, "ALIASES"): + raise ValueError("The standard name table has no aliases defined.") + if mode == "default": + iris.std_names._MODE = iris.std_names._DEFAULT + elif mode in iris.std_names._ALTERNATIVE_MODES: + iris.std_names._MODE = mode + else: + raise ValueError( + "{!r} is not a valid alternative for processing " + "of standard name aliases.".format(mode) + ) + + +def get_description(name): + """ + Return the standard name description as a `string`. 
+ + Arg: + + * name `string` containing the standard name. + + Requesting the description of a aliased standard name results in a error + if the alias proceessing is set to "replace", because the aliased standard + name should already have been replaced. + """ + if not hasattr(iris.std_names, "DESCRIPTIONS"): + return None + + if name in iris.std_names.STD_NAMES: + descr = iris.std_names.DESCRIPTIONS[name] + action = iris.std_names._ALTERNATIVE_MODES[0] + elif hasattr(iris.std_names, "ALIASES"): + if name in iris.std_names.ALIASES: + descr = iris.std_names.DESCRIPTIONS[iris.std_names.ALIASES[name]] + action = iris.std_names._MODE + else: + action = iris.std_names._ALTERNATIVE_MODES[2] + else: + action = iris.std_names._ALTERNATIVE_MODES[2] + + if action == iris.std_names._ALTERNATIVE_MODES[1]: + msg = ( + "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n" + "The description for the latter will be used." + ) + warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) + elif action == iris.std_names._ALTERNATIVE_MODES[2]: + raise ValueError( + "{!r} is not a valid standard name (or it may have been aliased).".format( + name + ) + ) + return descr + + +def check_valid_std_name(name): + """ + Returning standard name as a `string`. + + Arg: + + * name `string` containing the prospective standard name. + + Depending on the setting of the alias proceessing the following will + happen if 'name' is an aliased standard name: + "accept" - the aliased standard name is accepted as valid and returned, + "warn" - a warning is issued and the valid standard name is returned, + "replace" - the valid standard name is returned without warning. + + When 'name' is neither a standard name nor an alias an error results. 
+ """ + if name in iris.std_names.STD_NAMES: + std_name = name + action = iris.std_names._ALTERNATIVE_MODES[0] + elif hasattr(iris.std_names, "ALIASES"): + if name in iris.std_names.ALIASES: + if iris.std_names._MODE == iris.std_names._ALTERNATIVE_MODES[0]: + std_name = name + action = iris.std_names._ALTERNATIVE_MODES[0] + else: + std_name = iris.std_names.ALIASES[name] + if ( + iris.std_names._MODE + == iris.std_names._ALTERNATIVE_MODES[1] + ): + action = iris.std_names._MODE + else: + action = iris.std_names._ALTERNATIVE_MODES[0] + else: + action = iris.std_names._ALTERNATIVE_MODES[2] + else: + action = iris.std_names._ALTERNATIVE_MODES[2] + + if action == iris.std_names._ALTERNATIVE_MODES[2]: + raise ValueError("{repr(name)} is not a valid standard_name.") + elif action == iris.std_names._ALTERNATIVE_MODES[1]: + msg = "\nThe standard name {!r} is aliased and is \nreplaced by {!r}." + warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) + return std_name From dc989050da10cb6c37b49461b753c5d02a55059c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Wed, 10 May 2023 01:12:43 +0200 Subject: [PATCH 4/8] Updated mixin.py to use new standard name handling --- lib/iris/common/mixin.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py index 4c19dd756b..1bc03ae703 100644 --- a/lib/iris/common/mixin.py +++ b/lib/iris/common/mixin.py @@ -8,12 +8,13 @@ """ + from collections.abc import Mapping from functools import wraps import cf_units -import iris.std_names +from iris.std_name_table import check_valid_std_name from .metadata import BaseMetadata @@ -23,7 +24,6 @@ def _get_valid_standard_name(name): # Standard names are optionally followed by a standard name # modifier, separated by one or more blank spaces - if name is not None: # Supported standard name modifiers. Ref: [CF] Appendix C. 
valid_std_name_modifiers = [ @@ -35,21 +35,21 @@ def _get_valid_standard_name(name): name_groups = name.split(maxsplit=1) if name_groups: - std_name = name_groups[0] - name_is_valid = std_name in iris.std_names.STD_NAMES + std_name = check_valid_std_name(name_groups[0]) try: std_name_modifier = name_groups[1] except IndexError: - pass # No modifier + result = std_name else: - name_is_valid &= std_name_modifier in valid_std_name_modifiers - - if not name_is_valid: - raise ValueError( - "{!r} is not a valid standard_name".format(name) - ) - - return name + if std_name_modifier in valid_std_name_modifiers: + result = f"{std_name} {std_name_modifier}" + else: + raise ValueError( + f"{repr(std_name_modifier)} is not a valid standard_name" + ) + else: + result = None + return result class LimitedAttributeDict(dict): From 7b5467c5105b8be819e03d3f8bb6b70a2c16ec89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Wed, 10 May 2023 11:46:19 +0200 Subject: [PATCH 5/8] Naively placed 'import iris.std_name_table' in iris.__init__ --- lib/iris/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index 38465472ee..0644a83a0b 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -100,6 +100,7 @@ def callback(cube, field, filename): import iris._constraints import iris.config import iris.io +import iris.std_name_table from ._deprecation import IrisDeprecation, warn_deprecated From 1ab8fd921d1e21c3bbfd86c28a7ec87f4909b406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Thu, 11 May 2023 10:24:20 +0200 Subject: [PATCH 6/8] Cleanup of structure --- lib/iris/std_name_table.py | 67 ++++++++++++++----------------------- tools/generate_std_names.py | 9 +++-- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/lib/iris/std_name_table.py b/lib/iris/std_name_table.py index 1c405bc9b9..9e14759c90 100644 --- a/lib/iris/std_name_table.py +++ b/lib/iris/std_name_table.py @@ -43,44 
+43,35 @@ def get_description(name): Arg: * name `string` containing the standard name. - - Requesting the description of a aliased standard name results in a error - if the alias proceessing is set to "replace", because the aliased standard - name should already have been replaced. """ if not hasattr(iris.std_names, "DESCRIPTIONS"): return None + error = False if name in iris.std_names.STD_NAMES: descr = iris.std_names.DESCRIPTIONS[name] - action = iris.std_names._ALTERNATIVE_MODES[0] elif hasattr(iris.std_names, "ALIASES"): if name in iris.std_names.ALIASES: descr = iris.std_names.DESCRIPTIONS[iris.std_names.ALIASES[name]] - action = iris.std_names._MODE + if iris.std_names._MODE == iris.std_names._REPLACE: + msg = ( + "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n" + "The description for the latter will be used." + ) + warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) else: - action = iris.std_names._ALTERNATIVE_MODES[2] + error = True else: - action = iris.std_names._ALTERNATIVE_MODES[2] + error = True - if action == iris.std_names._ALTERNATIVE_MODES[1]: - msg = ( - "\nStandard name {!r} is aliased and is \nreplaced by {!r}.\n" - "The description for the latter will be used." - ) - warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) - elif action == iris.std_names._ALTERNATIVE_MODES[2]: - raise ValueError( - "{!r} is not a valid standard name (or it may have been aliased).".format( - name - ) - ) + if error: + raise ValueError("{!r} is not a valid standard name.".format(name)) return descr def check_valid_std_name(name): """ - Returning standard name as a `string`. + Check and return if argument is a valid standard name or alias. 
Arg: @@ -89,36 +80,30 @@ def check_valid_std_name(name): Depending on the setting of the alias proceessing the following will happen if 'name' is an aliased standard name: "accept" - the aliased standard name is accepted as valid and returned, - "warn" - a warning is issued and the valid standard name is returned, + "warn" - a warning is issued, otherwise the same as "accept", "replace" - the valid standard name is returned without warning. When 'name' is neither a standard name nor an alias an error results. """ + error = False if name in iris.std_names.STD_NAMES: std_name = name - action = iris.std_names._ALTERNATIVE_MODES[0] elif hasattr(iris.std_names, "ALIASES"): if name in iris.std_names.ALIASES: - if iris.std_names._MODE == iris.std_names._ALTERNATIVE_MODES[0]: - std_name = name - action = iris.std_names._ALTERNATIVE_MODES[0] - else: + if iris.std_names._MODE == iris.std_names._REPLACE: std_name = iris.std_names.ALIASES[name] - if ( - iris.std_names._MODE - == iris.std_names._ALTERNATIVE_MODES[1] - ): - action = iris.std_names._MODE - else: - action = iris.std_names._ALTERNATIVE_MODES[0] + else: + std_name = name + if iris.std_names._MODE == iris.std_names._WARN: + msg = "\nThe standard name {!r} is aliased should be \nreplaced by {!r}." + warnings.warn( + msg.format(name, iris.std_names.ALIASES[name]) + ) else: - action = iris.std_names._ALTERNATIVE_MODES[2] + error = True else: - action = iris.std_names._ALTERNATIVE_MODES[2] + error = True - if action == iris.std_names._ALTERNATIVE_MODES[2]: - raise ValueError("{repr(name)} is not a valid standard_name.") - elif action == iris.std_names._ALTERNATIVE_MODES[1]: - msg = "\nThe standard name {!r} is aliased and is \nreplaced by {!r}." 
- warnings.warn(msg.format(name, iris.std_names.ALIASES[name])) + if error: + raise ValueError("{!r} is not a valid standard_name.".format(name)) return std_name diff --git a/tools/generate_std_names.py b/tools/generate_std_names.py index 9fa1de6cf2..fb3aad7a7a 100644 --- a/tools/generate_std_names.py +++ b/tools/generate_std_names.py @@ -34,6 +34,8 @@ This file is automatically generated. Do not edit this file by hand. The file contains the following elements, formatted as python code: + * A few variables used internally in the standard name processing. + These begin with an underscore. * Information on the source standard name table version. * A dictionary of standard value names that are mapped to another dictionary of other standard name attributes. @@ -106,8 +108,11 @@ def decode_version(outfile, tree): def write_useful_variables(outfile): outfile.write( - '\n# The following three variables are used for processing the standard names information below\n' - '_ALTERNATIVE_MODES = ["accept", "warn", "replace"]\n' + '\n# The following variables are used for processing the standard names information below\n' + '_ACCEPT = "accept"\n' + '_WARN = "warn"\n' + '_REPLACE ="replace"\n' + '_ALTERNATIVE_MODES = [_ACCEPT, _WARN, _REPLACE]\n' '_DEFAULT = "warn"\n' '_MODE = _DEFAULT\n\n' ) From 235ac029b5c74e396d3dc6ee5533cd7562e9ebb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Thu, 11 May 2023 11:22:54 +0200 Subject: [PATCH 7/8] aligned common/mixin.py with original --- lib/iris/common/mixin.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py index 1bc03ae703..029a9a9294 100644 --- a/lib/iris/common/mixin.py +++ b/lib/iris/common/mixin.py @@ -8,7 +8,6 @@ """ - from collections.abc import Mapping from functools import wraps @@ -24,6 +23,7 @@ def _get_valid_standard_name(name): # Standard names are optionally followed by a standard name # modifier, separated 
by one or more blank spaces + if name is not None: # Supported standard name modifiers. Ref: [CF] Appendix C. valid_std_name_modifiers = [ @@ -35,21 +35,24 @@ def _get_valid_standard_name(name): name_groups = name.split(maxsplit=1) if name_groups: - std_name = check_valid_std_name(name_groups[0]) + std_name = name_groups[0] + try: + new_std_name = check_valid_std_name(name_groups[0]) + name = name.replace(std_name, new_std_name) + except ValueError: + raise ValueError( + "{!r} is not a valid standard_name".format(name) + ) try: std_name_modifier = name_groups[1] except IndexError: - result = std_name + pass # No modifier else: - if std_name_modifier in valid_std_name_modifiers: - result = f"{std_name} {std_name_modifier}" - else: + if std_name_modifier not in valid_std_name_modifiers: raise ValueError( - f"{repr(std_name_modifier)} is not a valid standard_name" + "{!r} is not a valid standard_name".format(name) ) - else: - result = None - return result + return name class LimitedAttributeDict(dict): From d2216e7a8b8db4c253cd53274882cd56b71d5694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20B=C3=A4rring?= Date: Thu, 11 May 2023 11:39:04 +0200 Subject: [PATCH 8/8] Added license header to std_name_table.py --- lib/iris/std_name_table.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/iris/std_name_table.py b/lib/iris/std_name_table.py index 9e14759c90..5a1a0ee13f 100644 --- a/lib/iris/std_name_table.py +++ b/lib/iris/std_name_table.py @@ -1,3 +1,12 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Handling of standard names and standard name aliases. +""" + import warnings import iris.std_names