diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml new file mode 100644 index 00000000..ac32e90a --- /dev/null +++ b/.github/workflows/pr.yaml @@ -0,0 +1,62 @@ +name: Test Pull Request + +on: [pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8'] + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Cache .cache/pip + uses: actions/cache@v3 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }} + - name: Install package + run: | + touch requirements.txt + pip install -r requirements.txt black==23.3 flake8==6.0 + - name: Flake8 + run: flake8 --ignore=C901,W503,E501 + # at the moment I would prefer not to enable this + # - name: Black + # uses: psf/black@23.3.0 + + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8'] + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Cache .cache/pip + uses: actions/cache@v3 + id: cache-pip + with: + path: ~/.cache/pip + key: pip_cache_py_${{ matrix.python-version }} + - name: setup + run: | + pip install -r requirements.txt . + pip install -r requirements-dev.txt + #TODO reenable: works only on branch? 
+ # - name: diff_pydocstyle_report + # run: make diff_pydocstyle_report + - name: Test + run: make test + - name: Planemo lint tools + run: planemo l tests/test-data/ + # planemo test content of tests/test-data (this is OK, because the previous + # tests ensure equality of the xmls that are generated and those in the package) + - name: planemo test tools + run: export PATH=$(pwd)/tests/test-data:$PATH && planemo t tests/test-data/ \ No newline at end of file diff --git a/ctdconverter/common/logger.py b/ctdconverter/common/logger.py deleted file mode 100644 index d1573c3a..00000000 --- a/ctdconverter/common/logger.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python -import sys - -MESSAGE_INDENTATION_INCREMENT = 2 - - -def _get_indented_text(text, indentation_level): - return ("%(indentation)s%(text)s" % - {"indentation": " " * (MESSAGE_INDENTATION_INCREMENT * indentation_level), - "text": text}) - - -def warning(warning_text, indentation_level=0): - sys.stdout.write(_get_indented_text("WARNING: %s\n" % warning_text, indentation_level)) - - -def error(error_text, indentation_level=0): - sys.stderr.write(_get_indented_text("ERROR: %s\n" % error_text, indentation_level)) - - -def info(info_text, indentation_level=0): - sys.stdout.write(_get_indented_text("INFO: %s\n" % info_text, indentation_level)) diff --git a/ctdconverter/common/utils.py b/ctdconverter/common/utils.py index 7bddc814..dd8f5213 100644 --- a/ctdconverter/common/utils.py +++ b/ctdconverter/common/utils.py @@ -1,8 +1,10 @@ #!/usr/bin/env python import json +import logging import ntpath import operator import os +import re from functools import reduce from CTDopts.CTDopts import ( @@ -16,7 +18,6 @@ ) from lxml import etree -from ..common import logger from ..common.exceptions import ApplicationException @@ -109,7 +110,7 @@ def validate_path_exists(path): def validate_argument_is_directory(args, argument_name): file_name = getattr(args, argument_name) - logger.info("REALPATH %s" % 
os.path.realpath(file_name)) + logging.info("REALPATH %s" % os.path.realpath(file_name)) if file_name is not None and os.path.isdir(os.path.realpath(file_name)): raise ApplicationException("The provided output file name (%s) points to a directory." % file_name) @@ -150,12 +151,12 @@ def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_e schema = None if xsd_location is not None: try: - logger.info("Loading validation schema from %s" % xsd_location, 0) + logging.info("Loading validation schema from %s" % xsd_location) schema = etree.XMLSchema(etree.parse(xsd_location)) except Exception as e: - logger.error("Could not load validation schema {}. Reason: {}".format(xsd_location, str(e)), 0) + logging.error("Could not load validation schema {}. Reason: {}".format(xsd_location, str(e))) else: - logger.warning("Validation against a schema has not been enabled.", 0) + logging.warning("Validation against a schema has not been enabled.") for input_ctd in input_ctds: if schema is not None: @@ -165,7 +166,7 @@ def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_e # if multiple inputs are being converted, we need to generate a different output_file for each input if is_converting_multiple_ctds: output_file = os.path.join(output_file, get_filename_without_suffix(input_ctd) + "." 
+ output_file_extension) - logger.info("Parsing %s" % input_ctd) + logging.info("Parsing %s" % input_ctd) model = None try: @@ -184,7 +185,14 @@ def parse_input_ctds(xsd_location, input_ctds, output_destination, output_file_e def flatten_list_of_lists(args, list_name): - setattr(args, list_name, [item for sub_list in getattr(args, list_name) for item in sub_list]) + lst = getattr(args, list_name) + ret = [] + for e in lst: + if isinstance(e, list): + ret.extend(e) + else: + ret.append(e) + setattr(args, list_name, ret) def validate_against_schema(ctd_file, schema): @@ -197,7 +205,7 @@ def validate_against_schema(ctd_file, schema): def add_common_parameters(parser, version, last_updated): parser.add_argument("FORMAT", default=None, help="Output format (mandatory). Can be one of: cwl, galaxy.") - parser.add_argument("-i", "--input", dest="input_files", default=[], required=True, nargs="+", action="append", + parser.add_argument("-i", "--input", dest="input_files", required=True, nargs="+", action="append", help="List of CTD files to convert.") parser.add_argument("-o", "--output-destination", dest="output_destination", required=True, help="If multiple input files are given, then a folder in which all converted " @@ -367,72 +375,23 @@ def extract_and_flatten_parameters(ctd_model, nodes=False): # return reversed(parameters) -# some parameters are mapped to command line options, this method helps resolve those mappings, if any -def resolve_param_mapping(param, ctd_model, fix_underscore=False): - # go through all mappings and find if the given param appears as a reference name in a mapping element - param_mapping = None - ctd_model_cli = [] - if hasattr(ctd_model, "cli"): - ctd_model_cli = ctd_model.cli - - for cli_element in ctd_model_cli: - for mapping_element in cli_element.mappings: - if mapping_element.reference_name == param.name: - if param_mapping is not None: - logger.warning("The parameter %s has more than one mapping in the section. 
" - "The first found mapping, %s, will be used." % (param.name, param_mapping), 1) - else: - param_mapping = cli_element.option_identifier - if param_mapping is not None: - ret = param_mapping - else: - ret = param.name - if fix_underscore and ret.startswith("_"): - return ret[1:] - else: - return ret - - -def _extract_param_cli_name(param, ctd_model, fix_underscore=False): - # we generate parameters with colons for subgroups, but not for the two topmost parents (OpenMS legacy) - if type(param.parent) == ParameterGroup: - if hasattr(ctd_model, "cli") and ctd_model.cli: - logger.warning("Using nested parameter sections (NODE elements) is not compatible with ", 1) - return ":".join(extract_param_path(param, fix_underscore)[:-1]) + ":" + resolve_param_mapping(param, ctd_model, fix_underscore) - else: - return resolve_param_mapping(param, ctd_model, fix_underscore) - - -def extract_param_path(param, fix_underscore=False): +def extract_param_path(param, fix_dedup_prefix=False): pl = param.get_lineage(name_only=True) - if fix_underscore: + if fix_dedup_prefix: for i, p in enumerate(pl): - if p.startswith("_"): - pl[i] = pl[i][1:] + dedup_match = re.match("DEDUP_[0-9]+_DEDUP_(.*)", p) + if dedup_match: + pl[i] = dedup_match.group(1) return pl -# if type(param.parent) == ParameterGroup or type(param.parent) == Parameters: -# if not hasattr(param.parent.parent, "parent"): -# return [param.name] -# elif not hasattr(param.parent.parent.parent, "parent"): -# return [param.name] -# else: -# return extract_param_path(param.parent) + [param.name] -# else: -# return [param.name] -def extract_param_name(param, fix_underscore=False): +def extract_param_name(param, fix_dedup_prefix=False): # we generate parameters with colons for subgroups, but not for the two topmost parents (OpenMS legacy) - return ":".join(extract_param_path(param, fix_underscore)) + return ":".join(extract_param_path(param, fix_dedup_prefix)) def extract_command_line_prefix(param, ctd_model): - param_name = 
extract_param_name(param, True) - param_cli_name = _extract_param_cli_name(param, ctd_model, True) - if param_name == param_cli_name: - # there was no mapping, so for the cli name we will use a '-' in the prefix - param_cli_name = "-" + param_name - return param_cli_name + return "-" + extract_param_name(param, True) def indent(s, indentation=" "): diff --git a/ctdconverter/convert.py b/ctdconverter/convert.py index 935639b6..c647adcd 100644 --- a/ctdconverter/convert.py +++ b/ctdconverter/convert.py @@ -1,3 +1,4 @@ +import logging import os import sys import traceback @@ -75,7 +76,7 @@ def main(argv=None): # at this point we cannot parse the arguments, because each converter takes different arguments, meaning each # converter will register its own parameters after we've registered the basic ones... we have to do it old school if len(argv) < 2: - utils.logger.error("Not enough arguments provided") + logging.error("Not enough arguments provided") print("\nUsage: $ CTDConverter [TARGET] [ARGUMENTS]\n\n" "Where:\n" " target: one of 'cwl' or 'galaxy'\n\n" @@ -94,10 +95,10 @@ def main(argv=None): # print(program_license) # return 0 else: - utils.logger.error("Unrecognized target engine. Supported targets are 'cwl' and 'galaxy'.") + logging.error("Unrecognized target engine. 
Supported targets are 'cwl' and 'galaxy'.") return 1 - utils.logger.info("Using %s converter" % target) + logging.info("Using %s converter" % target) try: # Setup argument parser @@ -126,20 +127,20 @@ def main(argv=None): except ApplicationException as e: traceback.print_exc() - utils.logger.error("CTDConverter could not complete the requested operation.", 0) - utils.logger.error("Reason: " + e.msg, 0) + logging.error("CTDConverter could not complete the requested operation.") + logging.error("Reason: " + e.msg) return 1 except ModelError as e: traceback.print_exc() - utils.logger.error("There seems to be a problem with one of your input CTDs.", 0) - utils.logger.error("Reason: " + e.msg, 0) + logging.error("There seems to be a problem with one of your input CTDs.") + logging.error("Reason: " + e.msg) return 1 except Exception as e: traceback.print_exc() - utils.logger.error("CTDConverter could not complete the requested operation.", 0) - utils.logger.error("Reason: " + e.msg, 0) + logging.error("CTDConverter could not complete the requested operation.") + logging.error("Reason: " + e.msg) return 2 diff --git a/ctdconverter/cwl/converter.py b/ctdconverter/cwl/converter.py index eaa55ce0..595d97a7 100755 --- a/ctdconverter/cwl/converter.py +++ b/ctdconverter/cwl/converter.py @@ -7,7 +7,7 @@ # which kind of sucks, because this seems to be the way to state that a parameter is truly optional and has no default # since cwlgen is just "fancy classes" around the yaml.dump() method, we implemented our own generation of yaml - +import logging import ruamel.yaml as yaml from CTDopts.CTDopts import ( _Choices, @@ -17,10 +17,7 @@ ) from .. 
import __version__ as version -from ..common import ( - logger, - utils -) +from ..common import utils # all cwl-related properties are defined here @@ -75,10 +72,10 @@ def convert_models(args, parsed_ctds): origin_file = parsed_ctd.input_file output_file = parsed_ctd.suggested_output_file - logger.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file))) + logging.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file))) cwl_tool = convert_to_cwl(model, args) - logger.info("Writing to %s" % utils.get_filename(output_file), 1) + logging.info("Writing to %s" % utils.get_filename(output_file)) stream = open(output_file, 'w') stream.write(CWL_SHEBANG + '\n\n') diff --git a/ctdconverter/galaxy/converter.py b/ctdconverter/galaxy/converter.py index d0744000..75e609c0 100755 --- a/ctdconverter/galaxy/converter.py +++ b/ctdconverter/galaxy/converter.py @@ -1,8 +1,10 @@ #!/usr/bin/env python import copy import json +import logging import os import os.path +import random import re import sys from collections import OrderedDict @@ -30,7 +32,6 @@ ) from ..common import ( - logger, utils ) from ..common.exceptions import ( @@ -47,9 +48,11 @@ STDIO_MACRO_NAME = "stdio" REQUIREMENTS_MACRO_NAME = "requirements" -ADVANCED_OPTIONS_NAME = "adv_opts_" +ADVANCED_OPTIONS_NAME = "adv_opts" -REQUIRED_MACROS = [REQUIREMENTS_MACRO_NAME, STDIO_MACRO_NAME, ADVANCED_OPTIONS_NAME + "macro"] +REQUIRED_MACROS = [REQUIREMENTS_MACRO_NAME, STDIO_MACRO_NAME, ADVANCED_OPTIONS_NAME + "_macro"] + +random.seed(42) class ExitCode: @@ -76,18 +79,6 @@ def add_specific_args(parser): "brief example on the layout of this file.", default=None, required=False) parser.add_argument("-a", "--add-to-command-line", dest="add_to_command_line", help="Adds content to the command line", default="", required=False) - parser.add_argument("-d", "--datatypes-destination", dest="data_types_destination", - help="Specify the location of a datatypes_conf.xml to modify and 
add the registered " - "data types. If the provided destination does not exist, a new file will be created.", - default=None, required=False) - parser.add_argument("-c", "--default-category", dest="default_category", default="DEFAULT", required=False, - help="Default category to use for tools lacking a category when generating tool_conf.xml") - parser.add_argument("-t", "--tool-conf-destination", dest="tool_conf_destination", default=None, required=False, - help="Specify the location of an existing tool_conf.xml that will be modified to include " - "the converted tools. If the provided destination does not exist, a new file will" - "be created.") - parser.add_argument("-g", "--galaxy-tool-path", dest="galaxy_tool_path", default=None, required=False, - help="The path that will be prepended to the file names when generating tool_conf.xml") parser.add_argument("-r", "--required-tools", dest="required_tools_file", default=None, required=False, help="Each line of the file will be interpreted as a tool name that needs translation. " "Run with '-h' or '--help' to see a brief example on the format of this file.") @@ -101,10 +92,10 @@ def add_specific_args(parser): "valid macros file. All defined macros will be imported.") parser.add_argument("--test-macros", dest="test_macros_files", default=[], nargs="*", action="append", required=None, - help="Import tests from the files given file(s) as macros. " + help="Insert tests from the files given file(s) in the test section. 
" "The macro names must end with the id of the tools") parser.add_argument("--test-macros-prefix", dest="test_macros_prefix", default=[], nargs="*", - action="append", required=None, help="The prefix of the macro name in the corresponding trest macros file") + action="append", required=None, help="The prefix of the macro name in the corresponding test macros file") parser.add_argument("--test-test", dest="test_test", action='store_true', default=False, required=False, help="Generate a simple test for the internal unit tests.") @@ -114,6 +105,8 @@ def add_specific_args(parser): help="File extensions that can't be sniffed in Galaxy." "Needs to be the OpenMS extensions (1st column in --formats-file)." "For testdata with such extensions ftype will be set in the tes according to the file extension") + parser.add_argument("--test-condition", dest="test_condition", nargs="+", default=["compare=sim_size", "delta_frac=0.05"], required=False, + help="Condition for dile tests") parser.add_argument("--tool-version", dest="tool_version", required=False, default=None, help="Tool version to use (if not given its extracted from the CTD)") @@ -122,7 +115,7 @@ def add_specific_args(parser): parser.add_argument("--bump-file", dest="bump_file", required=False, default=None, help="json file defining tool versions." "tools not listed in the file default to 0." 
- "if not given @GALAXY_VERSION@ is used") + "if not given @VERSION_SUFFIX@ is used") def modify_param_for_galaxy(param): @@ -136,7 +129,7 @@ def modify_param_for_galaxy(param): # need to be taken care by hardcoded values and the other cases # are chosen automatically if not specified on the command line) if param.required and not (param.default is None or type(param.default) is _Null): - logger.warning(f"Data parameter {param.name} with default ({param.default})", 1) + logging.warning(f"Data parameter {param.name} with default ({param.default})") param.required = False param.default = _Null() return param @@ -161,7 +154,7 @@ def convert_models(args, parsed_ctds): tool_version=args.tool_version, supported_file_types=supported_file_formats, required_macros=REQUIRED_MACROS, - dont_expand=[ADVANCED_OPTIONS_NAME + "macro", "references", + dont_expand=[ADVANCED_OPTIONS_NAME + "_macro", "references", "list_string_val", "list_string_san", "list_float_valsan", "list_integer_valsan"]) @@ -183,6 +176,7 @@ def convert_models(args, parsed_ctds): parameter_hardcoder=args.parameter_hardcoder, test_test=args.test_test, test_only=args.test_only, + test_condition=args.test_condition, test_unsniffable=args.test_unsniffable, test_macros_file_names=args.test_macros_files, test_macros_prefix=args.test_macros_prefix, @@ -221,15 +215,14 @@ def parse_macros_files(macros_file_names, tool_version, supported_file_types, re for macros_file_name in macros_file_names: try: macros_file = open(macros_file_name) - logger.info("Loading macros from %s" % macros_file_name, 0) + logging.info("Loading macros from %s" % macros_file_name) root = parse(macros_file).getroot() for xml_element in root.findall("xml"): name = xml_element.attrib["name"] if name in macros_to_expand: - logger.warning("Macro %s has already been found. Duplicate found in file %s." % - (name, macros_file_name), 0) + logging.warning("Macro %s has already been found. Duplicate found in file %s." 
% (name, macros_file_name)) continue - logger.info("Macro %s found" % name, 1) + logging.info("Macro %s found" % name) macros_to_expand.append(name) except ParseError as e: raise ApplicationException("The macros file " + macros_file_name + " could not be parsed. Cause: " + str(e)) @@ -240,7 +233,7 @@ def parse_macros_files(macros_file_names, tool_version, supported_file_types, re macros_file.close() tool_ver_tk = root.find("token[@name='@TOOL_VERSION@']") - galaxy_ver_tk = root.find("token[@name='@GALAXY_VERSION@']") + galaxy_ver_tk = root.find("token[@name='@VERSION_SUFFIX@']") if tool_ver_tk is None: tool_ver_tk = add_child_node(root, "token", OrderedDict([("name", "@TOOL_VERSION@")])) tool_ver_tk.text = tool_version @@ -315,7 +308,7 @@ def check_test_macros(test_macros_files, test_macros_prefix, parsed_ctds): for xml_element in root.findall("xml"): name = xml_element.attrib["name"] if not name.startswith(mp): - logger.warning("Testmacro with invalid prefix %s." % (mp), 0) + logging.warning("Testmacro with invalid prefix %s." % (mp)) continue name = name[len(mp):] macro_ids.add(name) @@ -325,11 +318,11 @@ def check_test_macros(test_macros_files, test_macros_prefix, parsed_ctds): except OSError as e: raise ApplicationException("The macros file " + mf + " could not be opened. Cause: " + str(e)) for t in tool_ids - macro_ids: - logger.error("missing %s" % t) + logging.error("missing %s" % t) add_child_node(root, "xml", OrderedDict([("name", mp + t)])) if len(macro_ids - tool_ids): - logger.warning("Unnecessary macros in {}: {}".format(mf, macro_ids - tool_ids)) + logging.warning("Unnecessary macros in {}: {}".format(mf, macro_ids - tool_ids)) tree = ElementTree(root) tree.write(mf, encoding="UTF-8", xml_declaration=True, pretty_print=True) @@ -360,7 +353,7 @@ def parse_file_formats(formats_file): parsed_formats[1], composite)) else: - logger.warning("Invalid line at line number %d of the given formats file. 
Line will be ignored:\n%s" % (line_number, line), 0) + logging.warning("Invalid line at line number %d of the given formats file. Line will be ignored:\n%s" % (line_number, line)) return supported_formats @@ -401,16 +394,9 @@ def validate_and_prepare_args(args, model): for variable_name in input_variables_to_check: utils.validate_argument_is_valid_path(args, variable_name) - # check that the provided output files, if provided, contain a valid file path (i.e., not a folder) - output_variables_to_check = ["data_types_destination", "tool_conf_destination"] - for variable_name in output_variables_to_check: - file_name = getattr(args, variable_name) - if file_name is not None and os.path.isdir(file_name): - raise ApplicationException("The provided output file name (%s) points to a directory." % file_name) - if not args.macros_files: # list is empty, provide the default value - logger.warning("Using default macros from galaxy/macros.xml", 0) + logging.warning("Using default macros from galaxy/macros.xml") args.macros_files = [os.path.dirname(os.path.abspath(__file__)) + "/macros.xml"] if args.tool_version is None: @@ -434,22 +420,21 @@ def _convert_internal(parsed_ctds, **kwargs): expand_macros, create_command, create_inputs, create_outputs @return a tuple containing the model, output destination, origin file """ - parameter_hardcoder = kwargs["parameter_hardcoder"] for parsed_ctd in parsed_ctds: model = parsed_ctd.ctd_model if kwargs["skip_tools"] is not None and model.name in kwargs["skip_tools"]: - logger.info("Skipping tool %s" % model.name, 0) + logging.info("Skipping tool %s" % model.name) continue elif kwargs["required_tools"] is not None and model.name not in kwargs["required_tools"]: - logger.info("Tool %s is not required, skipping it" % model.name, 0) + logging.info("Tool %s is not required, skipping it" % model.name) continue origin_file = parsed_ctd.input_file output_file = parsed_ctd.suggested_output_file - # overwrite attributes of the parsed ctd parameters 
as specified in hardcoded parameterd json + # overwrite attributes of the parsed ctd parameters as specified in hardcoded parameters json for param in utils.extract_and_flatten_parameters(model): hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(utils.extract_param_name(param), model.name, 'CTD') if hardcoded_attributes is not None: @@ -460,15 +445,21 @@ def _convert_internal(parsed_ctds, **kwargs): try: t = GALAXY_TYPE_TO_TYPE[hardcoded_attributes[a]] except KeyError: - logger.error("Could not set hardcoded attribute {}={} for {}".format(a, hardcoded_attributes[a], param.name)) + logging.error("Could not set hardcoded attribute {}={} for {}".format(a, hardcoded_attributes[a], param.name)) sys.exit(1) setattr(param, a, t) - elif type(getattr(param, a)) is _FileFormat or (param.type in [_InFile, _OutFile, _OutPrefix] and a == "restrictions"): - setattr(param, a, _FileFormat(str(hardcoded_attributes[a]))) - elif type(getattr(param, a)) is _Choices: - setattr(param, a, _Choices(str(hardcoded_attributes[a]))) - elif type(getattr(param, a)) is _NumericRange: - raise Exception("Overwriting of Numeric Range not implemented") + elif a == "restrictions": + if type(getattr(param, a)) is _FileFormat or param.type in [_InFile, _OutFile, _OutPrefix]: + setattr(param, a, _FileFormat(str(hardcoded_attributes[a]))) + elif type(getattr(param, a)) is _Choices: + setattr(param, a, _Choices(str(hardcoded_attributes[a]))) + elif type(getattr(param, a)) is _NumericRange: + raise Exception("Overwriting of Numeric Range not implemented") + elif getattr(param, a) is None: + # if the parameter has no restrictions yet then we just use Choices .. 
might need to be extended + setattr(param, a, _Choices(str(hardcoded_attributes[a]))) + else: + setattr(param, a, hardcoded_attributes[a]) else: setattr(param, a, hardcoded_attributes[a]) @@ -476,11 +467,11 @@ def _convert_internal(parsed_ctds, **kwargs): test = create_test_only(parsed_ctd.ctd_model, **kwargs) tree = ElementTree(test) output_file = parsed_ctd.suggested_output_file - logger.info("Writing to %s" % utils.get_filename(output_file), 1) + logging.info("Writing to %s" % utils.get_filename(output_file)) tree.write(output_file, encoding="UTF-8", xml_declaration=False, pretty_print=True) continue - logger.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file)), 0) + logging.info("Converting {} (source {})".format(model.name, utils.get_filename(origin_file))) tool = create_tool(model, kwargs.get("tool_profile", None), kwargs.get("bump", None)) @@ -495,18 +486,19 @@ def _convert_internal(parsed_ctds, **kwargs): outputs = create_outputs(tool, model, **kwargs) if kwargs["test_test"]: create_tests(tool, inputs=copy.deepcopy(inputs), outputs=copy.deepcopy(outputs)) - if kwargs["test_macros_prefix"]: - create_tests(tool, test_macros_prefix=kwargs['test_macros_prefix'], name=model.name) + else: + add_child_node(tool, "tests") create_help(tool, model) # citations are required to be at the end expand_macro(tool, "references") + add_macros(tool, model, kwargs.get("test_macros_prefix"), kwargs.get("test_macros_file_names")) # wrap our tool element into a tree to be able to serialize it tree = ElementTree(tool) - logger.info("Writing to %s" % utils.get_filename(output_file), 1) - tree.write(output_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) - + logging.info("Writing to %s" % utils.get_filename(output_file)) + etree.indent(tree) + tree.write(output_file, encoding="UTF-8", pretty_print=True) def write_header(tool, model): """ @@ -529,13 +521,13 @@ def create_tool(model, profile, bump): tool_id = model.name.replace(" ", "_") 
if bump is None: - gxy_version = "@GALAXY_VERSION@" + gxy_version = "@VERSION_SUFFIX@" elif model.name in bump: gxy_version = str(bump[model.name]) elif tool_id in bump: gxy_version = str(bump[tool_id]) else: - gxy_version = "@GALAXY_VERSION@" + gxy_version = "@VERSION_SUFFIX@" attrib = OrderedDict([("id", tool_id), ("name", model.name), @@ -553,7 +545,11 @@ def create_description(tool, model): """ if "description" in model.opt_attribs.keys() and model.opt_attribs["description"] is not None: description = SubElement(tool, "description") - description.text = model.opt_attribs["description"] + text = model.opt_attribs["description"] + text = text.split("\n")[0].strip() + if text[-1] == ".": + text = text[:-1] + description.text = text def create_configfiles(tool, model, **kwargs): @@ -623,8 +619,8 @@ def create_command(tool, model, **kwargs): final_cmd['command'].extend(kwargs["add_to_command_line"]) final_cmd['postprocessing'].extend(["", "## Postprocessing"]) - advanced_command_start = "#if ${aon}cond.{aon}selector=='advanced':".format(aon=ADVANCED_OPTIONS_NAME) - advanced_command_end = "#end if" + advanced_command_start = "## advanced options" + advanced_command_end = "" parameter_hardcoder = kwargs["parameter_hardcoder"] supported_file_formats = kwargs["supported_file_formats"] @@ -647,7 +643,7 @@ def create_command(tool, model, **kwargs): # in the else branch the parameter is neither blacklisted nor hardcoded... _actual_parameter = get_galaxy_parameter_path(param) - actual_parameter = get_galaxy_parameter_path(param, fix_underscore=True) + actual_parameter = get_galaxy_parameter_path(param, fix_dedup_prefix=True) # all but bool params need the command line argument (bools have it already in the true/false value) if param.type is _OutFile or param.type is _OutPrefix or param.type is _InFile: param_cmd['command'].append(command_line_prefix) @@ -659,19 +655,29 @@ def create_command(tool, model, **kwargs): # this leads to conflicts while linking... 
might also be better in general if param.type is _InFile: param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) + # TODO we copy input files to the JWD because of https://github.com/OpenMS/OpenMS/issues/7439 if param.is_list: - param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + _actual_parameter + ") if f])} && ") - param_cmd['preprocessing'].append("${' '.join([\"ln -s '%s' '" + actual_parameter + "/%s/%s.%s' && \" % (f, i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") - param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate($" + _actual_parameter + ") if f])}") + param_cmd['preprocessing'].append(f'#if ${_actual_parameter}_select == "no"') + param_cmd['preprocessing'].append(f"mkdir ${{' '.join([\"'{actual_parameter}/%s'\" % (i) for i, f in enumerate(${_actual_parameter}) if f])}} && ") + param_cmd['preprocessing'].append(f"${{' '.join([\"cp '%s' '{actual_parameter}/%s/%s.%s' && \" % (f, i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate(${_actual_parameter}) if f])}}") + param_cmd['preprocessing'].append('#else') + param_cmd['preprocessing'].append(f"cp '${_actual_parameter}' '{actual_parameter}/${{re.sub(\"[^\\w\\-_]\", \"_\", ${_actual_parameter}.element_identifier)}}.$gxy2omsext(${_actual_parameter}.ext)' &&") + param_cmd['preprocessing'].append('#end if') + param_cmd['command'].append(f'#if ${actual_parameter}_select == "no"') + param_cmd['command'].append(f"${{' '.join([\"'{actual_parameter}/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $gxy2omsext(f.ext)) for i, f in enumerate(${_actual_parameter}) if f])}}") + param_cmd['command'].append('#else') + 
param_cmd['command'].append(f"'{actual_parameter}/${{re.sub(\"[^\\w\\-_]\", \"_\", ${_actual_parameter}.element_identifier)}}.$gxy2omsext(${_actual_parameter}.ext)'") + param_cmd['command'].append('#end if') else: - param_cmd['preprocessing'].append("ln -s '$" + _actual_parameter + "' '" + actual_parameter + "/${re.sub(\"[^\\w\\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)' &&") + param_cmd['preprocessing'].append("cp '$" + _actual_parameter + "' '" + actual_parameter + "/${re.sub(\"[^\\w\\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)' &&") param_cmd['command'].append("'" + actual_parameter + "/${re.sub(\"[^\\w\\-_]\", \"_\", $" + _actual_parameter + ".element_identifier)}.$gxy2omsext($" + _actual_parameter + ".ext)'") + elif param.type is _OutPrefix: param_cmd['preprocessing'].append("mkdir %s &&" % actual_parameter) param_cmd['command'].append(actual_parameter + "/") elif param.type is _OutFile: _actual_parameter = get_galaxy_parameter_path(param, separator="_") - actual_parameter = get_galaxy_parameter_path(param, separator="_", fix_underscore=True) + actual_parameter = get_galaxy_parameter_path(param, separator="_", fix_dedup_prefix=True) # check if there is a parameter that sets the format # if so we add an extension to the generated files which will be used to # determine the format in the output tag @@ -705,12 +711,12 @@ def create_command(tool, model, **kwargs): if len(formats) == 1: fmt = formats.pop() if param.is_list: - logger.info(f"1 fmt + list {param.name} -> {actual_input_parameter}", 1) + logging.info(f"1 fmt + list {param.name} -> {actual_input_parameter}") param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', 
f.element_identifier), $gxy2omsext(\"" + fmt + "\")) for i, f in enumerate($" + actual_input_parameter + ") if f])}") param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s' '" + _actual_parameter + "/%(bn)s/%(id)s'\"%{\"bn\": i, \"id\": re.sub('[^\\w\\-_]', '_', f.element_identifier), \"gext\": $gxy2omsext(\"" + fmt + "\")} for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: - logger.info("1 fmt + dataset %s" % param.name, 1) + logging.info("1 fmt + dataset %s" % param.name) param_cmd['command'].append("'" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${gxy2omsext(\"" + fmt + "\")}' '$" + _actual_parameter + "'") @@ -719,12 +725,12 @@ def create_command(tool, model, **kwargs): # - list: let the command create output files with the oms extensions, postprocessing renames them to the galaxy extensions, output is then discover + __name_and_ext__ elif type_param_name is not None: if param.is_list: - logger.info("type + list %s" % param.name, 1) + logging.info("type + list %s" % param.name) param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), $" + type_param_name + ") for i, f in enumerate($" + actual_input_parameter + ") if f])}") param_cmd['postprocessing'].append("${' '.join([\"&& mv -n '" + actual_parameter + "/%(bn)s/%(id)s.%(omsext)s' '" + actual_parameter + "/%(bn)s/%(id)s.%(gext)s'\"%{\"bn\": i, \"id\": re.sub('[^\\w\\-_]', '_', f.element_identifier), \"omsext\":$" + type_param_name + ", \"gext\": $oms2gxyext(str($" + type_param_name + "))} for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: - logger.info("type + dataset %s" % param.name, 1) + 
logging.info("type + dataset %s" % param.name) # 1st create file with openms extension (often required by openms) # then move it to the actual place specified by the parameter # the format is then set by the tag using @@ -732,11 +738,11 @@ def create_command(tool, model, **kwargs): param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + type_param_name + "}' '$" + actual_parameter + "'") elif actual_input_parameter is not None: if param.is_list: - logger.info("actual + list %s" % param.name, 1) + logging.info("actual + list %s" % param.name) param_cmd['preprocessing'].append("mkdir ${' '.join([\"'" + actual_parameter + "/%s'\" % (i) for i, f in enumerate($" + actual_input_parameter + ") if f])} && ") param_cmd['command'].append("${' '.join([\"'" + actual_parameter + "/%s/%s.%s'\"%(i, re.sub('[^\\w\\-_]', '_', f.element_identifier), f.ext) for i, f in enumerate($" + actual_input_parameter + ") if f])}") else: - logger.info(f"actual + dataset {param.name} {actual_input_parameter} {corresponding_input.is_list}", 1) + logging.info(f"actual + dataset {param.name} {actual_input_parameter} {corresponding_input.is_list}") if corresponding_input.is_list: param_cmd['command'].append("'" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}'") param_cmd['postprocessing'].append("&& mv '" + actual_parameter + "/output.${" + actual_input_parameter + "[0].ext}' '$" + _actual_parameter + "'") @@ -747,7 +753,7 @@ def create_command(tool, model, **kwargs): if param.is_list: raise Exception("output parameter itemlist %s without corresponding input") else: - logger.info("else + dataset %s" % param.name, 1) + logging.info("else + dataset %s" % param.name) param_cmd['command'].append("'$" + _actual_parameter + "'") # # select with multiple = true @@ -768,7 +774,7 @@ def create_command(tool, model, **kwargs): # need no if (otherwise the empty string could not be provided) if not (param.required or is_boolean_parameter(param) or (param.type is 
str and param.restrictions is None)): # and not(param.type is _InFile and param.is_list): - actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG", fix_underscore=True) + actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG", fix_dedup_prefix=True) _actual_parameter = get_galaxy_parameter_path(param, suffix="FLAG") for stage in param_cmd: if len(param_cmd[stage]) == 0: @@ -825,13 +831,27 @@ def import_macros(tool, model, **kwargs): token_node.text = utils.extract_tool_executable_path(model, kwargs["default_executable_path"]) # add nodes - for macro_file_name in kwargs["macros_file_names"] + kwargs["test_macros_file_names"]: + for macro_file_name in kwargs["macros_file_names"]: macro_file = open(macro_file_name) import_node = add_child_node(macros_node, "import") # do not add the path of the file, rather, just its basename import_node.text = os.path.basename(macro_file.name) +def add_macros(tool, model, test_macros_prefix=None, test_macros_file_names=None): + """ + paste all test macros for a tool into the tests node of the tool + """ + if test_macros_file_names is None: + return + tool_id = model.name.replace(" ", "_") + tests_node = tool.find(".//tests") + for macros_file, macros_prefix in zip(test_macros_file_names, test_macros_prefix): + macros_root = parse(macros_file) + tool_tests = macros_root.find(f".//xml[@name='{macros_prefix}{tool_id}']") + for test in tool_tests: + tests_node.append(test) + def expand_macro(node, macro, attribs=None): """Add to node.""" expand_node = add_child_node(node, "expand") @@ -851,23 +871,28 @@ def expand_macros(node, macros_to_expand): expand_node.attrib["macro"] = expand_macro -def get_galaxy_parameter_path(param, separator=".", suffix=None, fix_underscore=False): +def get_galaxy_parameter_path(param, separator=".", suffix=None, fix_dedup_prefix=False): """ Get the complete path for a parameter as a string where the path components are joined by the given separator. A given suffix can - be appended. 
+ be appended """ - p = get_galaxy_parameter_name(param, suffix, fix_underscore) - path = utils.extract_param_path(param, fix_underscore) + p = get_galaxy_parameter_name(param, suffix, fix_dedup_prefix) + path = utils.extract_param_path(param, fix_dedup_prefix) if len(path) > 1: - return (separator.join(path[:-1]) + separator + p).replace("-", "_") - elif param.advanced and (param.type is not _OutFile or suffix): - return ADVANCED_OPTIONS_NAME + "cond." + p + path = path[:-1] + [p] + elif param.advanced and (param.type is not _OutFile and param.type is not _OutPrefix or suffix): + path = [ADVANCED_OPTIONS_NAME, p] else: - return p + path = [p] + # data input params with multiple="true" are in a (batch mode) conditional + # which needs to be added to the param path list + if param.type is _InFile and param.is_list: + path.insert(len(path) - 1, f"{p}_cond") + return separator.join(path).replace("-", "_") -def get_galaxy_parameter_name(param, suffix=None, fix_underscore=False): +def get_galaxy_parameter_name(param, suffix=None, fix_dedup_prefix=False): """ get the name of the parameter used in the galaxy tool - replace : and - by _ @@ -881,8 +906,9 @@ def get_galaxy_parameter_name(param, suffix=None, fix_underscore=False): @return the name used for the parameter in the tool form """ p = param.name.replace("-", "_") - if fix_underscore and p.startswith("_"): - p = p[1:] + dedup_match = re.match("DEDUP_[0-9]+_DEDUP_(.*)", p) + if fix_dedup_prefix and dedup_match: + p = dedup_match.group(1) if param.type is _OutFile and suffix is not None: return f"{p}_{suffix}" else: @@ -961,7 +987,7 @@ def get_input_with_same_restrictions(out_param, model, check_formats): for param in utils.extract_and_flatten_parameters(model): if param.type is not _InFile: continue -# logger.error("%s %s %s %s %s %s" %(out_param.name, param.name, param.is_list, out_param.is_list, param.restrictions, out_param.restrictions)) +# logging.error("%s %s %s %s %s %s" %(out_param.name, param.name, 
param.is_list, out_param.is_list, param.restrictions, out_param.restrictions)) if allow_different_type or param.is_list == out_param.is_list: if check_formats: if param.restrictions is None and out_param.restrictions is None: @@ -970,7 +996,7 @@ def get_input_with_same_restrictions(out_param, model, check_formats): matching.append(param) else: matching.append(param) -# logger.error("match %s "%([_.name for _ in matching])) +# logging.error("match %s "%([_.name for _ in matching])) if len(matching) > 0: break if len(matching) == 1: @@ -992,7 +1018,7 @@ def create_inputs(tool, model, **kwargs): section_params = dict() # some suites (such as OpenMS) need some advanced options when handling inputs - advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_NAME + "macro")])) + advanced_node = Element("expand", OrderedDict([("macro", ADVANCED_OPTIONS_NAME + "_macro")])) parameter_hardcoder = kwargs["parameter_hardcoder"] supported_file_formats = kwargs["supported_file_formats"] g2o, o2g = get_fileformat_maps(supported_file_formats) @@ -1035,25 +1061,23 @@ def create_inputs(tool, model, **kwargs): corresponding_input, fmt_from_corresponding = get_corresponding_input(param, model) if len(formats) > 1 and type_param is None and (corresponding_input is None or not fmt_from_corresponding): # and not param.is_list: - fmt_select = add_child_node(parent_node, "param", OrderedDict([("name", param.name + "_type"), ("type", "select"), ("optional", "false"), ("label", f"File type of output {param.name} ({param.description})")])) + fmt_select_attrib = OrderedDict([ + ("name", param.name + "_type"), + ("type", "select"), + ("label", f"File type of output {param.name} ({param.description})") + ]) + fmt_select = add_child_node(parent_node, "param", fmt_select_attrib) g2o, o2g = get_fileformat_maps(kwargs["supported_file_formats"]) -# for f in formats: -# option_node = add_child_node(fmt_select, "option", OrderedDict([("value", g2o[f])]), f) for choice in 
param.restrictions.formats: option_node = add_child_node(fmt_select, "option", OrderedDict([("value", str(choice))])) option_node.text = o2g[str(choice)] if choice.lower() != o2g[str(choice)]: - option_node.text += " (%s)" % choice + option_node.text += f" ({choice})" continue # create the actual param node and fill the attributes - param_node = add_child_node(parent_node, "param") - create_param_attribute_list(param_node, param, model, kwargs["supported_file_formats"]) - - hardcoded_attributes = parameter_hardcoder.get_hardcoded_attributes(param.name, model.name, 'XML') - if hardcoded_attributes is not None: - for a in hardcoded_attributes: - param_node.attrib[a] = str(hardcoded_attributes[a]) + param_node = create_param_attribute_list(param, model, kwargs["supported_file_formats"], parameter_hardcoder) + parent_node.append(param_node) section_parents = [utils.extract_param_name(section_params[sn].parent) for sn in section_nodes] for sn in section_nodes: @@ -1110,15 +1134,13 @@ def get_formats(param, model, o2g): elif is_out_type_param(param, model): choices = param.restrictions.choices else: - raise InvalidModelException("Unrecognized restriction type [%(type)s] " - "for [%(name)s]" % {"type": type(param.restrictions), - "name": param.name}) + raise InvalidModelException(f"Unrecognized restriction type [{type(param.restrictions)}] for [{param.name}]") # check if there are formats that have not been registered yet... formats = set() for format_name in choices: if format_name not in o2g: - logger.warning(f"Ignoring unknown format {format_name} for parameter {param.name}", 1) + logging.warning(f"Ignoring unknown format {format_name} for parameter {param.name}") else: formats.add(format_name) return sorted(formats) @@ -1127,7 +1149,7 @@ def get_formats(param, model, o2g): def get_galaxy_formats(param, model, o2g, default=None): """ determine galaxy formats for a parm (i.e. list of allowed Galaxy extensions) - from the CTD restictions (i.e. 
the OpenMS extensions) + from the CTD restrictions (i.e. the OpenMS extensions) - if there is a single one, then take this - if there is none than use given default """ @@ -1143,16 +1165,28 @@ def get_galaxy_formats(param, model, o2g, default=None): return sorted(gxy_formats) -def create_param_attribute_list(param_node, param, model, supported_file_formats): +def get_oms_format(formats, gxy_format, o2g): + """ + determine the format from a list of openms formats + (restriction of a ctd input-file param) that corresponds + to a galaxy format + """ + for f in formats: + if o2g.get(f) == gxy_format: + return f + + +def create_param_attribute_list(param, model, supported_file_formats, parameter_hardcoder): """ get the attributes of input parameters - @param param_node the galaxy tool param node + @param param the ctd parameter @param supported_file_formats """ g2o, o2g = get_fileformat_maps(supported_file_formats) + param_node = Element("param") # set the name, argument and a first guess for the type (which will be over written # in some cases .. see below) # even if the conversion relies on the fact that the param names are identical @@ -1160,24 +1194,23 @@ def create_param_attribute_list(param_node, param, model, supported_file_formats # parameters need to be treated in cheetah. variable names are currently fixed back # to dashes in fill_ctd.py. 
currently there seems to be only a single tool # requiring this https://github.com/OpenMS/OpenMS/pull/4529 - param_node.attrib["name"] = get_galaxy_parameter_name(param) - param_node.attrib["argument"] = "-%s" % utils.extract_param_name(param) + argument = "-%s" % utils.extract_param_name(param) + name = get_galaxy_parameter_name(param) + if argument.lstrip("-").replace("-", "_") != name: + param_node.attrib["name"] = get_galaxy_parameter_name(param) + param_node.attrib["argument"] = argument + param_type = TYPE_TO_GALAXY_TYPE[param.type] if param_type is None: raise ModelError("Unrecognized parameter type %(type)s for parameter %(name)s" % {"type": param.type, "name": param.name}) + # ITEMLIST is rendered as text field (even if its integers or floats), an - # exception is files which are treated a bit below + # exception is files which are treated just below if param.is_list: param_type = "text" - if is_selection_parameter(param): param_type = "select" - if len(param.restrictions.choices) < 5: - param_node.attrib["display"] = "checkboxes" - if param.is_list: - param_node.attrib["multiple"] = "true" - if is_boolean_parameter(param): param_type = "boolean" @@ -1204,10 +1237,25 @@ def create_param_attribute_list(param_node, param, model, supported_file_formats # (in Galaxy the default would be prefilled in the form and at least # one option needs to be selected). 
if not (param.default is None or type(param.default) is _Null) and param_node.attrib["type"] in ["integer", "float", "text", "boolean", "select"]: - logger.error("%s %s %s %s %s" % (param.name, param.default is None, type(param.default) is _Null, param_type, param.type)) - param_node.attrib["optional"] = "false" + # logger.error("%s %s %s %s %s" % (param.name, param.default is None, type(param.default) is _Null, param_type, param.type)) + optional = False + else: + optional = not param.required + + # set multiple for select parameters if needed + # also set optional if it's not the default + # - for select with multiple=true optional defaults to true, i.e. we set it if the parameter is not optional + # - never set it for bool + # - otherwise the default is false, i.e. we set the attribute if the parameter is optional + if is_selection_parameter(param) and param.is_list: + param_node.attrib["multiple"] = "true" + if not optional: + param_node.attrib["optional"] = str(optional).lower() + if param_type == "boolean": + pass else: - param_node.attrib["optional"] = str(not param.required).lower() + if optional: + param_node.attrib["optional"] = str(optional).lower() # check for parameters with restricted values (which will correspond to a "select" in galaxy) if param.restrictions is not None or param_type == "boolean": @@ -1215,15 +1263,8 @@ def create_param_attribute_list(param_node, param, model, supported_file_formats if param_type == "boolean": create_boolean_parameter(param_node, param) elif type(param.restrictions) is _Choices: - - # TODO if the parameter is used to select the output file type the - # options need to be replaced with the Galaxy data types - # if is_out_type_param(param, model): - # param.restrictions.choices = get_supported_file_types(param.restrictions.choices, supported_file_formats) - # create as many