Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Module meta.yml restructure #3028

Closed
5 changes: 3 additions & 2 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,11 +1196,12 @@ def command_modules_test(ctx, tool, dir, no_prompts, update, once, profile):
is_flag=True,
help="Fix the module version if a newer version is available",
)
def command_modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version):
@click.option("--fix", is_flag=True, help="Fix all linting tests if possible.")
def command_modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix):
"""
Lint one or more modules in a directory.
"""
modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version)
modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix)


# nf-core modules info
Expand Down
3 changes: 2 additions & 1 deletion nf_core/commands_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def modules_test(ctx, tool, dir, no_prompts, update, once, profile):
sys.exit(1)


def modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version):
def modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version, fix):
"""
Lint one or more modules in a directory.

Expand All @@ -261,6 +261,7 @@ def modules_lint(ctx, tool, dir, registry, key, all, fail_warned, local, passed,
module_lint = ModuleLint(
dir,
fail_warned=fail_warned,
fix=fix,
registry=ctx.params["registry"],
remote_url=ctx.obj["modules_repo_url"],
branch=ctx.obj["modules_repo_branch"],
Expand Down
2 changes: 2 additions & 0 deletions nf_core/components/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(
component_type,
dir,
fail_warned=False,
fix=False,
remote_url=None,
branch=None,
no_pull=False,
Expand All @@ -72,6 +73,7 @@ def __init__(
)

self.fail_warned = fail_warned
self.fix = fix
self.passed = []
self.warned = []
self.failed = []
Expand Down
45 changes: 32 additions & 13 deletions nf_core/components/nfcore_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,20 @@ def get_inputs_from_main_nf(self):
log.debug(f"Could not find any inputs in {self.main_nf}")
return inputs
input_data = data.split("input:")[1].split("output:")[0]
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, input_data, re.MULTILINE)
for _, match in enumerate(matches, start=1):
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
inputs.append(input_val)
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
inputs.append(input_val)
for line in input_data.split("\n"):
channel_elements = []
regex = r"(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
matches = re.finditer(regex, line)
for _, match in enumerate(matches, start=1):
input_val = None
if match.group(3):
input_val = match.group(3).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
elif match.group(4):
input_val = match.group(4).split(",")[0] # handle `files, stageAs: "inputs/*"` cases
if input_val:
channel_elements.append({input_val: {}})
if len(channel_elements) > 0:
inputs.append(channel_elements)
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
self.inputs = inputs

Expand All @@ -194,9 +199,23 @@ def get_outputs_from_main_nf(self):
log.debug(f"Could not find any outputs in {self.main_nf}")
return outputs
output_data = data.split("output:")[1].split("when:")[0]
regex = r"emit:\s*([^)\s,]+)"
matches = re.finditer(regex, output_data, re.MULTILINE)
for _, match in enumerate(matches, start=1):
outputs.append(match.group(1))
regex_emit = r"emit:\s*([^)\s,]+)"
regex_elements = r"(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
for line in output_data.split("\n"):
match_emit = re.search(regex_emit, line)
matches_elements = re.finditer(regex_elements, line)
if not match_emit:
continue
output_channel = {match_emit.group(1): []}
for _, match_element in enumerate(matches_elements, start=1):
output_val = None
if match_element.group(3):
output_val = match_element.group(3)
elif match_element.group(4):
output_val = match_element.group(4)
if output_val:
output_val = output_val.strip("'").strip('"') # remove quotes
output_channel[match_emit.group(1)].append({output_val: {}})
outputs.append(output_channel)
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
self.outputs = outputs
48 changes: 25 additions & 23 deletions nf_core/module-template/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,44 @@ tools:
{% endif -%}
input:
#{% if has_meta %} Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{% endif %}
{% if not_empty_template -%}
## TODO nf-core: Delete / customise this example input
{%- endif %}
- {{ 'bam:' if not_empty_template else "input:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
- {{ 'bam:' if not_empty_template else "input:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}

{% if not_empty_template -%}
## TODO nf-core: Add a description of all of the variables used as output
{% endif -%}
output:
- versions:
- "versions.yml":
type: file
description: File containing software versions
pattern: "versions.yml"
- {{ 'bam:' if not_empty_template else "output:" }}
#{% if has_meta -%} Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
{% endif %}
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
{% if not_empty_template -%}
## TODO nf-core: Delete / customise this example output
## TODO nf-core: Delete / customise this example output
{%- endif %}
- {{ 'bam:' if not_empty_template else "output:" }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}
- {{ '"*.bam":' if not_empty_template else '"*":' }}
type: file
description: {{ 'Sorted BAM/CRAM/SAM file' if not_empty_template else "" }}
pattern: {{ '"*.{bam,cram,sam}"' if not_empty_template else "" }}

authors:
- "{{ author }}"
Expand Down
105 changes: 104 additions & 1 deletion nf_core/modules/lint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import questionary
import rich
import yaml

import nf_core.modules.modules_utils
import nf_core.utils
Expand All @@ -29,7 +30,12 @@ class ModuleLint(ComponentLint):
# Import lint functions
from .environment_yml import environment_yml # type: ignore[misc]
from .main_nf import main_nf # type: ignore[misc]
from .meta_yml import meta_yml # type: ignore[misc]
from .meta_yml import ( # type: ignore[misc]
meta_yml,
obtain_correct_and_specified_inputs,
obtain_correct_and_specified_outputs,
read_meta_yml,
)
from .module_changes import module_changes # type: ignore[misc]
from .module_deprecations import module_deprecations # type: ignore[misc]
from .module_patch import module_patch # type: ignore[misc]
Expand All @@ -41,6 +47,7 @@ def __init__(
self,
dir,
fail_warned=False,
fix=False,
remote_url=None,
branch=None,
no_pull=False,
Expand All @@ -51,6 +58,7 @@ def __init__(
component_type="modules",
dir=dir,
fail_warned=fail_warned,
fix=fix,
remote_url=remote_url,
branch=branch,
no_pull=no_pull,
Expand Down Expand Up @@ -213,6 +221,12 @@ def lint_module(self, mod, progress_bar, registry, local=False, fix_version=Fals

# Otherwise run all the lint tests
else:
mod.get_inputs_from_main_nf()
mod.get_outputs_from_main_nf()
# Update meta.yml file if requested
if self.fix:
self.update_meta_yml_file(mod)

if self.repo_type == "pipeline" and self.modules_json:
# Set correct sha
version = self.modules_json.get_module_version(mod.component_name, mod.repo_url, mod.org)
Expand All @@ -232,3 +246,92 @@ def lint_module(self, mod, progress_bar, registry, local=False, fix_version=Fals
self.failed += warned

self.failed += [LintResult(mod, *m) for m in mod.failed]

def update_meta_yml_file(self, mod):
    """
    Update the meta.yml file with the correct inputs and outputs.

    The channel/element structure parsed from main.nf (``mod.inputs`` and
    ``mod.outputs``) is compared with the structure currently described in
    meta.yml. When they differ, the ``input`` / ``output`` section is rebuilt
    from main.nf and the features already documented in meta.yml (``type``,
    ``description``, ``pattern``, ...) are carried over to the matching
    elements. Both the old (flat) and the new (nested) meta.yml layouts are
    accepted as the source of existing features.

    The result is written back to ``mod.meta_yml``.

    Args:
        mod: The module being linted. ``mod.inputs`` and ``mod.outputs`` must
            already be populated (``get_inputs_from_main_nf`` /
            ``get_outputs_from_main_nf``), and ``mod.meta_yml`` is the path
            that gets rewritten.
    """

    def copy_missing_features(target, source):
        """Add to ``target`` every feature of ``source`` it does not define yet.

        Raises AttributeError when ``source`` is not a mapping; the callers
        rely on that to detect the new (list based) meta.yml layout.
        """
        for feature, value in source.items():
            target.setdefault(feature, value)

    meta_yml = self.read_meta_yml(mod)
    # NOTE(review): presumably read_meta_yml returns None when the file cannot
    # be read - confirm. Without this guard `.copy()` below would raise.
    if meta_yml is None:
        log.debug(f"Could not read '{mod.meta_yml}', skipping update")
        return
    corrected_meta_yml = meta_yml.copy()

    # Obtain inputs and outputs from main.nf and meta.yml
    # Used to compare only the structure of channels and elements
    # Do not compare features to allow for custom features in meta.yml (i.e. pattern)
    has_inputs = "input" in meta_yml
    has_outputs = "output" in meta_yml
    correct_inputs = meta_inputs = None
    correct_outputs = meta_outputs = None
    if has_inputs:
        correct_inputs, meta_inputs = self.obtain_correct_and_specified_inputs(mod, meta_yml)
    if has_outputs:
        correct_outputs, meta_outputs = self.obtain_correct_and_specified_outputs(mod, meta_yml)

    # Only compare a section that exists in meta.yml; the comparison used to
    # run unconditionally and failed on unbound names when a section was missing.
    if has_inputs and correct_inputs != meta_inputs:
        log.debug(
            f"Correct inputs: '{correct_inputs}' differ from current inputs: '{meta_inputs}' in '{mod.meta_yml}'"
        )
        # list of lists (channels) of dicts (elements)
        # NOTE(review): shallow copy - the element dicts are shared with
        # mod.inputs, so the features copied below also show up there.
        corrected_meta_yml["input"] = mod.inputs.copy()
        for channel in corrected_meta_yml["input"]:
            for element in channel:
                element_name = next(iter(element))
                features = element[element_name]
                for meta_element in meta_yml["input"]:
                    try:
                        # Handle old format of meta.yml: list of dicts (channels)
                        if element_name in meta_element.keys():
                            # Copy current features of that input element from meta.yml
                            copy_missing_features(features, meta_element[element_name])
                            break
                    except AttributeError:
                        # Handle new format of meta.yml: list of lists (channels) of elements (dicts)
                        for meta_ch_element in meta_element:
                            if element_name in meta_ch_element.keys():
                                # Copy current features of that input element from meta.yml
                                copy_missing_features(features, meta_ch_element[element_name])
                                break

    if has_outputs and correct_outputs != meta_outputs:
        log.debug(
            f"Correct outputs: '{correct_outputs}' differ from current outputs: '{meta_outputs}' in '{mod.meta_yml}'"
        )
        # list of dicts (channels) with list of dicts (elements)
        corrected_meta_yml["output"] = mod.outputs.copy()
        for channel in corrected_meta_yml["output"]:
            ch_name = next(iter(channel))
            for element in channel[ch_name]:
                element_name = next(iter(element))
                features = element[element_name]
                for meta_element in meta_yml["output"]:
                    if element_name in meta_element.keys():
                        # Copy current features of that output element from meta.yml
                        copy_missing_features(features, meta_element[element_name])
                        break
                    elif ch_name in meta_element.keys():
                        # When the previous output element was using the name of the channel
                        # Copy current features of that output element from meta.yml
                        try:
                            # Handle old format of meta.yml
                            copy_missing_features(features, meta_element[ch_name])
                        except AttributeError:
                            # Handle new format of meta.yml
                            for meta_ch_element in meta_element[ch_name]:
                                for meta_ch_element_features in meta_ch_element.values():
                                    copy_missing_features(features, meta_ch_element_features)
                        break

    with open(mod.meta_yml, "w") as fh:
        log.info(f"Updating {mod.meta_yml}")
        yaml.dump(corrected_meta_yml, fh, sort_keys=False, Dumper=nf_core.utils.custom_yaml_dumper())
Loading
Loading