Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to mask absolute version numbers in Nextflow tests #40

Merged
merged 5 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion run-nextflow-tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ Configuration tests are self-contained JSON files named `configtest*.json` with
| nf_params | A map of command-line parameters to pass to Nextflow (`nextflow --<key>=<value>`) |
| envvars | A map of environment variables to set (`KEY=VALUE nextflow ...`) |
| mocks | Method names to be mocked, mapped to the objects they should return |
| dated_field | A list of JSONPath-like keys indicating values in the rendered configuration that contain datestamps |
| dated_fields | A list of JSONPath-like keys indicating values in the rendered configuration that contain datestamps |
| version_fields | A list of JSONPath-like keys indicating values in the rendered configuration that contain the pipeline version number |
| expected_result | The expected output of the test |

For each test, this Action parses the configuration and runs a modified version of [`nextflow config`](https://www.nextflow.io/docs/latest/cli.html#config), comparing the results against `expected_result` and warning about any differences.
Expand Down Expand Up @@ -153,6 +154,14 @@ jobs:
"trace.file",
"params.date"
],
"version_fields": [
"manifest.version",
"params.log_output_dir",
"params.output_dir_base",
"report.file",
"trace.file",
"timeline.file"
],
"expected_result": {}
}
```
Expand All @@ -162,6 +171,7 @@ jobs:
The true Nextflow configuration output is slightly modified for usability:

* Every field listed in `dated_fields` has timestamps matching the format `YYYYMMDDTHHMMSSZ` replaced with the static value `19970704T165655Z` ([Pathfinder's landing](https://science.nasa.gov/mission/mars-pathfinder/)).
* Every field listed in `version_fields` has sub-strings matching the `manifest.version` value replaced with the static value `VER.SI.ON`.
* Every value that looks like a Java object (e.g. `[Ljava.lang.String;@49c7b90e`) has the hash code replaced with the static value `dec0ded`.
* These should not appear in test files. When they do, it is a sign that the corresponding variable is missing a `def` in the configuration file.
* Closures are expressed as the first valid item in this list:
Expand Down
32 changes: 30 additions & 2 deletions run-nextflow-tests/configtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ class NextflowConfigTest:
# pylint: disable=too-many-instance-attributes
SENTINEL: ClassVar = "=========SENTINEL_OUTPUT=========="

# python3.7 doesn't support `kw_only` and other useful dataclass features.
# These two class-level sets are workarounds for that: they name the fields
# that may be omitted from a test file, grouped by the empty default each
# one receives (`{}` for OPTIONAL_DICTS, `[]` for OPTIONAL_LISTS).
#
# The sets must stay disjoint. `from_file` applies the dict defaults before
# the list defaults, so a list-valued field that is also named in
# OPTIONAL_DICTS would silently default to `{}` instead of `[]`.
OPTIONAL_DICTS: ClassVar = {
    "nf_params",
    "envvars",
    "mocks",
}
OPTIONAL_LISTS: ClassVar = {
    "dated_fields",
    "version_fields",
}

pipeline: Path = dataclasses.field(init=False, compare=False)
filepath: Path = dataclasses.field(init=False, compare=False)

Expand All @@ -42,6 +56,7 @@ class NextflowConfigTest:
mocks: Dict

dated_fields: List[str]
version_fields: List[str]

expected_result: Dict

Expand All @@ -55,6 +70,12 @@ def from_file(cls: Type[T], pipeline: Path, filepath: Path) -> T:
data.pop("empty_files", None)
data.pop("mapped_files", None)

for fieldname in cls.OPTIONAL_DICTS:
data.setdefault(fieldname, {})

for fieldname in cls.OPTIONAL_LISTS:
data.setdefault(fieldname, [])

result = cls(**data)
result.pipeline = pipeline
result.filepath = filepath.resolve()
Expand Down Expand Up @@ -82,6 +103,11 @@ def to_file(self):
data = dataclasses.asdict(self)
data.pop("pipeline")

# Strip any empty optional fields from the output
for field in self.OPTIONAL_DICTS | self.OPTIONAL_LISTS:
if not data[field]:
data.pop(field)

with data.pop("filepath").open(mode="w") as outfile:
json.dump(
data,
Expand Down Expand Up @@ -181,12 +207,14 @@ def _run_test(self):
config_text = config_output.rsplit(self.SENTINEL, maxsplit=1)[-1]

try:
return parse_config(config_text, self.dated_fields)
return parse_config(
config_text, self.dated_fields, self.version_fields
)
except Exception:
print(config_output)
raise

def print_diffs(self, other: T):
def print_diffs(self, other: "NextflowConfigTest"):
"Print the diff results to the console."
diff_process = subprocess.run(
["diff", self.filepath, other.filepath],
Expand Down
20 changes: 19 additions & 1 deletion run-nextflow-tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ def parse_value(value_str: str) -> Any:
return value


def parse_config(config_str: str, dated_fields: List[str]) -> dict:
def parse_config(config_str: str,
dated_fields: List[str],
version_fields: List[str]) -> dict:
"Parse a string of Java properties."
param_re = re.compile(r"^(?P<key>\S+?[^\\])=(?P<value>.*)$")
version_fields = list(version_fields)

def assign_value(closure, key, value):
if "." not in key:
Expand All @@ -170,6 +173,17 @@ def assign_value(closure, key, value):

assign_value(closure[local_key], remainder, value)

# Parse out the current manifest version
try:
version_str = re.search(
r"^manifest.version=(.*)$",
config_str,
re.MULTILINE
yashpatel6 marked this conversation as resolved.
Show resolved Hide resolved
).group(1)

except AttributeError:
version_str = None

config: dict[str, Any] = {}

for line in config_str.splitlines():
Expand All @@ -188,6 +202,10 @@ def assign_value(closure, key, value):
# Replace the date with Pathfinder's landing
value = DATE_RE.sub("19970704T165655Z", value)

if escaped_key in version_fields and version_str:
# Replace the version with an obvious weird value
value = value.replace(version_str, "VER.SI.ON")

assign_value(config, escaped_key, value)

# Specifically sort the config
Expand Down