Skip to content

Commit

Permalink
Merge pull request #40 from uclahs-cds/nwiltsie-mask-versions
Browse files Browse the repository at this point in the history
Add ability to mask absolute version numbers in Nextflow tests
  • Loading branch information
nwiltsie authored May 28, 2024
2 parents f012156 + 1bd89d4 commit 9d7245d
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 4 deletions.
12 changes: 11 additions & 1 deletion run-nextflow-tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ Configuration tests are self-contained JSON files named `configtest*.json` with
| nf_params | A map of command-line parameters to pass to Nextflow (`nextflow --<key>=<value>`) |
| envvars | A map of environment variables to set (`KEY=VALUE nextflow ...`) |
| mocks | Method names to be mocked, mapped to the objects they should return |
| dated_field | A list of JSONPath-like keys indicating values in the rendered configuration that contain datestamps |
| dated_fields | A list of JSONPath-like keys indicating values in the rendered configuration that contain datestamps |
| version_fields | A list of JSONPath-like keys indicating values in the rendered configuration that contain the pipeline version number |
| expected_result | The expected output of the test |

For each test, this Action parses the configuration and runs a modified version of [`nextflow config`](https://www.nextflow.io/docs/latest/cli.html#config), comparing the results against `expected_result` and warning about any differences.
Expand Down Expand Up @@ -153,6 +154,14 @@ jobs:
"trace.file",
"params.date"
],
"version_fields": [
"manifest.version",
"params.log_output_dir",
"params.output_dir_base",
"report.file",
"trace.file",
"timeline.file"
],
"expected_result": {}
}
```
Expand All @@ -162,6 +171,7 @@ jobs:
The true Nextflow configuration output is slightly modified for usability:

* Every field listed in `dated_fields` has timestamps matching the format `YYYYMMDDTHHMMSSZ` replaced with the static value `19970704T165655Z` ([Pathfinder's landing](https://science.nasa.gov/mission/mars-pathfinder/)).
* Every field listed in `version_fields` has sub-strings matching the `manifest.version` value replaced with the static value `VER.SI.ON`.
* Every value that looks like a Java object (e.g. `[Ljava.lang.String;@49c7b90e`) has the hash code replaced with the static value `dec0ded`.
* These should not appear in test files. When they do, it is a sign that the corresponding variable is missing a `def` in the configuration file.
* Closures are expressed as the first valid item in this list:
Expand Down
32 changes: 30 additions & 2 deletions run-nextflow-tests/configtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@ class NextflowConfigTest:
# pylint: disable=too-many-instance-attributes
SENTINEL: ClassVar = "=========SENTINEL_OUTPUT=========="

# python3.7 doesn't support `kw_only` and other useful dataclass features.
# These two fields are workarounds for that: they name the fields that may be
# omitted from a test file, grouped by the empty default each one receives.
#
# The two sets must stay disjoint. `from_file` fills in the `{}` defaults
# before the `[]` defaults using `setdefault`, so a name listed in both sets
# would be given `{}` and never receive its intended `[]`.

# Optional fields that default to an empty dict.
OPTIONAL_DICTS: ClassVar = {
    "nf_params",
    "envvars",
    "mocks",
}

# Optional fields that default to an empty list.
OPTIONAL_LISTS: ClassVar = {
    "dated_fields",
    "version_fields",
}

pipeline: Path = dataclasses.field(init=False, compare=False)
filepath: Path = dataclasses.field(init=False, compare=False)

Expand All @@ -42,6 +56,7 @@ class NextflowConfigTest:
mocks: Dict

dated_fields: List[str]
version_fields: List[str]

expected_result: Dict

Expand All @@ -55,6 +70,12 @@ def from_file(cls: Type[T], pipeline: Path, filepath: Path) -> T:
data.pop("empty_files", None)
data.pop("mapped_files", None)

for fieldname in cls.OPTIONAL_DICTS:
data.setdefault(fieldname, {})

for fieldname in cls.OPTIONAL_LISTS:
data.setdefault(fieldname, [])

result = cls(**data)
result.pipeline = pipeline
result.filepath = filepath.resolve()
Expand Down Expand Up @@ -82,6 +103,11 @@ def to_file(self):
data = dataclasses.asdict(self)
data.pop("pipeline")

# Strip any empty optional fields from the output
for field in self.OPTIONAL_DICTS | self.OPTIONAL_LISTS:
if not data[field]:
data.pop(field)

with data.pop("filepath").open(mode="w") as outfile:
json.dump(
data,
Expand Down Expand Up @@ -181,12 +207,14 @@ def _run_test(self):
config_text = config_output.rsplit(self.SENTINEL, maxsplit=1)[-1]

try:
return parse_config(config_text, self.dated_fields)
return parse_config(
config_text, self.dated_fields, self.version_fields
)
except Exception:
print(config_output)
raise

def print_diffs(self, other: T):
def print_diffs(self, other: "NextflowConfigTest"):
"Print the diff results to the console."
diff_process = subprocess.run(
["diff", self.filepath, other.filepath],
Expand Down
20 changes: 19 additions & 1 deletion run-nextflow-tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ def parse_value(value_str: str) -> Any:
return value


def parse_config(config_str: str, dated_fields: List[str]) -> dict:
def parse_config(config_str: str,
dated_fields: List[str],
version_fields: List[str]) -> dict:
"Parse a string of Java properties."
param_re = re.compile(r"^(?P<key>\S+?[^\\])=(?P<value>.*)$")
version_fields = list(version_fields)

def assign_value(closure, key, value):
if "." not in key:
Expand All @@ -170,6 +173,17 @@ def assign_value(closure, key, value):

assign_value(closure[local_key], remainder, value)

# Parse out the current manifest version
try:
version_str = re.search(
r"^manifest.version=(.*)$",
config_str,
re.MULTILINE
).group(1)

except AttributeError:
version_str = None

config: dict[str, Any] = {}

for line in config_str.splitlines():
Expand All @@ -188,6 +202,10 @@ def assign_value(closure, key, value):
# Replace the date with Pathfinder's landing
value = DATE_RE.sub("19970704T165655Z", value)

if escaped_key in version_fields and version_str:
# Replace the version with an obvious weird value
value = value.replace(version_str, "VER.SI.ON")

assign_value(config, escaped_key, value)

# Specifically sort the config
Expand Down

0 comments on commit 9d7245d

Please sign in to comment.