From c51bf666d742f980a308705404e0310d3c42e8c7 Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Fri, 12 Jan 2024 11:51:30 -0500
Subject: [PATCH 01/19] update to accommodate multi-assay--component assays
 passed from IVT in Validator.contains

---
 .../codex_common_errors_validator.py                 | 7 ++++++-
 src/ingest_validation_tests/codex_json_validator.py  | 8 ++++++--
 src/ingest_validation_tests/publication_validator.py | 9 +++++++--
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/ingest_validation_tests/codex_common_errors_validator.py b/src/ingest_validation_tests/codex_common_errors_validator.py
index 01bda98..74cedc3 100644
--- a/src/ingest_validation_tests/codex_common_errors_validator.py
+++ b/src/ingest_validation_tests/codex_common_errors_validator.py
@@ -45,11 +45,16 @@ class CodexCommonErrorsValidator(Validator):
     """
     description = "Test for common problems found in CODEX"
     cost = 1.0
+    required = "codex"
+
     def collect_errors(self, **kwargs) -> List[str]:
         """
         Return the errors found by this validator
         """
-        if self.assay_type != 'CODEX':
+        if (
+            self.required not in self.contains
+            and self.assay_type.lower() != self.required
+        ):
             return []  # We only test CODEX data
         rslt = []
         try:
diff --git a/src/ingest_validation_tests/codex_json_validator.py b/src/ingest_validation_tests/codex_json_validator.py
index 17b4ee9..e75b9ba 100644
--- a/src/ingest_validation_tests/codex_json_validator.py
+++ b/src/ingest_validation_tests/codex_json_validator.py
@@ -10,10 +10,14 @@ class CodexJsonValidator(Validator):
     description = "Check CODEX JSON against schema"
     cost = 1.0
+    required = "codex"
 
     def collect_errors(self, **kwargs) -> List[str]:
-        if 'codex' not in self.assay_type.lower():
-            return []
+        if (
+            self.required not in self.contains
+            and self.assay_type.lower() != self.required
+        ):
+            return []  # We only test CODEX data
 
         schema_path = Path(__file__).parent / 'codex_schema.json'
         schema = json.loads(schema_path.read_text())
diff --git a/src/ingest_validation_tests/publication_validator.py b/src/ingest_validation_tests/publication_validator.py
index 9399f2c..59cdadf 100644
--- a/src/ingest_validation_tests/publication_validator.py
+++ b/src/ingest_validation_tests/publication_validator.py
@@ -19,11 +19,16 @@ class PublicationValidator(Validator):
     cost = 1.0
     base_url_re = r'(\s*\{\{\s*base_url\s*\}\})/(.*)'
     url_re = r'[Uu][Rr][Ll]'
+    required = "publications"
+
     def collect_errors(self, **kwargs) -> List[str]:
         """
         Return the errors found by this validator
         """
-        if self.assay_type != 'Publication':
+        if (
+            self.required not in self.contains
+            and self.assay_type.lower() != self.required
+        ):
             return []  # We only test Publication data
         rslt = []
         try:
@@ -86,7 +91,7 @@ def validate_vitessce_config(self, json_path):
         rslt = []
         with open(json_path) as f:
             dct = json.load(f)
-        for key, val in self.url_search_iter(dct):
+        for _, val in self.url_search_iter(dct):
             try:
                 match = re.match(self.base_url_re, val)
                 if match:  # it starts with {{ base_url }}

From f69bf7db12ead2fba59ce200b85c92dbfba4530d Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Tue, 23 Jan 2024 14:22:10 -0500
Subject: [PATCH 02/19] adding linting and formatting tools

---
 .flake8              |  3 +++
 pyproject.toml       |  7 +++++++
 requirements-dev.txt | 10 ++++++++++
 3 files changed, 20 insertions(+)
 create mode 100644 .flake8
 create mode 100644 pyproject.toml
 create mode 100644 requirements-dev.txt

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..ad98539
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+max-line-length = 100
+extend-ignore = E203
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..72dd8d9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[tool.black]
+line-length = 99
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+src_paths = ["src/ingest-validation-tests"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..d5c087a
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,10 @@
+black==23.12.1
+flake8==7.0.0
+git+https://github.com/hubmapconsortium/fastq-utils.git@v0.2.5#egg=hubmap-fastq-utils
+imagecodecs>=2023.3.16
+isort==5.13.2
+jsonschema==4.4.0
+pandas>=1.2.0
+python-frontmatter>=1.0.0
+tifffile==2020.10.1
+xmlschema>=1.6

From 930fd015a671cdadd51520c8024d39ab8b4fe71d Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Thu, 1 Feb 2024 11:35:10 -0500
Subject: [PATCH 03/19] ran black / flake8 / isort, fixed errors, updated
 .gitignore

---
 .gitignore                                    |   1 +
 .../codex_common_errors_validator.py          | 104 +++++++---------
 .../codex_json_validator.py                   |   8 +-
 .../fastq_validator.py                        |   6 +-
 .../fastq_validator_logic.py                  | 107 ++++++++--------
 src/ingest_validation_tests/gz_validator.py   |   4 +-
 .../publication_validator.py                  |  63 ++++++----
 src/ingest_validation_tests/tiff_validator.py |  12 +-
 tests/pytest_runner.py                        |  18 +--
 tests/test_codex_common_errors_validator.py   | 107 ++++++++++------
 tests/test_codex_json_validator.py            |  25 ++--
 tests/test_fastq_validator_logic.py           | 116 ++++++++----------
 tests/test_gz_validator.py                    |  19 +--
 tests/test_ome_tiff_validator.py              |  23 ++--
 tests/test_publication_validator.py           |  56 +++++----
 tests/test_tiff_validator.py                  |  32 +++--
 16 files changed, 381 insertions(+), 320 deletions(-)

diff --git a/.gitignore b/.gitignore
index 916db7b..da9fde3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -105,6 +105,7 @@ celerybeat.pid
 
 # Environments
 .env
+.envrc
 .venv
 env/
 venv/
diff --git a/src/ingest_validation_tests/codex_common_errors_validator.py b/src/ingest_validation_tests/codex_common_errors_validator.py
index 391e9cc..5edbec8 100644
--- a/src/ingest_validation_tests/codex_common_errors_validator.py
+++ b/src/ingest_validation_tests/codex_common_errors_validator.py
@@ -20,22 +20,20 @@ def _split_cycle_dir_string(cycle_str):
     """
     Given a cycle-and-region directory name, split out the cyle and region numbers
     """
-    words = cycle_str.split('_')
+    words = cycle_str.split("_")
     assert len(words) >= 2, f'Directory string "{cycle_str}" has unexpected form'
-    assert words[0].startswith('cyc'), (f'directory string "{cycle_str}" does'
-                                        ' not start with "cyc"')
+    assert words[0].startswith("cyc"), (
+        f'directory string "{cycle_str}" does' ' not start with "cyc"'
+    )
     try:
-        cyc_id = int(words[0][len('cyc'):])
+        cyc_id = int(words[0][len("cyc") :])
     except ValueError:
-        raise AssertionError(f'Directory string "{cycle_str}" cycle number is'
-                             ' not an integer')
-    assert words[1].startswith('reg'), (f'Directory string "{cycle_str}" does'
-                                        ' not include "_reg"')
+        raise AssertionError(f'Directory string "{cycle_str}" cycle number is' " not an integer")
+    assert words[1].startswith("reg"), f'Directory string "{cycle_str}" does' ' not include "_reg"'
     try:
-        reg_id = int(words[1][len('reg'):])
+        reg_id = int(words[1][len("reg") :])
     except ValueError:
-        raise AssertionError(f'Directory string "{cycle_str}" region number is'
-                             ' not an integer')
+        raise AssertionError(f'Directory string "{cycle_str}" region number is' " not an integer")
     return cyc_id, reg_id
@@ -53,7 +51,7 @@ def collect_errors(self, **kwargs) -> List[str]:
         Return the errors found by this validator
         """
         del kwargs
-        if self.assay_type != 'CODEX':
+        if self.assay_type != "CODEX":
             return []  # We only test CODEX data
         rslts = []
         for path in self.paths:
@@ -61,101 +59,95 @@ def collect_errors(self, **kwargs) -> List[str]:
             rslt = []
             try:
                 # is the raw/src_ directory present?
                 prefix = None
-                if (path / 'raw').is_dir():
-                    prefix = path / 'raw'
+                if (path / "raw").is_dir():
+                    prefix = path / "raw"
                 else:
-                    for candidate in path.glob('src_*'):
+                    for candidate in path.glob("src_*"):
                         prefix = candidate
                 if prefix is None:
-                    rslt.append('The raw/src_ subdirectory is missing?')
+                    rslt.append("The raw/src_ subdirectory is missing?")
                     raise QuitNowException()
 
                 # Does dataset.json exist? If so, 'new CODEX' syntax rules
                 # are in effect
                 dataset_json_exists = False
                 any_dataset_json_exists = False
-                for candidate in path.glob('**/dataset.json'):
+                for candidate in path.glob("**/dataset.json"):
                     any_dataset_json_exists = True
-                    if candidate == prefix / 'dataset.json':
+                    if candidate == prefix / "dataset.json":
                         dataset_json_exists = True
                 if dataset_json_exists:
-                    print('FOUND dataset.json; skipping further analysis')
+                    print("FOUND dataset.json; skipping further analysis")
                     raise QuitNowException()
                 elif any_dataset_json_exists:
-                    rslt.append(
-                        'A dataset.json file exists but'
-                        ' is in the wrong place'
-                    )
+                    rslt.append("A dataset.json file exists but" " is in the wrong place")
 
                 # is the segmentation.json file on the right side?
                 found = False
                 right_place = False
-                for filepath in path.glob('*/[Ss]egmentation.json'):
+                for filepath in path.glob("*/[Ss]egmentation.json"):
                     rel_path = filepath.relative_to(path)
                     found = True
-                    if str(rel_path).startswith(('raw', 'src_')):
+                    if str(rel_path).startswith(("raw", "src_")):
                         right_place = True
                 if found:
                     if right_place:
                         pass
                     else:
-                        rslt.append(
-                            'The segmentation.json file is in the wrong subdirectory'
-                        )
+                        rslt.append("The segmentation.json file is in the wrong subdirectory")
                 else:
-                    rslt.append('The segmentation.json file is missing or misplaced')
+                    rslt.append("The segmentation.json file is missing or misplaced")
 
                 # Does the channelnames.txt file exist?
-                channelnames_txt_path = prefix / 'channelnames.txt'
+                channelnames_txt_path = prefix / "channelnames.txt"
                 if not channelnames_txt_path.is_file():
                     # sometimes we see this variant
-                    channelnames_txt_path = prefix / 'channelNames.txt'
+                    channelnames_txt_path = prefix / "channelNames.txt"
                     if not channelnames_txt_path.is_file():
-                        rslt.append('channelnames.txt is missing')
+                        rslt.append("channelnames.txt is missing")
                         raise QuitNowException()
 
                 # Parse channelnames.txt into a dataframe
                 try:
                     cn_df = pd.read_csv(str(channelnames_txt_path), header=None)
                 except Exception:
-                    rslt.append(f'Unexpected error reading {channelnames_txt_path}')
+                    rslt.append(f"Unexpected error reading {channelnames_txt_path}")
                     raise QuitNowException()
                 if len(cn_df.columns) != 1:
-                    rslt.append(f'Unexpected format for {channelnames_txt_path}')
+                    rslt.append(f"Unexpected format for {channelnames_txt_path}")
                    raise QuitNowException()
 
                 # Does the channelnames_report.csv file exist?
-                report_csv_path = prefix / 'channelnames_report.csv'
+                report_csv_path = prefix / "channelnames_report.csv"
                 if report_csv_path.is_file():
                     # Parse channelnames_report.txt into a dataframe
                     try:
-                        rpt_df = pd.read_csv(str(report_csv_path), sep=',', header=None)
+                        rpt_df = pd.read_csv(str(report_csv_path), sep=",", header=None)
                     except Exception:
-                        rslt.append(f'Unexpected error reading {report_csv_path}')
+                        rslt.append(f"Unexpected error reading {report_csv_path}")
                         raise QuitNowException()
                     if len(rpt_df) == len(cn_df) + 1:
                         # channelnames_report.csv appears to have a header
                         try:
-                            rpt_df = pd.read_csv(str(report_csv_path), sep=',')
+                            rpt_df = pd.read_csv(str(report_csv_path), sep=",")
                         except Exception:
-                            rslt.append(f'Unexpected error reading {report_csv_path}')
+                            rslt.append(f"Unexpected error reading {report_csv_path}")
                             raise QuitNowException()
                     if len(rpt_df.columns) != 2:
                         rslt.append(
-                            f'Could not parse {report_csv_path}.'
-                            ' Is it a comma-separated table?'
+                            f"Could not parse {report_csv_path}." " Is it a comma-separated table?"
                         )
                         raise QuitNowException()
                     col_0, col_1 = rpt_df.columns
-                    rpt_df = rpt_df.rename(columns={col_0: 'Marker', col_1: 'Result'})
+                    rpt_df = rpt_df.rename(columns={col_0: "Marker", col_1: "Result"})
 
                     # Do they match?
-                    rpt_df['other'] = cn_df[0]
-                    mismatches_df = rpt_df[rpt_df['other'] != rpt_df['Marker']]
+                    rpt_df["other"] = cn_df[0]
+                    mismatches_df = rpt_df[rpt_df["other"] != rpt_df["Marker"]]
                     if len(mismatches_df) != 0:
                         for idx, row in mismatches_df.iterrows():
                             rslt.append(
-                                f'{channelnames_txt_path.name} does not'
-                                ' match channelnames_report.txt'
+                                f"{channelnames_txt_path.name} does not"
+                                " match channelnames_report.txt"
                                 f' on line {idx}: {row["other"]} vs {row["Marker"]}'
                             )
                         raise QuitNowException()
@@ -164,7 +156,7 @@ def collect_errors(self, **kwargs) -> List[str]:
 
                 # Tabulate the cycle and region info
                 all_cycle_dirs = []
-                for glob_str in ['cyc*', 'Cyc*']:
+                for glob_str in ["cyc*", "Cyc*"]:
                     for pth in prefix.glob(glob_str):
                         if pth.is_dir():
                             all_cycle_dirs.append(str(pth.stem).lower())
@@ -189,28 +181,26 @@ def collect_errors(self, **kwargs) -> List[str]:
                 failures = []
                 # First cycle must be 1
                 if cycles[0] != 1:
-                    failures.append('Cycle numbering does not start at 1')
+                    failures.append("Cycle numbering does not start at 1")
                 # First region must be 1
                 if regions[0] != 1:
-                    failures.append('Region numbering does not start at 1')
+                    failures.append("Region numbering does not start at 1")
                 # Cycle range must be contiguous ints
                 if cycles != list(range(cycles[0], cycles[-1] + 1)):
-                    failures.append('Cycle numbers are not contiguous')
+                    failures.append("Cycle numbers are not contiguous")
                 # Region range must be contiguous ints
                 if regions != list(range(regions[0], regions[-1] + 1)):
-                    failures.append('Region numbers are not contiguous')
+                    failures.append("Region numbers are not contiguous")
                 # All cycle, region pairs must be present
                 if len(cycles) * len(regions) != total_entries:
-                    failures.append('Not all cycle/region pairs are present')
+                    failures.append("Not all cycle/region pairs are present")
                 # Total number of channels / total number of cycles must be integer,
                 # excluding any HandE channels
                 total_channel_count = len(cn_df)
-                h_and_e_channel_count = len(cn_df[cn_df[0].str.startswith('HandE')])
-                channels_per_cycle = (
-                    total_channel_count - h_and_e_channel_count
-                ) / len(cycles)
+                h_and_e_channel_count = len(cn_df[cn_df[0].str.startswith("HandE")])
+                channels_per_cycle = (total_channel_count - h_and_e_channel_count) / len(cycles)
                 if channels_per_cycle != int(channels_per_cycle):
-                    failures.append('The number of channels per cycle is not constant')
+                    failures.append("The number of channels per cycle is not constant")
                 if failures:
                     rslt += failures
                     raise QuitNowException()
diff --git a/src/ingest_validation_tests/codex_json_validator.py b/src/ingest_validation_tests/codex_json_validator.py
index 1e9e9ba..41a6955 100644
--- a/src/ingest_validation_tests/codex_json_validator.py
+++ b/src/ingest_validation_tests/codex_json_validator.py
@@ -12,19 +12,19 @@ class CodexJsonValidator(Validator):
 
     def collect_errors(self, **kwargs) -> List[str]:
         del kwargs
-        if 'codex' not in self.assay_type.lower():
+        if "codex" not in self.assay_type.lower():
             return []
 
-        schema_path = Path(__file__).parent / 'codex_schema.json'
+        schema_path = Path(__file__).parent / "codex_schema.json"
         schema = json.loads(schema_path.read_text())
 
         rslt = []
-        for glob_expr in ['**/dataset.json']:
+        for glob_expr in ["**/dataset.json"]:
             for path in self.paths:
                 for file in path.glob(glob_expr):
                     instance = json.loads(file.read_text())
                     try:
                         validate(instance=instance, schema=schema)
                     except Exception as e:
-                        rslt.append(f'{file}: {e}')
+                        rslt.append(f"{file}: {e}")
 
         return rslt
diff --git a/src/ingest_validation_tests/fastq_validator.py b/src/ingest_validation_tests/fastq_validator.py
index 0a30e42..d31335f 100644
--- a/src/ingest_validation_tests/fastq_validator.py
+++ b/src/ingest_validation_tests/fastq_validator.py
@@ -1,8 +1,8 @@
 from os import cpu_count
 from typing import List
 
-from ingest_validation_tools.plugin_validator import Validator
 from fastq_validator_logic import FASTQValidatorLogic, _log
+from ingest_validation_tools.plugin_validator import Validator
 
 
 class FASTQValidator(Validator):
@@ -10,8 +10,8 @@ class FASTQValidator(Validator):
     cost = 15.0
 
     def collect_errors(self, **kwargs) -> List[str]:
-        threads = kwargs.get('coreuse', None) or cpu_count() // 4 or 1
-        _log(f'Threading at {threads}')
+        threads = kwargs.get("coreuse", None) or cpu_count() // 4 or 1
+        _log(f"Threading at {threads}")
         validator = FASTQValidatorLogic(verbose=True)
         validator.validate_fastq_files_in_path(self.paths, threads)
         return validator.errors
diff --git a/src/ingest_validation_tests/fastq_validator_logic.py b/src/ingest_validation_tests/fastq_validator_logic.py
index 85eeb44..8095b74 100644
--- a/src/ingest_validation_tests/fastq_validator_logic.py
+++ b/src/ingest_validation_tests/fastq_validator_logic.py
@@ -15,10 +15,7 @@ def is_valid_filename(filename: str) -> bool:
 
 
 def _open_fastq_file(file: Path) -> TextIO:
-    return (
-        gzip.open(file, 'rt') if file.name.endswith('.gz')
-        else file.open()
-    )
+    return gzip.open(file, "rt") if file.name.endswith(".gz") else file.open()
 
 
 def _log(message: str) -> str:
@@ -54,13 +51,13 @@ class FASTQValidatorLogic:
     contain the same number of symbols as letters in the sequence.
""" - _FASTQ_LINE_2_VALID_CHARS = 'ACGNT' + _FASTQ_LINE_2_VALID_CHARS = "ACGNT" def __init__(self, verbose=False): self.errors: List[str] = [] self._file_record_counts = Manager().dict() self._file_prefix_counts = Manager().dict() - self._filename = '' + self._filename = "" self._line_number = 0 self._verbose = verbose @@ -80,7 +77,7 @@ def _format_error(self, error: str) -> str: return message def _validate_fastq_line_1(self, line: str) -> List[str]: - if not line or line[0] != '@': + if not line or line[0] != "@": return ["Line does not begin with '@'."] return [] @@ -89,45 +86,47 @@ def _validate_fastq_line_2(self, line: str) -> List[str]: self._line_2_length = len(line) self._last_line_2_number = self._line_number - invalid_chars = ''.join( - c for c in line if c not in self._FASTQ_LINE_2_VALID_CHARS) + invalid_chars = "".join(c for c in line if c not in self._FASTQ_LINE_2_VALID_CHARS) if invalid_chars: return [f"Line contains invalid character(s): {invalid_chars}"] return [] def _validate_fastq_line_3(self, line: str) -> List[str]: - if not line or line[0] != '+': + if not line or line[0] != "+": return ["Line does not begin with '+'."] return [] def _validate_fastq_line_4(self, line: str) -> List[str]: errors: List[str] = [] - invalid_chars = ''.join(c for c in line if not 33 <= ord(c) <= 126) + invalid_chars = "".join(c for c in line if not 33 <= ord(c) <= 126) if invalid_chars: - errors.append("Line contains invalid quality character(s): " - f'"{invalid_chars}"') + errors.append("Line contains invalid quality character(s): " f'"{invalid_chars}"') if len(line) != self._line_2_length: - errors.append(f"Line contains {len(line)} characters which " - f"does not match line {self._last_line_2_number}'s " - f"{self._line_2_length} characters.") + errors.append( + f"Line contains {len(line)} characters which " + f"does not match line {self._last_line_2_number}'s " + f"{self._line_2_length} characters." + ) return errors - _VALIDATE_FASTQ_LINE_METHODS = {1: _validate_fastq_line_1, - 2: _validate_fastq_line_2, - 3: _validate_fastq_line_3, - 4: _validate_fastq_line_4} + _VALIDATE_FASTQ_LINE_METHODS = { + 1: _validate_fastq_line_1, + 2: _validate_fastq_line_2, + 3: _validate_fastq_line_3, + 4: _validate_fastq_line_4, + } def validate_fastq_record(self, line: str, line_number: int) -> List[str]: line_index = line_number % 4 + 1 - validator_method: Callable[[FASTQValidatorLogic, str], List[str]] = \ - self._VALIDATE_FASTQ_LINE_METHODS[line_index] + validator_method: Callable[ + [FASTQValidatorLogic, str], List[str] + ] = self._VALIDATE_FASTQ_LINE_METHODS[line_index] - assert validator_method, \ - f"No validator method defined for record index {line_index}" + assert validator_method, f"No validator method defined for record index {line_index}" return validator_method(self, line) @@ -139,8 +138,8 @@ def validate_fastq_stream(self, fastq_data: TextIO) -> int: for line_count, line in enumerate(fastq_data): self._line_number = line_count + 1 self.errors.extend( - self._format_error(error) for error in - self.validate_fastq_record(line.rstrip(), line_count) + self._format_error(error) + for error in self.validate_fastq_record(line.rstrip(), line_count) ) return line_count + 1 @@ -151,9 +150,9 @@ def validate_fastq_file(self, fastq_file: Path) -> None: if not is_valid_filename(fastq_file.name): # If we don't like the filename, don't bother reading the contents. 
-            self.errors.append(_log(
-                "Filename does not have proper format "
-                "and will not be processed"))
+            self.errors.append(
+                _log("Filename does not have proper format " "and will not be processed")
+            )
             return
 
         self._line_number = 0
@@ -163,12 +162,10 @@ def validate_fastq_file(self, fastq_file: Path) -> None:
             with _open_fastq_file(fastq_file) as fastq_data:
                 records_read = self.validate_fastq_stream(fastq_data)
         except gzip.BadGzipFile:
-            self.errors.append(
-                self._format_error(f"Bad gzip file: {fastq_file}."))
+            self.errors.append(self._format_error(f"Bad gzip file: {fastq_file}."))
             return
         except IOError:
-            self.errors.append(
-                self._format_error(f"Unable to open FASTQ data file {fastq_file}."))
+            self.errors.append(self._format_error(f"Unable to open FASTQ data file {fastq_file}."))
             return
 
         self._file_record_counts[str(fastq_file)] = records_read
@@ -187,7 +184,9 @@ def validate_fastq_files_in_path(self, paths: List[Path], threads: int) -> None:
                 for file in files:
                     file_list.append(Path(path / rel_path / file))
         try:
-            logging.info(f"Passing file list for paths {paths} to engine. File list: {file_list}.")
+            logging.info(
+                f"Passing file list for paths {paths} to engine. File list: {file_list}."
+            )
             pool = Pool(threads)
             engine = Engine(self)
             data_output = pool.imap_unordered(engine, file_list)
@@ -214,14 +213,17 @@ def _find_duplicates(self, dirs_and_files):
                     files_per_path[filepath.name].append(data_path / sub_path)
         for filename, filepaths in files_per_path.items():
             if len(filepaths) > 1:
-                self.errors.append(_log(
-                    f"{filename} has been found multiple times during this validation. Locations of duplicates: {filepaths}."))  # noqa: E501
+                self.errors.append(
+                    _log(
+                        f"{filename} has been found multiple times during this validation. Locations of duplicates: {filepaths}."  # noqa: E501
+                    )
+                )
 
     def _find_shared_prefixes(self, lock):
         # This pattern seeks out the string that includes the lane number (since
         # that is expected to be present to help anchor the prefix) that comes
         # before any of _I1, _I2, _R1, or _R2.
-        fastq_file_prefix_regex = re.compile(r'(.+_L\d+.*)_[IR][12][._]')
+        fastq_file_prefix_regex = re.compile(r"(.+_L\d+.*)_[IR][12][._]")
         for fastq_file, records_read in self._file_record_counts.items():
             match = fastq_file_prefix_regex.match(Path(fastq_file).name)
             with lock:
@@ -233,26 +235,31 @@ def _find_shared_prefixes(self, lock):
                         # Find a file we've validated already that matches this
                         # prefix.
                         extant_files = [
-                            str(Path(filepath).name) for filepath, record_count
-                            in self._file_record_counts.items()
-                            if record_count == extant_count and Path(filepath).name.startswith(filename_prefix)
+                            str(Path(filepath).name)
+                            for filepath, record_count in self._file_record_counts.items()
+                            if record_count == extant_count
+                            and Path(filepath).name.startswith(filename_prefix)
                         ]
                         # Based on how the dictionaries are created, there should
                         # always be at least one matching filename.
                         assert extant_files
-                        self.errors.append(_log(
-                            f"{Path(fastq_file).name} ({records_read} lines) "
-                            f"does not match length of {extant_files[0]} "
-                            f"({extant_count} lines)."))
+                        self.errors.append(
+                            _log(
+                                f"{Path(fastq_file).name} ({records_read} lines) "
+                                f"does not match length of {extant_files[0]} "
+                                f"({extant_count} lines)."
+                            )
+                        )
                     else:
                         self._file_prefix_counts[filename_prefix] = records_read
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Validate FASTQ files.')
-    parser.add_argument('filepaths', type=Path, nargs='+',
-                        help="Files to validate for FASTQ syntax")
+    parser = argparse.ArgumentParser(description="Validate FASTQ files.")
+    parser.add_argument(
+        "filepaths", type=Path, nargs="+", help="Files to validate for FASTQ syntax"
+    )
     args = parser.parse_args()
 
     if isinstance(args.filepaths, List):
@@ -262,13 +269,11 @@ def main():
     elif isinstance(args.filepaths, str):
         filepaths = [Path(args.filepaths)]
     else:
-        raise Exception(
-            f"Validator init received base_paths arg as type {type(args.filepaths)}"
-        )
+        raise Exception(f"Validator init received base_paths arg as type {type(args.filepaths)}")
 
     validator = FASTQValidatorLogic(True)
     validator.validate_fastq_files_in_path(filepaths, Lock())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/src/ingest_validation_tests/gz_validator.py b/src/ingest_validation_tests/gz_validator.py
index 58ad671..9bb0162 100644
--- a/src/ingest_validation_tests/gz_validator.py
+++ b/src/ingest_validation_tests/gz_validator.py
@@ -1,9 +1,9 @@
+import gzip
+import re
 from multiprocessing import Pool
 from os import cpu_count
-import re
 from typing import List
 
-import gzip
 from ingest_validation_tools.plugin_validator import Validator
diff --git a/src/ingest_validation_tests/publication_validator.py b/src/ingest_validation_tests/publication_validator.py
index 4c7ccc1..528a2b5 100644
--- a/src/ingest_validation_tests/publication_validator.py
+++ b/src/ingest_validation_tests/publication_validator.py
@@ -2,10 +2,11 @@
 Test for some common errors in the directory and file structure of publications.
 """
-from typing import List
-import re
 import json
+import re
 from pathlib import Path
+from typing import List
+
 import frontmatter
 from ingest_validation_tools.plugin_validator import Validator
 
@@ -15,50 +16,57 @@ class PublicationValidator(Validator):
     Test for some common errors in the directory and file structure of publications.
     """
+
     description = "Test for common problems found in publications"
     cost = 1.0
-    base_url_re = r'(\s*\{\{\s*base_url\s*\}\})/(.*)'
-    url_re = r'[Uu][Rr][Ll]'
+    base_url_re = r"(\s*\{\{\s*base_url\s*\}\})/(.*)"
+    url_re = r"[Uu][Rr][Ll]"
 
     def collect_errors(self, **kwargs) -> List[str]:
         """
         Return the errors found by this validator
         """
         del kwargs
-        if self.assay_type != 'Publication':
+        if self.assay_type != "Publication":
             return []  # We only test Publication data
         rslt = []
         for path in self.paths:
             try:
-                vignette_path = path / 'vignettes'
-                assert vignette_path.is_dir(), 'vignettes not found or not a directory'
-                for this_vignette_path in vignette_path.glob('*'):
-                    assert this_vignette_path.is_dir(), (f"Found the non-dir {this_vignette_path}"
-                                                         " in vignettes")
-                    this_vignette_all_paths = set(this_vignette_path.glob('*'))
+                vignette_path = path / "vignettes"
+                assert vignette_path.is_dir(), "vignettes not found or not a directory"
+                for this_vignette_path in vignette_path.glob("*"):
+                    assert this_vignette_path.is_dir(), (
+                        f"Found the non-dir {this_vignette_path}" " in vignettes"
+                    )
+                    this_vignette_all_paths = set(this_vignette_path.glob("*"))
                     if not all(pth.is_file() for pth in this_vignette_all_paths):
-                        raise AssertionError('Found a subdirectory in a vignette')
+                        raise AssertionError("Found a subdirectory in a vignette")
                     md_found = False
                     vig_figures = []
-                    for md_path in this_vignette_path.glob('*.md'):
+                    for md_path in this_vignette_path.glob("*.md"):
                         if md_found:
-                            raise AssertionError('A vignette has more than one markdown file')
+                            raise AssertionError("A vignette has more than one markdown file")
                         else:
                             md_found = True
                         vig_fm = frontmatter.loads(md_path.read_text())
-                        for key in ['name', 'figures']:
-                            assert key in vig_fm.metadata, ('vignette markdown is incorrectly'
-                                                            f' formatted or has no {key}')
-                        for fig_dict in vig_fm.metadata['figures']:
-                            assert 'file' in fig_dict, 'figure dict does not reference a file'
-                            assert 'name' in fig_dict, 'figure dict does not provide a name'
-                            vig_figures.append(fig_dict['file'])
+                        for key in ["name", "figures"]:
+                            assert key in vig_fm.metadata, (
+                                "vignette markdown is incorrectly" f" formatted or has no {key}"
+                            )
+                        for fig_dict in vig_fm.metadata["figures"]:
+                            assert "file" in fig_dict, "figure dict does not reference a file"
+                            assert "name" in fig_dict, "figure dict does not provide a name"
+                            vig_figures.append(fig_dict["file"])
                         this_vignette_all_paths.remove(md_path)
                     for fname in vig_figures:
-                        rslt.extend(self.validate_vitessce_config(this_vignette_path / fname, path))
+                        rslt.extend(
+                            self.validate_vitessce_config(this_vignette_path / fname, path)
+                        )
                         this_vignette_all_paths.remove(this_vignette_path / fname)
-                    assert not this_vignette_all_paths, ('unexpected files in vignette:'
-                                                         f' {list(str(elt) for elt in this_vignette_all_paths)}')
+                    assert not this_vignette_all_paths, (
+                        "unexpected files in vignette:"
+                        f" {list(str(elt) for elt in this_vignette_all_paths)}"
+                    )
 
             except AssertionError as excp:
                 rslt.append(str(excp))
@@ -94,9 +102,10 @@ def validate_vitessce_config(self, json_path, path):
                     match = re.match(self.base_url_re, val)
                     if match:  # it starts with {{ base_url }}
                         extra_url = match.group(2)
-                        data_path = path / 'data' / extra_url
-                        assert data_path.exists(), ("expected data file"
-                                                    f" {Path('data') / extra_url} is absent")
+                        data_path = path / "data" / extra_url
+                        assert data_path.exists(), (
+                            "expected data file" f" {Path('data') / extra_url} is absent"
+                        )
             except AssertionError as excp:
                 rslt.append(str(excp))
 
diff --git a/src/ingest_validation_tests/tiff_validator.py b/src/ingest_validation_tests/tiff_validator.py
index db840c1..23b568a 100644
--- a/src/ingest_validation_tests/tiff_validator.py
+++ b/src/ingest_validation_tests/tiff_validator.py
@@ -37,13 +37,15 @@ class TiffValidator(Validator):
     cost = 1.0
 
     def collect_errors(self, **kwargs) -> List[str]:
-        threads = kwargs.get('coreuse', None) or cpu_count() // 4 or 1
+        threads = kwargs.get("coreuse", None) or cpu_count() // 4 or 1
         pool = Pool(threads)
         filenames_to_test = []
-        for glob_expr in ['**/*.tif', '**/*.tiff', '**/*.TIFF', '**/*.TIF']:
+        for glob_expr in ["**/*.tif", "**/*.tiff", "**/*.TIFF", "**/*.TIF"]:
             for path in self.paths:
                 for file in path.glob(glob_expr):
                     filenames_to_test.append(file)
-        return list(rslt for rslt in pool.imap_unordered(_check_tiff_file,
-                                                         filenames_to_test)
-                    if rslt is not None)
+        return list(
+            rslt
+            for rslt in pool.imap_unordered(_check_tiff_file, filenames_to_test)
+            if rslt is not None
+        )
diff --git a/tests/pytest_runner.py b/tests/pytest_runner.py
index 0743dad..ac7e5a3 100644
--- a/tests/pytest_runner.py
+++ b/tests/pytest_runner.py
@@ -1,12 +1,15 @@
 import sys
 from pathlib import Path
+
 import pytest
 
-class add_path():
+
+class add_path:
     """
     Add an element to sys.path using a context.
     Thanks to Eugene Yarmash https://stackoverflow.com/a/39855753
     """
+
     def __init__(self, path):
         self.path = path
 
@@ -22,16 +25,13 @@ def __exit__(self, exc_type, exc_value, traceback):
 
 def main():
     if len(sys.argv) != 2:
-        sys.exit(f'usage: {sys.argv[0]} path-to-ingest-validation-tools')
-    tools_path = Path(sys.argv[1]).resolve() / 'src'
-    plugins_path = (Path(__file__).resolve().parent.parent
-                    / 'src'
-                    / 'ingest_validation_tests'
-                    )
+        sys.exit(f"usage: {sys.argv[0]} path-to-ingest-validation-tools")
+    tools_path = Path(sys.argv[1]).resolve() / "src"
+    plugins_path = Path(__file__).resolve().parent.parent / "src" / "ingest_validation_tests"
     with add_path(str(tools_path)):
         with add_path(str(plugins_path)):
-            sys.exit(pytest.main(['-vv']))
+            sys.exit(pytest.main(["-vv"]))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/tests/test_codex_common_errors_validator.py b/tests/test_codex_common_errors_validator.py
index 23cd5e7..68d1d82 100644
--- a/tests/test_codex_common_errors_validator.py
+++ b/tests/test_codex_common_errors_validator.py
@@ -1,53 +1,84 @@
-from pathlib import Path
 import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_starts_list'), (
-    ('test_data/fake_codex_tree_0.zip', ['Unexpected error reading']),
-    ('test_data/fake_codex_tree_1.zip', ['The segmentation.json file is in',
-                                         'Unexpected error reading']),
-    ('test_data/fake_codex_tree_2.zip', ['The raw/src_ subdirectory is missing?']),
-    ('test_data/fake_codex_tree_3.zip', ['channelnames.txt is missing']),
-    ('test_data/fake_codex_tree_4.zip', ['Unexpected error reading']),
-    ('test_data/fake_codex_tree_5.zip', ['channelnames.txt does not match channelnames_report.txt'
-                                         ' on line 1: HLADR vs HLA-DR',
-                                         'channelnames.txt does not match channelnames_report.txt'
-                                         ' on line 6: Empty vs Blank']),
-    ('test_data/fake_codex_tree_6.zip', ['Could not parse ']),
-    ('test_data/fake_codex_tree_7.zip', []),
-    ('test_data/fake_codex_tree_8.zip', ['Region numbers are not contiguous']),
-    ('test_data/fake_codex_tree_9.zip', ['Cycle numbers are not contiguous',
-                                         'The number of channels per cycle is not constant']),
-    ('test_data/fake_codex_tree_10.zip', ['Directory string "cyc0a3_reg001_211119_040351"'
-                                          ' cycle number is not an integer']),
-    ('test_data/fake_codex_tree_11.zip', ['Directory string "cyc003_reg0a1_211119_040351"'
-                                          ' region number is not an integer']),
-    ('test_data/fake_codex_tree_12.zip', ['Directory string "cyc002_rig001_211119_040351"'
-                                          ' does not include "_reg"']),
-    ('test_data/fake_codex_tree_13.zip', ['Cycle numbering does not start at 1']),
-    ('test_data/fake_codex_tree_14.zip', ['Region numbering does not start at 1']),
-    ('test_data/fake_codex_tree_15.zip', ['Not all cycle/region pairs are present',
-                                          'The number of channels per cycle is not constant']),
-    ('test_data/fake_codex_tree_16.zip', []),
-    ('test_data/fake_codex_tree_17.zip', ['A dataset.json file exists but is in the wrong place',
-                                          'Region numbering does not start at 1']),
-    ('test_data/fake_codex_tree_18.zip', ['The number of channels per cycle is not constant']),
-    ('test_data/fake_codex_tree_19.zip', []),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_starts_list"),
+    (
+        ("test_data/fake_codex_tree_0.zip", ["Unexpected error reading"]),
+        (
+            "test_data/fake_codex_tree_1.zip",
+            ["The segmentation.json file is in", "Unexpected error reading"],
+        ),
+        ("test_data/fake_codex_tree_2.zip", ["The raw/src_ subdirectory is missing?"]),
+        ("test_data/fake_codex_tree_3.zip", ["channelnames.txt is missing"]),
+        ("test_data/fake_codex_tree_4.zip", ["Unexpected error reading"]),
+        (
+            "test_data/fake_codex_tree_5.zip",
+            [
+                "channelnames.txt does not match channelnames_report.txt"
+                " on line 1: HLADR vs HLA-DR",
+                "channelnames.txt does not match channelnames_report.txt"
+                " on line 6: Empty vs Blank",
+            ],
+        ),
+        ("test_data/fake_codex_tree_6.zip", ["Could not parse "]),
+        ("test_data/fake_codex_tree_7.zip", []),
+        ("test_data/fake_codex_tree_8.zip", ["Region numbers are not contiguous"]),
+        (
+            "test_data/fake_codex_tree_9.zip",
+            [
+                "Cycle numbers are not contiguous",
+                "The number of channels per cycle is not constant",
+            ],
+        ),
+        (
+            "test_data/fake_codex_tree_10.zip",
+            ['Directory string "cyc0a3_reg001_211119_040351"' " cycle number is not an integer"],
+        ),
+        (
+            "test_data/fake_codex_tree_11.zip",
+            ['Directory string "cyc003_reg0a1_211119_040351"' " region number is not an integer"],
+        ),
+        (
+            "test_data/fake_codex_tree_12.zip",
+            ['Directory string "cyc002_rig001_211119_040351"' ' does not include "_reg"'],
+        ),
+        ("test_data/fake_codex_tree_13.zip", ["Cycle numbering does not start at 1"]),
+        ("test_data/fake_codex_tree_14.zip", ["Region numbering does not start at 1"]),
+        (
+            "test_data/fake_codex_tree_15.zip",
+            [
+                "Not all cycle/region pairs are present",
+                "The number of channels per cycle is not constant",
+            ],
+        ),
+        ("test_data/fake_codex_tree_16.zip", []),
+        (
+            "test_data/fake_codex_tree_17.zip",
+            [
+                "A dataset.json file exists but is in the wrong place",
+                "Region numbering does not start at 1",
+            ],
+        ),
+        ("test_data/fake_codex_tree_18.zip", ["The number of channels per cycle is not constant"]),
+        ("test_data/fake_codex_tree_19.zip", []),
+    ),
+)
 def test_codex_common_errors_validator(test_data_fname, msg_starts_list, tmp_path):
     from codex_common_errors_validator import CodexCommonErrorsValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = CodexCommonErrorsValidator([Path(tmp_path / test_data_path.stem)],
-                                           'CODEX'
-                                           )
+    validator = CodexCommonErrorsValidator([Path(tmp_path / test_data_path.stem)], "CODEX")
     errors = validator.collect_errors()[:]
-    print(f'ERRORS FOLLOW FOR {test_data_fname}')
+    print(f"ERRORS FOLLOW FOR {test_data_fname}")
     for err in errors:
         print(err)
-    print('ERRORS ABOVE')
+    print("ERRORS ABOVE")
     assert len(msg_starts_list) == len(errors)
     for err_str, expected_str in zip(errors, msg_starts_list):
         assert err_str.startswith(expected_str)
diff --git a/tests/test_codex_json_validator.py b/tests/test_codex_json_validator.py
index a2998ff..4539270 100644
--- a/tests/test_codex_json_validator.py
+++ b/tests/test_codex_json_validator.py
@@ -1,22 +1,27 @@
-from pathlib import Path
-import zipfile
 import re
+import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_re_list'), (
-    ('test_data/good_codex_akoya_directory_v1_with_dataset_json_fails.zip',
-     [".*is not of type 'object'.*"]),
-    ('test_data/good_codex_akoya_directory_v1_with_dataset_json_passes.zip', []),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_re_list"),
+    (
+        (
+            "test_data/good_codex_akoya_directory_v1_with_dataset_json_fails.zip",
+            [".*is not of type 'object'.*"],
+        ),
+        ("test_data/good_codex_akoya_directory_v1_with_dataset_json_passes.zip", []),
+    ),
+)
 def test_codex_json_validator(test_data_fname, msg_re_list, tmp_path):
     from codex_json_validator import CodexJsonValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = CodexJsonValidator(tmp_path / test_data_path.stem,
-                                   'CODEX'
-                                   )
+    validator = CodexJsonValidator(tmp_path / test_data_path.stem, "CODEX")
     errors = validator.collect_errors()[:]
     assert len(msg_re_list) == len(errors)
     for err_str, expected_re in zip(errors, msg_re_list):
diff --git a/tests/test_fastq_validator_logic.py b/tests/test_fastq_validator_logic.py
index a244cd8..c554d6f 100644
--- a/tests/test_fastq_validator_logic.py
+++ b/tests/test_fastq_validator_logic.py
@@ -1,36 +1,30 @@
-from multiprocessing import Lock
+import gzip
 from pathlib import Path
 from typing import TextIO
-import gzip
 
 import pytest
+from src.ingest_validation_tests.fastq_validator_logic import FASTQValidatorLogic
 
-from src.ingest_validation_tests.fastq_validator_logic import \
-    FASTQValidatorLogic
-
-_GOOD_RECORDS = '''\
+_GOOD_RECORDS = """\
 @A12345:123:A12BCDEFG:1:1234:1000:1234 1:N:0:NACTGACTGA+CTGACTGACT
 NACTGACTGA
 +
 #FFFFFFFFF
-'''
+"""
 
 _GOOD_QUALITY_RECORD = (
-    '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-    r'[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
+    "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    r"[\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
 )
 
 _GOOD_SEQUENCE_FOR_QUALITY = (
-    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
-    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
+    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
 )
 
 
 def _open_output_file(filename: Path, use_gzip: bool) -> TextIO:
-    return (
-        gzip.open(filename, 'wt') if use_gzip
-        else open(filename, 'wt')
-    )
+    return gzip.open(filename, "wt") if use_gzip else open(filename, "wt")
 
 
 class TestFASTQValidatorLogic:
@@ -46,33 +40,29 @@ def test_fastq_validator_no_files(self, fastq_validator, tmp_path):
 
     def test_fastq_validator_bad_gzip_data(self, fastq_validator, tmp_path):
         # Note that the filename ends in .gz, although it will not contain
         # compressed data.
-        test_file = tmp_path.joinpath('test.fastq.gz')
+        test_file = tmp_path.joinpath("test.fastq.gz")
         with _open_output_file(test_file, False) as output:
             output.write(_GOOD_RECORDS)
 
         fastq_validator.validate_fastq_file(test_file)
 
         assert "Bad gzip file" in fastq_validator.errors[0]
 
-    def test_fastq_validator_unrecognized_file(self, fastq_validator,
-                                               tmp_path):
-        test_file = tmp_path.joinpath('test.txt')
+    def test_fastq_validator_unrecognized_file(self, fastq_validator, tmp_path):
+        test_file = tmp_path.joinpath("test.txt")
         with _open_output_file(test_file, False) as output:
             output.write(_GOOD_RECORDS)
 
         fastq_validator.validate_fastq_file(test_file)
 
-        assert "Filename does not have proper format" in \
-            fastq_validator.errors[0]
+        assert "Filename does not have proper format" in fastq_validator.errors[0]
 
-    def test_fastq_validator_empty_directory(self, fastq_validator,
-                                             tmp_path):
+    def test_fastq_validator_empty_directory(self, fastq_validator, tmp_path):
         fastq_validator.validate_fastq_files_in_path([tmp_path], 2)
         # No files in path means no errors
         assert fastq_validator.errors == []
 
     @pytest.mark.parametrize("use_gzip", [False, True])
     def test_fastq_validator_basic(self, fastq_validator, tmp_path, use_gzip):
-        test_file = tmp_path.joinpath('test.fastq.gz' if use_gzip
-                                      else 'test.fastq')
+        test_file = tmp_path.joinpath("test.fastq.gz" if use_gzip else "test.fastq")
         with _open_output_file(test_file, use_gzip) as output:
             output.write(_GOOD_RECORDS)
 
@@ -80,9 +70,9 @@ def test_fastq_validator_basic(self, fastq_validator, tmp_path, use_gzip):
         assert not fastq_validator.errors
 
     def test_fastq_validator_bad_file(self, fastq_validator, tmp_path):
-        test_file = tmp_path.joinpath('test.fastq')
+        test_file = tmp_path.joinpath("test.fastq")
         with _open_output_file(test_file, False) as output:
-            output.write('ABCDEF')
+            output.write("ABCDEF")
 
         fastq_validator.validate_fastq_files_in_path([tmp_path], 2)
 
@@ -91,56 +81,54 @@ def test_fastq_validator_bad_file(self, fastq_validator, tmp_path):
         assert fastq_validator.errors
 
     def test_fastq_validator_duplicate_file(self, fastq_validator, tmp_path):
-        for subdirectory in ['a', 'b']:
+        for subdirectory in ["a", "b"]:
             subdirectory_path = tmp_path.joinpath(subdirectory)
             subdirectory_path.mkdir()
-            with _open_output_file(subdirectory_path.joinpath('test.fastq'),
-                                   False) as output:
+            with _open_output_file(subdirectory_path.joinpath("test.fastq"), False) as output:
                 output.write(_GOOD_RECORDS)
 
         fastq_validator.validate_fastq_files_in_path([tmp_path], 2)
 
-        assert "test.fastq has been found multiple times" in \
-            fastq_validator.errors[0]
+        assert "test.fastq has been found multiple times" in fastq_validator.errors[0]
 
     def test_fastq_validator_io_error(self, fastq_validator, tmp_path):
-        fake_path = tmp_path.joinpath('does-not-exist.fastq')
+        fake_path = tmp_path.joinpath("does-not-exist.fastq")
 
         fastq_validator.validate_fastq_file(fake_path)
 
         assert "Unable to open" in fastq_validator.errors[0]
 
     def test_fastq_validator_line_1_good(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('@SEQ_ID', 0)
+        result = fastq_validator.validate_fastq_record("@SEQ_ID", 0)
 
         assert not result
 
     def test_fastq_validator_line_1_bad(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('*SEQ_ID', 0)
+        result = fastq_validator.validate_fastq_record("*SEQ_ID", 0)
 
         assert "does not begin with '@'" in result[0]
 
     def test_fastq_validator_line_1_empty(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('', 0)
+        result = fastq_validator.validate_fastq_record("", 0)
 
         assert "does not begin with '@'" in result[0]
 
     def test_fastq_validator_line_2_good(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('ACTGACTGACTGNNNN', 1)
+        result = fastq_validator.validate_fastq_record("ACTGACTGACTGNNNN", 1)
 
         assert not result
 
     def test_fastq_validator_line_2_bad(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('ACTGACT$ACTGNNNN', 1)
+        result = fastq_validator.validate_fastq_record("ACTGACT$ACTGNNNN", 1)
 
         assert "contains invalid character(s): $" in result[0]
 
     def test_fastq_validator_line_3_good(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('+SEQ_ID', 2)
+        result = fastq_validator.validate_fastq_record("+SEQ_ID", 2)
 
         assert not result
 
     def test_fastq_validator_line_3_bad(self, fastq_validator):
-        result = fastq_validator.validate_fastq_record('!SEQ_ID', 2)
+        result = fastq_validator.validate_fastq_record("!SEQ_ID", 2)
 
         assert "does not begin with '+'" in result[0]
 
@@ -151,42 +139,41 @@ def test_fastq_validator_line_4_good(self, fastq_validator):
         assert not result
 
     def test_fastq_validator_line_4_bad(self, fastq_validator):
-        fastq_validator.validate_fastq_record('1234567', 1)
-        result = fastq_validator.validate_fastq_record('ABC !@#', 3)
+        fastq_validator.validate_fastq_record("1234567", 1)
+        result = fastq_validator.validate_fastq_record("ABC !@#", 3)
 
         assert 'contains invalid quality character(s): " "' in result[0]
 
     def test_fastq_validator_line_4_matching_length(self, fastq_validator):
-        fastq_validator.validate_fastq_record('1234567', 1)
-        result = fastq_validator.validate_fastq_record('ABCDEFG', 3)
+        fastq_validator.validate_fastq_record("1234567", 1)
+        result = fastq_validator.validate_fastq_record("ABCDEFG", 3)
 
         assert not result
 
-    def test_fastq_validator_line_4_mismatched_length(self, fastq_validator,
-                                                      tmp_path):
-        fastq_validator.validate_fastq_record('123456789ABCDEF', 1)
-        fastq_validator.validate_fastq_record('ABC', 3)
+    def test_fastq_validator_line_4_mismatched_length(self, fastq_validator, tmp_path):
+        fastq_validator.validate_fastq_record("123456789ABCDEF", 1)
+        fastq_validator.validate_fastq_record("ABC", 3)
 
-        test_data = '''\
+        test_data = """\
 @A12345:123:A12BCDEFG:1:1234:1000:1234 1:N:0:NACTGACTGA+CTGACTGACT
 NACTGACTGA
 +
 #FFFFFFFF
-'''
+"""
 
-        new_file = tmp_path.joinpath('test.fastq')
+        new_file = tmp_path.joinpath("test.fastq")
         with _open_output_file(new_file, False) as output:
             output.write(test_data)
 
         fastq_validator.validate_fastq_file(new_file)
 
-        assert "contains 9 characters which does not match line 2's 10" in \
-            fastq_validator.errors[0]
+        assert (
+            "contains 9 characters which does not match line 2's 10" in fastq_validator.errors[0]
+        )
 
-    def test_fastq_validator_record_counts_good(self, fastq_validator,
-                                                tmp_path):
+    def test_fastq_validator_record_counts_good(self, fastq_validator, tmp_path):
         for filename in [
-            'SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I1_001.fastq',
-            'SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I2_001.fastq'
+            "SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I1_001.fastq",
+            "SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I2_001.fastq",
         ]:
             new_file = tmp_path.joinpath(filename)
             with _open_output_file(new_file, False) as output:
@@ -196,15 +183,14 @@ def test_fastq_validator_record_counts_good(self, fastq_validator,
 
         assert not fastq_validator.errors
 
-    def test_fastq_validator_record_counts_bad(self, fastq_validator,
-                                               tmp_path):
-        with _open_output_file(tmp_path.joinpath(
-                'SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I1_001.fastq'),
-                False) as output:
+    def test_fastq_validator_record_counts_bad(self, fastq_validator, tmp_path):
+        with _open_output_file(
+            tmp_path.joinpath("SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I1_001.fastq"), False
+        ) as output:
             output.write(_GOOD_RECORDS)
-        with _open_output_file(tmp_path.joinpath(
-                'SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I2_001.fastq'),
-                False) as output:
+        with _open_output_file(
+            tmp_path.joinpath("SREQ-1_1-ACTGACTGAC-TGACTGACTG_S1_L001_I2_001.fastq"), False
+        ) as output:
             output.write(_GOOD_RECORDS)
             output.write(_GOOD_RECORDS)
diff --git a/tests/test_gz_validator.py b/tests/test_gz_validator.py
index 0c6c8b2..ad748c4 100644
--- a/tests/test_gz_validator.py
+++ b/tests/test_gz_validator.py
@@ -1,19 +1,24 @@
-from pathlib import Path
-import zipfile
 import re
+import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_re_list'), (
-    ('test_data/fake_snrnaseq_tree_good.zip', []),
-    ('test_data/fake_snrnaseq_tree_bad.zip', ['.*text2.txt.gz is not a valid gzipped file']),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_re_list"),
+    (
+        ("test_data/fake_snrnaseq_tree_good.zip", []),
+        ("test_data/fake_snrnaseq_tree_bad.zip", [".*text2.txt.gz is not a valid gzipped file"]),
+    ),
+)
 def test_gz_validator(test_data_fname, msg_re_list, tmp_path):
     from gz_validator import GZValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = GZValidator(tmp_path / test_data_path.stem, 'snRNAseq')
+    validator = GZValidator(tmp_path / test_data_path.stem, "snRNAseq")
     errors = validator.collect_errors(coreuse=4)[:]
     assert len(msg_re_list) == len(errors)
     for err_str, re_str in zip(errors, msg_re_list):
diff --git a/tests/test_ome_tiff_validator.py b/tests/test_ome_tiff_validator.py
index 09e2163..89ad198 100644
--- a/tests/test_ome_tiff_validator.py
+++ b/tests/test_ome_tiff_validator.py
@@ -1,20 +1,27 @@
-from pathlib import Path
-import zipfile
 import re
+import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_re_list'), (
-    ('test_data/codex_tree_ometiff_bad.zip',
-     ['.*tubhiswt_C0_bad.ome.tif is not a valid OME.TIFF file.*']),
-    ('test_data/codex_tree_ometiff_good.zip',[]),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_re_list"),
+    (
+        (
+            "test_data/codex_tree_ometiff_bad.zip",
+            [".*tubhiswt_C0_bad.ome.tif is not a valid OME.TIFF file.*"],
+        ),
+        ("test_data/codex_tree_ometiff_good.zip", []),
+    ),
+)
 def test_ome_tiff_validator(test_data_fname, msg_re_list, tmp_path):
     from ome_tiff_validator import OmeTiffValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = OmeTiffValidator(tmp_path / test_data_path.stem, 'CODEX')
+    validator = OmeTiffValidator(tmp_path / test_data_path.stem, "CODEX")
     errors = validator.collect_errors(coreuse=4)[:]
     assert len(msg_re_list) == len(errors)
     for err_str, re_str in zip(errors, msg_re_list):
diff --git a/tests/test_publication_validator.py b/tests/test_publication_validator.py
index ebb768c..50c3a0e 100644
--- a/tests/test_publication_validator.py
+++ b/tests/test_publication_validator.py
@@ -1,34 +1,46 @@
-from pathlib import Path
-import zipfile
 import re
+import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_re_list'), (
-    ('test_data/publication_tree_good.zip', []),
-    ('test_data/publication_tree_good_complex.zip', []),
-    ('test_data/publication_tree_bad_complex.zip',
-     [
-      'expected data file data/vignette_12/A/0/325b936e-4132-45fe-8674-9abbde568be8 is absent',
-      'expected data file data/vignette_12/A/0/9db02302-07d9-4c54-ad45-4578c4822cce is absent',
-      'expected data file data/vignette_12/A/1/90b3667d-3ccc-4241-9227-fee578d41bac is absent',
-     ]),
-    ('test_data/publication_tree_bad_1.zip', ['vignettes not found or not a directory']),
-    ('test_data/publication_tree_bad_2.zip', ['Found a subdirectory in a vignette']),
-    ('test_data/publication_tree_bad_3.zip', ['A vignette has more than one markdown file']),
-    ('test_data/publication_tree_bad_4.zip', ['figure dict does not provide a name']),
-    ('test_data/publication_tree_bad_5.zip', ['figure dict does not reference a file']),
-    ('test_data/publication_tree_bad_6.zip', ['unexpected files in vignette.*']),
-    ('test_data/publication_tree_bad_7.zip', ['expected data file'
-                                              ' data/codeluppi_2018_nature_methods.molecules.h5ad.zarr'
-                                              ' is absent']),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_re_list"),
+    (
+        ("test_data/publication_tree_good.zip", []),
+        ("test_data/publication_tree_good_complex.zip", []),
+        (
+            "test_data/publication_tree_bad_complex.zip",
+            [
+                "expected data file data/vignette_12/A/0/325b936e-4132-45fe-8674-9abbde568be8 is absent",  # noqa: E501
+                "expected data file data/vignette_12/A/0/9db02302-07d9-4c54-ad45-4578c4822cce is absent",  # noqa: E501
+                "expected data file data/vignette_12/A/1/90b3667d-3ccc-4241-9227-fee578d41bac is absent",  # noqa: E501
+            ],
+        ),
+        ("test_data/publication_tree_bad_1.zip", ["vignettes not found or not a directory"]),
+        ("test_data/publication_tree_bad_2.zip", ["Found a subdirectory in a vignette"]),
+        ("test_data/publication_tree_bad_3.zip", ["A vignette has more than one markdown file"]),
+        ("test_data/publication_tree_bad_4.zip", ["figure dict does not provide a name"]),
+        ("test_data/publication_tree_bad_5.zip", ["figure dict does not reference a file"]),
+        ("test_data/publication_tree_bad_6.zip", ["unexpected files in vignette.*"]),
+        (
+            "test_data/publication_tree_bad_7.zip",
+            [
+                "expected data file"
+                " data/codeluppi_2018_nature_methods.molecules.h5ad.zarr"
+                " is absent"
+            ],
+        ),
+    ),
+)
 def test_publication_validator(test_data_fname, msg_re_list, tmp_path):
     from publication_validator import PublicationValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = PublicationValidator(tmp_path / test_data_path.stem, 'Publication')
+    validator = PublicationValidator(tmp_path / test_data_path.stem, "Publication")
     errors = validator.collect_errors(coreuse=4)[:]
     print(f"errors: {errors}")
     matched_err_str_list = []
diff --git a/tests/test_tiff_validator.py b/tests/test_tiff_validator.py
index c2d271b..4ebe66c 100644
--- a/tests/test_tiff_validator.py
+++ b/tests/test_tiff_validator.py
@@ -1,24 +1,32 @@
-from pathlib import Path
-import zipfile
 import re
+import zipfile
+from pathlib import Path
 
 import pytest
 
-@pytest.mark.parametrize(('test_data_fname', 'msg_re_list'), (
-    ('test_data/tiff_tree_good.zip', []),
-    ('test_data/tiff_tree_bad.zip', [
-        '.*notatiff.tif is not a valid TIFF file',
-        '.*notatiff.tiff is not a valid TIFF file',
-        '.*notatiff.TIFF is not a valid TIFF file',
-        '.*notatiff.TIF is not a valid TIFF file',
-        ]),
-    ))
+
+@pytest.mark.parametrize(
+    ("test_data_fname", "msg_re_list"),
+    (
+        ("test_data/tiff_tree_good.zip", []),
+        (
+            "test_data/tiff_tree_bad.zip",
+            [
+                ".*notatiff.tif is not a valid TIFF file",
+                ".*notatiff.tiff is not a valid TIFF file",
+                ".*notatiff.TIFF is not a valid TIFF file",
+                ".*notatiff.TIF is not a valid TIFF file",
+            ],
+        ),
+    ),
+)
 def test_tiff_validator(test_data_fname, msg_re_list, tmp_path):
     from tiff_validator import TiffValidator
+
     test_data_path = Path(test_data_fname)
     zfile = zipfile.ZipFile(test_data_path)
     zfile.extractall(tmp_path)
-    validator = TiffValidator(tmp_path / test_data_path.stem, 'codex')
+    validator = TiffValidator(tmp_path / test_data_path.stem, "codex")
     errors = validator.collect_errors(coreuse=4)[:]
     print(f"errors: {errors}")
     matched_err_str_list = []

From 174aae05eca907b1f24589040b2a5e300838c704 Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Thu, 1 Feb 2024 12:40:50 -0500
Subject: [PATCH 04/19] testing black gh action

---
 .github/workflows/black.yml | 14 +++++++++++++
 README.md                   | 42 +++++++++++++++++++++++++++++++++++--
 requirements-dev.txt        |  2 +-
 3 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/black.yml

diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 0000000..a4a0460
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,14 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose --line-length 99"
+          src: "./src"
+          version: "~= 24.0"
diff --git a/README.md b/README.md
index 2db444a..033f5b4 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,47 @@
 # ingest-validation-tests
 
-This repository contains plug-in tests for use during validation of submissions. It is referenced by ingest-validation-tools.
+This repository contains plug-in tests for use during validation of submissions. It is referenced by ingest-validation-tools.
 
 ## Development process
 
+### Branches
+
 - Make new feature branches from `devel`.
 - Make PRs to `devel`. (This is the default branch.)
-- The last reviewer to approve a PR should merge it. At the moment that is likely to be @jswelling .
+- The last reviewer to approve a PR should merge it.
+
+### Setup
+
+- Creating and activating a virtual environment is recommended. These instructions assume you are using a virtual environment. Example using venv:
+
+```
+python3.9 -m venv hm-ingest-validation-tests
+source hm-ingest-validation-tests/bin/activate
+```
+
+- Run `pip install -r requirements-dev.txt`
+- (optional) Integrate black with your editor.
+  - [Instructions for black.](https://black.readthedocs.io/en/stable/integrations/editors.html)
+  - If you choose not to integrate black with your editor, run the following from the base `ingest-validation-tests` directory before pushing code to GitHub: `black --line-length 99 .`
+
+### Testing
+
+- If ingest-validation-tools is not already set up:
+
+```
+# Starting from ingest-validation-tests...
+cd ..
+git clone https://github.com/hubmapconsortium/ingest-validation-tools.git +cd ingest-validation-tests +pip install -r ../ingest-validation-tools/requirements.txt +pip install -r ../ingest-validation-tools/requirements-dev.txt +``` + +- If ingest-validation-tools is already set up, add the appropriate ingest-validation-tools path and run + +``` +pip install -r /requirements.txt +pip install -r /requirements-dev.txt +``` + +- Run `test.sh` diff --git a/requirements-dev.txt b/requirements-dev.txt index d5c087a..2cd9d49 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,10 +1,10 @@ black==23.12.1 -flake8==7.0.0 git+https://github.com/hubmapconsortium/fastq-utils.git@v0.2.5#egg=hubmap-fastq-utils imagecodecs>=2023.3.16 isort==5.13.2 jsonschema==4.4.0 pandas>=1.2.0 +pytest==8.0.0 python-frontmatter>=1.0.0 tifffile==2020.10.1 xmlschema>=1.6 From 0dd0cb643a841301b7f1acea6e7c1891bfa70af6 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 12:53:29 -0500 Subject: [PATCH 05/19] linting change --- src/ingest_validation_tests/fastq_validator_logic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ingest_validation_tests/fastq_validator_logic.py b/src/ingest_validation_tests/fastq_validator_logic.py index 8095b74..440ceb4 100644 --- a/src/ingest_validation_tests/fastq_validator_logic.py +++ b/src/ingest_validation_tests/fastq_validator_logic.py @@ -122,9 +122,9 @@ def _validate_fastq_line_4(self, line: str) -> List[str]: def validate_fastq_record(self, line: str, line_number: int) -> List[str]: line_index = line_number % 4 + 1 - validator_method: Callable[ - [FASTQValidatorLogic, str], List[str] - ] = self._VALIDATE_FASTQ_LINE_METHODS[line_index] + validator_method: Callable[[FASTQValidatorLogic, str], List[str]] = ( + self._VALIDATE_FASTQ_LINE_METHODS[line_index] + ) assert validator_method, f"No validator method defined for record index {line_index}" From 303e87d8179cceb5b310d0faa64f76726b213d69 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 13:01:40 -0500 Subject: [PATCH 06/19] testing update to dismiss error about node version --- .github/workflows/black.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index a4a0460..d8d7e0b 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,7 +6,7 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: psf/black@stable with: options: "--check --verbose --line-length 99" From edac78ced52c81068baa4504558d7bdd9e5eeeed Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 13:06:05 -0500 Subject: [PATCH 07/19] trying isort gh action --- .github/workflows/isort.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/workflows/isort.yml diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml new file mode 100644 index 0000000..7bfa84f --- /dev/null +++ b/.github/workflows/isort.yml @@ -0,0 +1,13 @@ +name: Run isort +on: + - push + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: isort/isort-action@v1 + with: + configuration: "--profile black" + requirements-files: "requirements.txt requirements-dev.txt" From 9d368d12c070eddb93dc35d96786b4a7740edd3f Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 13:49:30 -0500 Subject: [PATCH 08/19] testing isort 3 --- .github/workflows/isort.yml | 9 ++++++--- 1 file changed, 
6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml
index 7bfa84f..fdf5e4a 100644
--- a/.github/workflows/isort.yml
+++ b/.github/workflows/isort.yml
@@ -7,7 +7,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: isort/isort-action@v1
+      - uses: actions/setup-python@v5
         with:
-          configuration: "--profile black"
-          requirements-files: "requirements.txt requirements-dev.txt"
+          python-version: 3.9
+      - uses: isort/isort-action@master
+        with:
+          configuration: "--profile black"
+          requirementsFiles: "requirements.txt requirements-test.txt"

From 2f6f1d9cbc2070e63565625537acf7f013b97f2c Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Thu, 1 Feb 2024 13:51:03 -0500
Subject: [PATCH 09/19] testing isort 4

---
 .github/workflows/isort.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml
index fdf5e4a..52f9cf5 100644
--- a/.github/workflows/isort.yml
+++ b/.github/workflows/isort.yml
@@ -12,5 +12,5 @@ jobs:
           python-version: 3.9
       - uses: isort/isort-action@master
         with:
-          configuration: "--profile black"
+          configuration: "--check-only --diff --profile black"
           requirementsFiles: "requirements.txt requirements-test.txt"

From 4f9c09fb12e3cd645a5aa8c925c0106bcacdcdf4 Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Thu, 1 Feb 2024 14:10:23 -0500
Subject: [PATCH 10/19] updated actions to run on PR; updated README.md

---
 .github/workflows/black.yml | 3 +--
 .github/workflows/isort.yml | 3 +--
 README.md                   | 5 ++++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
index d8d7e0b..7c02822 100644
--- a/.github/workflows/black.yml
+++ b/.github/workflows/black.yml
@@ -1,6 +1,5 @@
 name: Lint
-
-on: [push, pull_request]
+on: pull_request
 
 jobs:
   lint:
diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml
index 52f9cf5..db105a8 100644
--- a/.github/workflows/isort.yml
+++ b/.github/workflows/isort.yml
@@ -1,6 +1,5 @@
 name: Run isort
-on:
-  - push
+on: pull_request
 
 jobs:
   build:
diff --git a/README.md b/README.md
index 033f5b4..4bddaf1 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,10 @@ This repository contains plug-in tests for use during validation of submissions.
 ### Branches
 
 - Make new feature branches from `devel`.
+- Before submitting a PR, make sure your code is black and isort compliant. Run the following from the base `ingest-validation-tests` directory:
+  - `black --line-length 99 .` (if you choose not to integrate black with your editor; see Setup section)
+  - `isort .`
+
 - Make PRs to `devel`. (This is the default branch.)
 - The last reviewer to approve a PR should merge it.
@@ -22,7 +26,6 @@ source hm-ingest-validation-tests/bin/activate
 ```
 
 - Run `pip install -r requirements-dev.txt`
 - (optional) Integrate black with your editor.
  - [Instructions for black.](https://black.readthedocs.io/en/stable/integrations/editors.html)
-- If you choose not to integrate black with your editor, run the following from the base `ingest-validation-tests` directory before pushing code to GitHub: `black --line-length 99 .`
 
 ### Testing
 
From a029b0061d251cba10393e0be7843e2a86fc9a00 Mon Sep 17 00:00:00 2001
From: Gesina Phillips
Date: Thu, 1 Feb 2024 14:35:56 -0500
Subject: [PATCH 11/19] cleanup

---
 .flake8    |  3 ---
 .gitignore |  1 +
 README.md  | 48 +++++++++++++++++++++++++++---------------------
 3 files changed, 28 insertions(+), 24 deletions(-)
 delete mode 100644 .flake8

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index ad98539..0000000
--- a/.flake8
+++ /dev/null
@@ -1,3 +0,0 @@
-[flake8]
-max-line-length = 100
-extend-ignore = E203
diff --git a/.gitignore b/.gitignore
index da9fde3..79cb0cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .vscode/
+.flake8
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index 4bddaf1..b0f37ab 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,13 @@ This repository contains plug-in tests for use during validation of submissions.
 ### Branches
 
 - Make new feature branches from `devel`.
 - Before submitting a PR, make sure your code is black and isort compliant. Run the following from the base `ingest-validation-tests` directory:
-  - `black --line-length 99 .` (if you choose not to integrate black with your editor; see Setup section)
-  - `isort .`
+
+  ```
+  black --line-length 99 .
+  isort --profile black --multi-line 3 .
+  ```
+
+  (You can integrate black and potentially isort with your editor to skip this step, see Setup section below)
 
 - Make PRs to `devel`. (This is the default branch.)
 - The last reviewer to approve a PR should merge it.
@@ -18,33 +23,34 @@
 
 - Creating and activating a virtual environment is recommended. These instructions assume you are using a virtual environment. Example using venv:
 
-```
-python3.9 -m venv hm-ingest-validation-tests
-source hm-ingest-validation-tests/bin/activate
-```
+  ```
+  python3.9 -m venv hm-ingest-validation-tests
+  source hm-ingest-validation-tests/bin/activate
+  ```
 
 - Run `pip install -r requirements-dev.txt`
 - (optional) Integrate black with your editor.
   - [Instructions for black.](https://black.readthedocs.io/en/stable/integrations/editors.html)
+- (optional) Integrate [isort](https://pycqa.github.io/isort/) with your editor.
 
 ### Testing
 
 - If ingest-validation-tools is not already set up:
 
  ```
  # Starting from ingest-validation-tests...
  cd ..
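+  # ingest-validation-tools is cloned as a sibling of ingest-validation-tests; the ../ paths below rely on that layout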
+ git clone https://github.com/hubmapconsortium/ingest-validation-tools.git + cd ingest-validation-tests + pip install -r ../ingest-validation-tools/requirements.txt + pip install -r ../ingest-validation-tools/requirements-dev.txt + ``` + +- If ingest-validation-tools is already set up, add the appropriate ingest-validation-tools path and run: + + ``` + pip install -r /requirements.txt + pip install -r /requirements-dev.txt + ``` - Run `test.sh` From 9604ce7a38b7a20ebfa37c93c95bffbdca0ed045 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 14:42:24 -0500 Subject: [PATCH 12/19] cleanup --- .github/workflows/isort.yml | 4 ++-- pyproject.toml | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml index db105a8..2ef4557 100644 --- a/.github/workflows/isort.yml +++ b/.github/workflows/isort.yml @@ -11,5 +11,5 @@ jobs: python-version: 3.9 - uses: isort/isort-action@master with: - configuration: "--check-only --diff --profile black" - requirementsFiles: "requirements.txt requirements-test.txt" + configuration: "--check-only --diff --profile black" + requirementsFiles: "requirements.txt requirements-test.txt" diff --git a/pyproject.toml b/pyproject.toml index 72dd8d9..93863fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,4 +4,3 @@ line-length = 99 [tool.isort] profile = "black" multi_line_output = 3 -src_paths = ["src/ingest-validation-tests"] From e30f4263b8b9c44c98262fc2f8fce199b209b33c Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Thu, 1 Feb 2024 14:45:11 -0500 Subject: [PATCH 13/19] oops forgot to update test file --- tests/test_fastq_validator_logic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_fastq_validator_logic.py b/tests/test_fastq_validator_logic.py index c554d6f..31bd68a 100644 --- a/tests/test_fastq_validator_logic.py +++ b/tests/test_fastq_validator_logic.py @@ -3,6 +3,7 @@ from typing import TextIO import pytest + from src.ingest_validation_tests.fastq_validator_logic import FASTQValidatorLogic _GOOD_RECORDS = """\ From 99858a1dfca86648c83b1f4b9241bd11d59c8b5d Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Fri, 2 Feb 2024 13:46:17 -0500 Subject: [PATCH 14/19] simplifying workflows yml file, adding flake8 --- .flake8 | 3 +++ .github/workflows/black.yml | 13 ------------- .github/workflows/isort.yml | 15 --------------- .github/workflows/linting.yml | 32 ++++++++++++++++++++++++++++++++ .gitignore | 1 - README.md | 6 ++++-- requirements-dev.txt | 1 + 7 files changed, 40 insertions(+), 31 deletions(-) create mode 100644 .flake8 delete mode 100644 .github/workflows/black.yml delete mode 100644 .github/workflows/isort.yml create mode 100644 .github/workflows/linting.yml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..70fc30a --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +extend-ignore = E203,E501,W503 +max-line-length = 99 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index 7c02822..0000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: Lint -on: pull_request - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: psf/black@stable - with: - options: "--check --verbose --line-length 99" - src: "./src" - version: "~= 24.0" diff --git a/.github/workflows/isort.yml b/.github/workflows/isort.yml deleted file mode 100644 index 2ef4557..0000000 --- a/.github/workflows/isort.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: Run isort -on: 
pull_request - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.9 - - uses: isort/isort-action@master - with: - configuration: "--check-only --diff --profile black" - requirementsFiles: "requirements.txt requirements-test.txt" diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..f34ebe8 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,32 @@ +name: Linters + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 black isort + - name: Flake8 Lint + run: | + flake8 --ignore=E501,W503,E203 + - name: Black Lint + run: | + black --line-length 99 --check --verbose + - name: isort Lint + run: | + isort --profile black --check-only --diff diff --git a/.gitignore b/.gitignore index 79cb0cf..da9fde3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ .vscode/ -.flake8 # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index b0f37ab..0cca7ba 100644 --- a/README.md +++ b/README.md @@ -7,14 +7,15 @@ This repository contains plug-in tests for use during validation of submissions. ### Branches - Make new feature branches from `devel`. -- Before submitting a PR, make sure your code is black and isort compliant. Run the following from the base `ingest-validation-tests` directory: +- Before submitting a PR, make sure your code is black, isort, and flake8 compliant. Run the following from the base `ingest-validation-tests` directory: ``` black --line-length 99 . isort --profile black --multi-line 3 . + flake8 ``` - (You can integrate black and potentially isort with your editor to skip this step, see Setup section below) + (Integrating black and potentially isort/flake8 with your editor may allow you to skip this step, see Setup section below.) - Make PRs to `devel`. (This is the default branch.) - The last reviewer to approve a PR should merge it. @@ -32,6 +33,7 @@ This repository contains plug-in tests for use during validation of submissions. - (optional) Integrate black with your editor. - [Instructions for black.](https://black.readthedocs.io/en/stable/integrations/editors.html) - (optional) Integrate [isort](https://pycqa.github.io/isort/) with your editor. +- (optional) Integrate [flake8](https://flake8.pycqa.org/en/latest/index.html) with your editor. 
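
The three optional-integration bullets above mirror exactly what the new `linting.yml` workflow runs in CI. If a single local entry point is preferred, a helper along these lines would work; this is an illustrative sketch only, not a script that ships with the repository:

```python
#!/usr/bin/env python3
"""Run the same checks CI runs: flake8, black, and isort (illustrative sketch)."""
import subprocess
import sys

# Mirrors the commands in .github/workflows/linting.yml and the README.
CHECKS = [
    ["flake8", "--ignore=E501,W503,E203", "."],
    ["black", "--line-length", "99", "--check", "."],
    ["isort", "--profile", "black", "--check-only", "--diff", "."],
]


def main() -> int:
    # Each linter exits non-zero when it finds problems; run all three and report.
    failed = [cmd[0] for cmd in CHECKS if subprocess.run(cmd).returncode != 0]
    if failed:
        print(f"Failed checks: {', '.join(failed)}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
```

Every command and flag above matches the workflow and README; only the wrapper script itself is invented here.
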
### Testing diff --git a/requirements-dev.txt b/requirements-dev.txt index 2cd9d49..ba6f71b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,5 @@ black==23.12.1 +flake8==7.0.0 git+https://github.com/hubmapconsortium/fastq-utils.git@v0.2.5#egg=hubmap-fastq-utils imagecodecs>=2023.3.16 isort==5.13.2 From 67436d4e59399b25fce5d76d159edc85a42b9ca2 Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Fri, 2 Feb 2024 13:47:38 -0500 Subject: [PATCH 15/19] fixing mistake --- .github/workflows/linting.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f34ebe8..b4fb9d4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -23,10 +23,10 @@ jobs: pip install flake8 black isort - name: Flake8 Lint run: | - flake8 --ignore=E501,W503,E203 + flake8 --ignore=E501,W503,E203 . - name: Black Lint run: | - black --line-length 99 --check --verbose + black --line-length 99 --check --verbose . - name: isort Lint run: | - isort --profile black --check-only --diff + isort --profile black --check-only --diff . From ee0ebc71bcbd4d9ed1dab3726b12637aefe5cccd Mon Sep 17 00:00:00 2001 From: Gesina Phillips Date: Mon, 26 Feb 2024 16:15:30 -0500 Subject: [PATCH 16/19] fixing silly typo --- src/ingest_validation_tests/publication_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ingest_validation_tests/publication_validator.py b/src/ingest_validation_tests/publication_validator.py index a68d549..a3d705a 100644 --- a/src/ingest_validation_tests/publication_validator.py +++ b/src/ingest_validation_tests/publication_validator.py @@ -21,7 +21,7 @@ class PublicationValidator(Validator): cost = 1.0 base_url_re = r"(\s*\{\{\s*base_url\s*\}\})/(.*)" url_re = r"[Uu][Rr][Ll]" - required = "publications" + required = "publication" def collect_errors(self, **kwargs) -> List[str]: """ From 92d305faf7bc3f0de7589074e9d206dd689efc55 Mon Sep 17 00:00:00 2001 From: David Betancur Date: Mon, 15 Apr 2024 12:24:30 -0400 Subject: [PATCH 17/19] Adding exception handling for different errors on Gzip files. Adding exception tasks to properly close multiprocessing pools. 
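
Condensed, the pattern these two diffs introduce: if handing work off to the pool raises, close and join the pool before recording the error, rather than leaving worker processes running. A standalone sketch of that shape (`engine` and `file_list` follow the naming in the diffs below; everything else is illustrative):

```python
from multiprocessing import Pool
from typing import Callable, Iterable, List


def run_validation_pool(engine: Callable, file_list: Iterable, threads: int) -> List[str]:
    """Illustrative sketch of the close/join-on-error pattern added here."""
    results: List[str] = []
    pool = Pool(threads)
    try:
        data_output = pool.imap_unordered(engine, file_list)
    except Exception as e:
        # New in this patch: shut the workers down cleanly on failure,
        # then surface the exception as a validation error.
        pool.close()
        pool.join()
        results.append(f"Error {e}")
    else:
        pool.close()
        pool.join()
        for output in data_output:  # each worker returns a list of error strings
            if output:
                results.extend(output)
    return results
```

One wrinkle the gz_validator.py hunk keeps: `data_output2.extend(f"Error: {e}")` extends the list with the string's individual characters, so `append` was presumably the intent.
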
--- src/ingest_validation_tests/fastq_validator_logic.py | 7 +++++++ src/ingest_validation_tests/gz_validator.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/ingest_validation_tests/fastq_validator_logic.py b/src/ingest_validation_tests/fastq_validator_logic.py index 440ceb4..2c87c91 100644 --- a/src/ingest_validation_tests/fastq_validator_logic.py +++ b/src/ingest_validation_tests/fastq_validator_logic.py @@ -167,6 +167,10 @@ def validate_fastq_file(self, fastq_file: Path) -> None: except IOError: self.errors.append(self._format_error(f"Unable to open FASTQ data file {fastq_file}.")) return + except EOFError: + self.errors.append(self._format_error(f"EOF in FASTQ data file {fastq_file}.")) + except Exception as e: + self.errors.append(self._format_error(f"Unexpected error: {e} on data file {fastq_file}.")) self._file_record_counts[str(fastq_file)] = records_read def validate_fastq_files_in_path(self, paths: List[Path], threads: int) -> None: @@ -192,6 +196,9 @@ def validate_fastq_files_in_path(self, paths: List[Path], threads: int) -> None: data_output = pool.imap_unordered(engine, file_list) except Exception as e: _log(f"Error {e}") + pool.close() + pool.join() + data_found_one.append(f"Error {e}") else: pool.close() pool.join() diff --git a/src/ingest_validation_tests/gz_validator.py b/src/ingest_validation_tests/gz_validator.py index 9bb0162..fe0a147 100644 --- a/src/ingest_validation_tests/gz_validator.py +++ b/src/ingest_validation_tests/gz_validator.py @@ -46,6 +46,9 @@ def collect_errors(self, **kwargs) -> List[str]: data_output = pool.imap_unordered(engine, file_list) except Exception as e: _log(f"Error {e}") + pool.close() + pool.join() + data_output2.extend(f"Error: {e}") else: pool.close() pool.join() From fadcf183dbfca18a56ed8dfe1400de3aa435b755 Mon Sep 17 00:00:00 2001 From: David Betancur Date: Mon, 15 Apr 2024 12:29:19 -0400 Subject: [PATCH 18/19] Updating long line. --- src/ingest_validation_tests/fastq_validator_logic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ingest_validation_tests/fastq_validator_logic.py b/src/ingest_validation_tests/fastq_validator_logic.py index 2c87c91..b786fc3 100644 --- a/src/ingest_validation_tests/fastq_validator_logic.py +++ b/src/ingest_validation_tests/fastq_validator_logic.py @@ -222,7 +222,8 @@ def _find_duplicates(self, dirs_and_files): if len(filepaths) > 1: self.errors.append( _log( - f"{filename} has been found multiple times during this validation. Locations of duplicates: {filepaths}." # noqa: E501 + f"{filename} has been found multiple times during this validation. " + f"Locations of duplicates: {filepaths}." 
# noqa: E501 ) ) From 30e25082c2b4dec13b86989cf62f1698c6327d85 Mon Sep 17 00:00:00 2001 From: David Betancur Date: Mon, 15 Apr 2024 12:30:50 -0400 Subject: [PATCH 19/19] Black linting --- src/ingest_validation_tests/fastq_validator_logic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ingest_validation_tests/fastq_validator_logic.py b/src/ingest_validation_tests/fastq_validator_logic.py index b786fc3..7ae516f 100644 --- a/src/ingest_validation_tests/fastq_validator_logic.py +++ b/src/ingest_validation_tests/fastq_validator_logic.py @@ -170,7 +170,9 @@ def validate_fastq_file(self, fastq_file: Path) -> None: except EOFError: self.errors.append(self._format_error(f"EOF in FASTQ data file {fastq_file}.")) except Exception as e: - self.errors.append(self._format_error(f"Unexpected error: {e} on data file {fastq_file}.")) + self.errors.append( + self._format_error(f"Unexpected error: {e} on data file {fastq_file}.") + ) self._file_record_counts[str(fastq_file)] = records_read def validate_fastq_files_in_path(self, paths: List[Path], threads: int) -> None:
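
A closing note on the handler these last patches touch: gzip fails differently for a file that cannot be opened (or is not gzip at all) than for an archive cut off mid-stream, and the three except branches map onto exactly that split. A standalone illustration, not the repository's code, which also counts records and formats messages through `_format_error`:

```python
import gzip
from pathlib import Path
from typing import List


def scan_fastq_gz(fastq_file: Path) -> List[str]:
    """Sketch of why EOFError gets its own branch next to IOError."""
    errors: List[str] = []
    try:
        with gzip.open(fastq_file, "rt") as f:
            for _line in f:
                pass  # the real validator checks each FASTQ record line here
    except IOError:
        # Covers missing/unreadable files and gzip.BadGzipFile (an OSError subclass).
        errors.append(f"Unable to open FASTQ data file {fastq_file}.")
    except EOFError:
        # gzip raises EOFError when a .gz stream ends before its end-of-stream
        # marker, i.e. a truncated upload -- the case reported separately here.
        errors.append(f"EOF in FASTQ data file {fastq_file}.")
    except Exception as e:
        errors.append(f"Unexpected error: {e} on data file {fastq_file}.")
    return errors
```
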