qiita-spots · AmandaBirmingham · Jul 7, 2025 · Jul 8, 2025 · Jul 8, 2025 · Jul 8, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -50,6 +50,7 @@ dependencies = [
     'jinja2',
     'numpy',
     'cython',
+    'pyyaml',
     "qiita-files@https://github.com/qiita-spots/qiita-files/archive/master.zip",
     "qiita_client@https://github.com/qiita-spots/qiita_client/archive/master.zip",
     'metapool@https://github.com/biocore/kl-metapool/archive/master.zip',

diff --git a/src/sequence_processing_pipeline/Pipeline.py b/src/sequence_processing_pipeline/Pipeline.py
@@ -6,12 +6,13 @@
 from metapool import (load_sample_sheet, AmpliconSampleSheet, is_blank,
                       parse_project_name, SAMPLE_NAME_KEY, QIITA_ID_KEY,
                       PROJECT_SHORT_NAME_KEY, PROJECT_FULL_NAME_KEY,
-                      CONTAINS_REPLICATES_KEY)
+                      CONTAINS_REPLICATES_KEY, get_model_by_instrument_id,
+                      PROFILE_NAME_KEY)
 from metapool.plate import ErrorMessage, WarningMessage
 from sequence_processing_pipeline.Job import Job
 from sequence_processing_pipeline.PipelineError import PipelineError
 import logging
-from re import findall, search, match
+from re import findall, search
 import sample_sheet
 import pandas as pd
 from collections import defaultdict
@@ -26,14 +27,8 @@
 
 
 class InstrumentUtils():
-    types = {'A': 'NovaSeq 6000', 'D': 'HiSeq 2500', 'FS': 'iSeq',
-             'K': 'HiSeq 4000', 'LH': 'NovaSeq X Plus', 'M': 'MiSeq',
-             'MN': 'MiniSeq',
-             # SN – RapidRun which is HiSeq 2500
-             'SN': 'RapidRun'}
-
     @staticmethod
-    def get_instrument_id(run_directory):
+    def _get_instrument_id(run_directory):
         run_info = join(run_directory, 'RunInfo.xml')
 
         if not exists(run_info):
@@ -46,24 +41,12 @@ def get_instrument_id(run_directory):
 
     @staticmethod
     def get_instrument_type(run_directory):
-        # extract all letters at the beginning of the string, stopping
-        # at the first digit.
-        code = match(r"^(.*?)\d.*",
-                     InstrumentUtils.get_instrument_id(run_directory))
-
-        if code is None:
-            raise ValueError("Could not determine instrument code")
-        else:
-            code = code.group(1)
-
-        # map instrument code to a name string and return it, if possible.
-        try:
-            return InstrumentUtils.types[code]
-        except KeyError:
-            raise ValueError(f"Instrument code '{code}' is of unknown type")
+        instrument_id = InstrumentUtils._get_instrument_id(run_directory)
+        return get_model_by_instrument_id(
+            instrument_id, model_key=PROFILE_NAME_KEY)
 
     @staticmethod
-    def get_date(run_directory):
+    def _get_date(run_directory):
         run_info = join(run_directory, 'RunInfo.xml')
 
         if not exists(run_info):

diff --git a/tests/test_NuQCJob.py b/tests/test_NuQCJob.py
@@ -944,10 +944,7 @@ def test_nuqcjob_creation(self):
             sheet.write(f)
 
         with self.assertRaisesRegex(
-            ValueError,
-            "tmp-sample-sheet.csv' does "
-            "not appear to be a valid "
-            "sample-sheet.",
+            ValueError, "'NotMetagenomic' is an unrecognized Assay type",
         ):
             NuQCJob(
                 self.fastq_root_path,
@@ -1017,10 +1014,7 @@ def test_error_msg_from_logs(self):
 
     def test_assay_value(self):
         with self.assertRaisesRegex(
-            ValueError,
-            "bad-sample-sheet-metagenomics"
-            ".csv' does not appear to be a"
-            " valid sample-sheet.",
+            ValueError, "'Metagenomics' is an unrecognized Assay type",
         ):
             NuQCJob(
                 self.fastq_root_path,

diff --git a/tests/test_Pipeline.py b/tests/test_Pipeline.py
@@ -258,9 +258,8 @@ def test_creation(self):
                          f"{self.path()}/bad_configuration.json")
 
         # Pipeline should assert due to Assay having a bad value.
-        with self.assertRaisesRegex(ValueError, "bad-sample-sheet-metagenomics"
-                                                ".csv' does not appear to be a"
-                                                " valid sample-sheet."):
+        with self.assertRaisesRegex(
+                ValueError, "'Metagenomics' is an unrecognized Assay type"):
             Pipeline(self.good_config_file,
                      self.good_run_id,
                      self.bad_assay_type_path,
@@ -2401,18 +2400,15 @@ def test_instrument_utils(self):
                                                         'type': 'iSeq',
                                                         'date': '2022-09-12'},
                '231215_LH00444_0031_B222WHFLT4': {'id': 'LH00444',
-                                                  'type': 'NovaSeq X Plus',
+                                                  'type': 'NovaSeq X',
                                                   'date': '2023-12-16'},
                '190809_D00611_0709_AH3CKJBCX3_RKL0040_StudyB_36-39_2': {
                    'id': 'D00611',
                    'type': 'HiSeq 2500',
                    'date': '2019-08-09'},
                '231215_A01535_0435_BH23F5DSXC': {'id': 'A01535',
                                                  'type': 'NovaSeq 6000',
-                                                 'date': '2023-12-15'},
-               '150629_SN1001_0511_AH5L7GBCXX': {'id': 'SN1001',
-                                                 'type': 'RapidRun',
-                                                 'date': '2015-06-29'}}
+                                                 'date': '2023-12-15'}}
 
         run_directories = []
         for root, dirs, files in walk(self.path('sample_run_directories')):
@@ -2423,11 +2419,9 @@ def test_instrument_utils(self):
             break
 
         for run_id, run_dir in run_directories:
-            self.assertEqual(iutils.get_instrument_id(run_dir),
-                             exp[run_id]['id'])
             self.assertEqual(iutils.get_instrument_type(run_dir),
                              exp[run_id]['type'])
-            self.assertEqual(iutils.get_date(run_dir),
+            self.assertEqual(iutils._get_date(run_dir),
                              exp[run_id]['date'])
 
 

diff --git a/tests/test_WorkflowFactory.py b/tests/test_WorkflowFactory.py
@@ -84,8 +84,7 @@ def test_invalid_sample_sheets(self):
         kwargs = {"uif_path": "tests/data/sample-sheets/metagenomic/"
                   "illumina/bad_sheet1.csv"}
 
-        msg = ("'tests/data/sample-sheets/metagenomic/illumina/"
-               "bad_sheet1.csv' does not appear to be a valid sample-sheet.")
+        msg = "'not_a_metag' is an unrecognized SheetType"
 
         with self.assertRaisesRegex(ValueError, msg):
             WorkflowFactory.generate_workflow(**kwargs)
@@ -95,8 +94,7 @@ def test_invalid_sample_sheets(self):
         kwargs = {"uif_path": "tests/data/sample-sheets/metagenomic/"
                   "illumina/bad_sheet2.csv"}
 
-        msg = ("'tests/data/sample-sheets/metagenomic/illumina/"
-               "bad_sheet2.csv' does not appear to be a valid sample-sheet.")
+        msg = "'NotMetagenomic' is an unrecognized Assay type"
 
         with self.assertRaisesRegex(ValueError, msg):
             WorkflowFactory.generate_workflow(**kwargs)