Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ dependencies = [
'jinja2',
'numpy',
'cython',
'pyyaml',
"qiita-files@https://github.com/qiita-spots/qiita-files/archive/master.zip",
"qiita_client@https://github.com/qiita-spots/qiita_client/archive/master.zip",
'metapool@https://github.com/biocore/kl-metapool/archive/master.zip',
Expand Down
33 changes: 8 additions & 25 deletions src/sequence_processing_pipeline/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
from metapool import (load_sample_sheet, AmpliconSampleSheet, is_blank,
parse_project_name, SAMPLE_NAME_KEY, QIITA_ID_KEY,
PROJECT_SHORT_NAME_KEY, PROJECT_FULL_NAME_KEY,
CONTAINS_REPLICATES_KEY)
CONTAINS_REPLICATES_KEY, get_model_by_instrument_id,
PROFILE_NAME_KEY)
from metapool.plate import ErrorMessage, WarningMessage
from sequence_processing_pipeline.Job import Job
from sequence_processing_pipeline.PipelineError import PipelineError
import logging
from re import findall, search, match
from re import findall, search
import sample_sheet
import pandas as pd
from collections import defaultdict
Expand All @@ -26,14 +27,8 @@


class InstrumentUtils():
types = {'A': 'NovaSeq 6000', 'D': 'HiSeq 2500', 'FS': 'iSeq',
'K': 'HiSeq 4000', 'LH': 'NovaSeq X Plus', 'M': 'MiSeq',
'MN': 'MiniSeq',
# SN – RapidRun which is HiSeq 2500
'SN': 'RapidRun'}

@staticmethod
def get_instrument_id(run_directory):
def _get_instrument_id(run_directory):
run_info = join(run_directory, 'RunInfo.xml')

if not exists(run_info):
Expand All @@ -46,24 +41,12 @@ def get_instrument_id(run_directory):

@staticmethod
def get_instrument_type(run_directory):
# extract all letters at the beginning of the string, stopping
# at the first digit.
code = match(r"^(.*?)\d.*",
InstrumentUtils.get_instrument_id(run_directory))

if code is None:
raise ValueError("Could not determine instrument code")
else:
code = code.group(1)

# map instrument code to a name string and return it, if possible.
try:
return InstrumentUtils.types[code]
except KeyError:
raise ValueError(f"Instrument code '{code}' is of unknown type")
instrument_id = InstrumentUtils._get_instrument_id(run_directory)
return get_model_by_instrument_id(
instrument_id, model_key=PROFILE_NAME_KEY)

@staticmethod
def get_date(run_directory):
def _get_date(run_directory):
run_info = join(run_directory, 'RunInfo.xml')

if not exists(run_info):
Expand Down
10 changes: 2 additions & 8 deletions tests/test_NuQCJob.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,10 +944,7 @@ def test_nuqcjob_creation(self):
sheet.write(f)

with self.assertRaisesRegex(
ValueError,
"tmp-sample-sheet.csv' does "
"not appear to be a valid "
"sample-sheet.",
ValueError, "'NotMetagenomic' is an unrecognized Assay type",
):
NuQCJob(
self.fastq_root_path,
Expand Down Expand Up @@ -1017,10 +1014,7 @@ def test_error_msg_from_logs(self):

def test_assay_value(self):
with self.assertRaisesRegex(
ValueError,
"bad-sample-sheet-metagenomics"
".csv' does not appear to be a"
" valid sample-sheet.",
ValueError, "'Metagenomics' is an unrecognized Assay type",
):
NuQCJob(
self.fastq_root_path,
Expand Down
16 changes: 5 additions & 11 deletions tests/test_Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,9 +258,8 @@ def test_creation(self):
f"{self.path()}/bad_configuration.json")

# Pipeline should assert due to Assay having a bad value.
with self.assertRaisesRegex(ValueError, "bad-sample-sheet-metagenomics"
".csv' does not appear to be a"
" valid sample-sheet."):
with self.assertRaisesRegex(
ValueError, "'Metagenomics' is an unrecognized Assay type"):
Pipeline(self.good_config_file,
self.good_run_id,
self.bad_assay_type_path,
Expand Down Expand Up @@ -2401,18 +2400,15 @@ def test_instrument_utils(self):
'type': 'iSeq',
'date': '2022-09-12'},
'231215_LH00444_0031_B222WHFLT4': {'id': 'LH00444',
'type': 'NovaSeq X Plus',
'type': 'NovaSeq X',
'date': '2023-12-16'},
'190809_D00611_0709_AH3CKJBCX3_RKL0040_StudyB_36-39_2': {
'id': 'D00611',
'type': 'HiSeq 2500',
'date': '2019-08-09'},
'231215_A01535_0435_BH23F5DSXC': {'id': 'A01535',
'type': 'NovaSeq 6000',
'date': '2023-12-15'},
'150629_SN1001_0511_AH5L7GBCXX': {'id': 'SN1001',
'type': 'RapidRun',
'date': '2015-06-29'}}
'date': '2023-12-15'}}

run_directories = []
for root, dirs, files in walk(self.path('sample_run_directories')):
Expand All @@ -2423,11 +2419,9 @@ def test_instrument_utils(self):
break

for run_id, run_dir in run_directories:
self.assertEqual(iutils.get_instrument_id(run_dir),
exp[run_id]['id'])
self.assertEqual(iutils.get_instrument_type(run_dir),
exp[run_id]['type'])
self.assertEqual(iutils.get_date(run_dir),
self.assertEqual(iutils._get_date(run_dir),
exp[run_id]['date'])


Expand Down
6 changes: 2 additions & 4 deletions tests/test_WorkflowFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,7 @@ def test_invalid_sample_sheets(self):
kwargs = {"uif_path": "tests/data/sample-sheets/metagenomic/"
"illumina/bad_sheet1.csv"}

msg = ("'tests/data/sample-sheets/metagenomic/illumina/"
"bad_sheet1.csv' does not appear to be a valid sample-sheet.")
msg = "'not_a_metag' is an unrecognized SheetType"

with self.assertRaisesRegex(ValueError, msg):
WorkflowFactory.generate_workflow(**kwargs)
Expand All @@ -95,8 +94,7 @@ def test_invalid_sample_sheets(self):
kwargs = {"uif_path": "tests/data/sample-sheets/metagenomic/"
"illumina/bad_sheet2.csv"}

msg = ("'tests/data/sample-sheets/metagenomic/illumina/"
"bad_sheet2.csv' does not appear to be a valid sample-sheet.")
msg = "'NotMetagenomic' is an unrecognized Assay type"

with self.assertRaisesRegex(ValueError, msg):
WorkflowFactory.generate_workflow(**kwargs)
Expand Down
Loading