Skip to content

Commit 767bb8a

Browse files
authored
make TMT plex inference testable and add tests
2 parents 572abe2 + ba46736 commit 767bb8a

File tree

2 files changed

+128
-132
lines changed

2 files changed

+128
-132
lines changed

sdrf_pipelines/openms/openms.py

Lines changed: 112 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,28 @@ class FileToColumnEntries:
3535
file2technical_rep: dict[str, str] = {}
3636

3737

38+
def infer_tmtplex(label_set: set) -> str:
    """Infer the TMT plex from a set of labels.

    The decision is based on the number of distinct labels and on labels that
    are only defined for the larger plexes. Labels that a smaller plex table
    does not define always select a plex that does define them, so a partially
    used kit (few labels, but with N/C-suffixed names) is not mistaken for
    ``tmt6plex``.

    Args:
        label_set: Distinct label names found for one raw file, e.g.
            ``{"TMT126", "TMT127N"}``. Any sized container supporting ``in``
            is accepted (a dict keyed by label name works as well).

    Returns:
        One of ``"tmt18plex"``, ``"tmt16plex"``, ``"tmt11plex"``,
        ``"tmt10plex"`` or ``"tmt6plex"``.
    """

    def contains_any(*names: str) -> bool:
        return any(name in label_set for name in names)

    n_labels = len(label_set)

    if n_labels > 16 or contains_any("TMT134C", "TMT135N"):
        return "tmt18plex"

    if n_labels > 11 or contains_any("TMT134N", "TMT133C", "TMT133N", "TMT132C", "TMT132N"):
        return "tmt16plex"

    # "TMT131N" is only defined from the 11-plex upwards (the 10-plex table
    # names its last label "TMT131"), so it must not fall through to a
    # smaller plex whose table lacks that key.
    if n_labels == 11 or contains_any("TMT131C", "TMT131N"):
        return "tmt11plex"

    # The N/C-suffixed 127-130 labels are not defined for tmt6plex, so their
    # presence requires at least the 10-plex even when few labels are used.
    if n_labels > 6 or contains_any(
        "TMT127N",
        "TMT127C",
        "TMT128N",
        "TMT128C",
        "TMT129N",
        "TMT129C",
        "TMT130N",
        "TMT130C",
    ):
        return "tmt10plex"

    return "tmt6plex"
58+
59+
3860
def get_openms_file_name(raw, extension_convert: str | None = None):
3961
"""
4062
Convert file name for OpenMS. If extension_convert is set, the extension will be converted to the specified format.
@@ -92,82 +114,86 @@ def parse_tolerance(pc_tol_str: str, units=("ppm", "da")) -> tuple[str | None, s
92114
return None, None
93115

94116

117+
# Lookup tables used to convert an SDRF TMT label name into its OpenMS label
# number: plex name -> {label name -> 1-based position within that plex}.
# The positions are derived from the order in which the labels are listed.
TMT_PLEXES = {
    plex_name: {label_name: position for position, label_name in enumerate(label_names, start=1)}
    for plex_name, label_names in (
        (
            "tmt18plex",
            (
                "TMT126",
                "TMT127N",
                "TMT127C",
                "TMT128N",
                "TMT128C",
                "TMT129N",
                "TMT129C",
                "TMT130N",
                "TMT130C",
                "TMT131N",
                "TMT131C",
                "TMT132N",
                "TMT132C",
                "TMT133N",
                "TMT133C",
                "TMT134N",
                "TMT134C",
                "TMT135N",
            ),
        ),
        (
            "tmt16plex",
            (
                "TMT126",
                "TMT127N",
                "TMT127C",
                "TMT128N",
                "TMT128C",
                "TMT129N",
                "TMT129C",
                "TMT130N",
                "TMT130C",
                "TMT131N",
                "TMT131C",
                "TMT132N",
                "TMT132C",
                "TMT133N",
                "TMT133C",
                "TMT134N",
            ),
        ),
        (
            "tmt11plex",
            (
                "TMT126",
                "TMT127N",
                "TMT127C",
                "TMT128N",
                "TMT128C",
                "TMT129N",
                "TMT129C",
                "TMT130N",
                "TMT130C",
                "TMT131N",
                "TMT131C",
            ),
        ),
        (
            "tmt10plex",
            (
                "TMT126",
                "TMT127N",
                "TMT127C",
                "TMT128N",
                "TMT128C",
                "TMT129N",
                "TMT129C",
                "TMT130N",
                "TMT130C",
                "TMT131",
            ),
        ),
        (
            "tmt6plex",
            ("TMT126", "TMT127", "TMT128", "TMT129", "TMT130", "TMT131"),
        ),
    )
}
190+
191+
95192
class OpenMS:
96193
def __init__(self) -> None:
97194
super().__init__()
98195
self.warnings: dict[str, int] = {}
99196
self._unimod_database = UnimodDatabase()
100-
self.tmt18plex = {
101-
"TMT126": 1,
102-
"TMT127N": 2,
103-
"TMT127C": 3,
104-
"TMT128N": 4,
105-
"TMT128C": 5,
106-
"TMT129N": 6,
107-
"TMT129C": 7,
108-
"TMT130N": 8,
109-
"TMT130C": 9,
110-
"TMT131N": 10,
111-
"TMT131C": 11,
112-
"TMT132N": 12,
113-
"TMT132C": 13,
114-
"TMT133N": 14,
115-
"TMT133C": 15,
116-
"TMT134N": 16,
117-
"TMT134C": 17,
118-
"TMT135N": 18,
119-
}
120-
self.tmt16plex = {
121-
"TMT126": 1,
122-
"TMT127N": 2,
123-
"TMT127C": 3,
124-
"TMT128N": 4,
125-
"TMT128C": 5,
126-
"TMT129N": 6,
127-
"TMT129C": 7,
128-
"TMT130N": 8,
129-
"TMT130C": 9,
130-
"TMT131N": 10,
131-
"TMT131C": 11,
132-
"TMT132N": 12,
133-
"TMT132C": 13,
134-
"TMT133N": 14,
135-
"TMT133C": 15,
136-
"TMT134N": 16,
137-
}
138-
self.tmt11plex = {
139-
"TMT126": 1,
140-
"TMT127N": 2,
141-
"TMT127C": 3,
142-
"TMT128N": 4,
143-
"TMT128C": 5,
144-
"TMT129N": 6,
145-
"TMT129C": 7,
146-
"TMT130N": 8,
147-
"TMT130C": 9,
148-
"TMT131N": 10,
149-
"TMT131C": 11,
150-
}
151-
self.tmt10plex = {
152-
"TMT126": 1,
153-
"TMT127N": 2,
154-
"TMT127C": 3,
155-
"TMT128N": 4,
156-
"TMT128C": 5,
157-
"TMT129N": 6,
158-
"TMT129C": 7,
159-
"TMT130N": 8,
160-
"TMT130C": 9,
161-
"TMT131": 10,
162-
}
163-
self.tmt6plex = {
164-
"TMT126": 1,
165-
"TMT127": 2,
166-
"TMT128": 3,
167-
"TMT129": 4,
168-
"TMT130": 5,
169-
"TMT131": 6,
170-
}
171197
# Hardcode enzymes from OpenMS
172198
self.enzymes = {
173199
"Glutamyl endopeptidase": "glutamyl endopeptidase",
@@ -655,35 +681,20 @@ def writeTwoTableExperimentalDesign(
655681
sample_id += 1
656682

657683
labels = file2label[raw]
684+
labels_str = ",".join(labels)
658685
label_set = set(labels)
659686
if "label free sample" in labels:
660687
label = "1"
661-
elif "TMT" in ",".join(file2label[raw]):
662-
if len(label_set) > 16 or "TMT134C" in label_set or "TMT135N" in label_set:
663-
choice = self.tmt18plex
664-
elif (
665-
len(label_set) > 11
666-
or "TMT134N" in label_set
667-
or "TMT133C" in label_set
668-
or "TMT133N" in label_set
669-
or "TMT132C" in label_set
670-
or "TMT132N" in label_set
671-
):
672-
choice = self.tmt16plex
673-
elif len(label_set) == 11 or "TMT131C" in label_set:
674-
choice = self.tmt11plex
675-
elif len(label_set) > 6:
676-
choice = self.tmt10plex
677-
else:
678-
choice = self.tmt6plex
688+
elif "TMT" in labels_str:
689+
choice = TMT_PLEXES[infer_tmtplex(label_set)]
679690
label = str(choice[labels[label_index[raw]]])
680691
label_index[raw] = label_index[raw] + 1
681-
elif "SILAC" in ",".join(file2label[raw]):
692+
elif "SILAC" in labels_str:
682693
if len(label_set) == 3:
683694
label = str(self.silac3[labels[label_index[raw]].lower()])
684695
else:
685696
label = str(self.silac2[labels[label_index[raw]].lower()])
686-
elif "ITRAQ" in ",".join(file2label[raw]):
697+
elif "ITRAQ" in labels_str:
687698
if (
688699
len(label_set) > 4
689700
or "ITRAQ113" in label_set
@@ -904,39 +915,24 @@ def writeOneTableExperimentalDesign(
904915

905916
# convert sdrf's label to openms's label
906917
labels = file2label[raw]
918+
labels_str = ",".join(labels)
907919
label_set = set(labels)
908920
if "label free sample" in labels:
909921
label = "1"
910922

911-
elif "TMT" in ",".join(file2label[raw]):
912-
if len(label_set) > 16 or "TMT134C" in label_set or "TMT135N" in label_set:
913-
choice = self.tmt18plex
914-
elif (
915-
len(label_set) > 11
916-
or "TMT134N" in label_set
917-
or "TMT133C" in label_set
918-
or "TMT133N" in label_set
919-
or "TMT132C" in label_set
920-
or "TMT132N" in label_set
921-
):
922-
choice = self.tmt16plex
923-
elif len(label_set) == 11 or "TMT131C" in label_set:
924-
choice = self.tmt11plex
925-
elif len(label_set) > 6:
926-
choice = self.tmt10plex
927-
else:
928-
choice = self.tmt6plex
923+
elif "TMT" in labels_str:
924+
choice = TMT_PLEXES[infer_tmtplex(label_set)]
929925
label = str(choice[labels[label_index[raw]]])
930926

931927
# This can be avoided the dicts are built based on file&label as key.
932928
label_index[raw] = label_index[raw] + 1
933-
elif "SILAC" in ",".join(file2label[raw]):
929+
elif "SILAC" in labels_str:
934930
if len(label_set) == 3:
935931
label = str(self.silac3[labels[label_index[raw]].lower()])
936932
else:
937933
label = str(self.silac2[labels[label_index[raw]].lower()])
938934
label_index[raw] = label_index[raw] + 1
939-
elif "ITRAQ" in ",".join(file2label[raw]):
935+
elif "ITRAQ" in labels_str:
940936
if (
941937
len(label_set) > 4
942938
or "ITRAQ113" in label_set
@@ -1057,25 +1053,10 @@ def save_search_settings_to_file(self, output_filename, sdrf, f2c):
10571053
continue
10581054
raws.append(raw)
10591055
labels = f2c.file2label[raw]
1056+
labels_str = ",".join(labels)
10601057
label_set = set(labels)
1061-
if "TMT" in ",".join(labels):
1062-
if len(label_set) > 16 or "TMT134C" in label_set or "TMT135N" in label_set:
1063-
label = "tmt18plex"
1064-
elif (
1065-
len(label_set) > 11
1066-
or "TMT134N" in label_set
1067-
or "TMT133C" in label_set
1068-
or "TMT133N" in label_set
1069-
or "TMT132C" in label_set
1070-
or "TMT132N" in label_set
1071-
):
1072-
label = "tmt16plex"
1073-
elif len(label_set) == 11 or "TMT131C" in label_set:
1074-
label = "tmt11plex"
1075-
elif len(label_set) > 6:
1076-
label = "tmt10plex"
1077-
else:
1078-
label = "tmt6plex"
1058+
if "TMT" in labels_str:
1059+
label = infer_tmtplex(label_set)
10791060
# add default TMT modification when sdrf with label not contains TMT modification
10801061
if "TMT" not in f2c.file2mods[raw][0] and "TMT" not in f2c.file2mods[raw][1]:
10811062
warning_message = (
@@ -1091,9 +1072,9 @@ def save_search_settings_to_file(self, output_filename, sdrf, f2c):
10911072
f2c.file2mods[raw] = (f2c.file2mods[raw][0], ",".join(tmt_var_mod))
10921073
elif "label free sample" in label_set:
10931074
label = "label free sample"
1094-
elif "silac" in ",".join(label_set):
1075+
elif "silac" in labels_str:
10951076
label = "SILAC"
1096-
elif "ITRAQ" in ",".join(label_set):
1077+
elif "ITRAQ" in labels_str:
10971078
if (
10981079
len(label_set) > 4
10991080
or "ITRAQ113" in label_set

tests/test_openms.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import pytest
22

3-
from sdrf_pipelines.openms.openms import get_openms_file_name, parse_tolerance
3+
from sdrf_pipelines.openms.openms import (
4+
TMT_PLEXES,
5+
get_openms_file_name,
6+
infer_tmtplex,
7+
parse_tolerance,
8+
)
49

510
test_functions = [
611
("file.raw", "file.mzML", "raw:mzML"),
@@ -37,3 +42,13 @@ def test_get_openms_file_name(input_file, expected_file, extension):
3742
@pytest.mark.parametrize("input_str,expected_tol,expected_unit", test_tol_string)
def test_parse_tolerence(input_str, expected_tol, expected_unit):
    """Each tolerance string must parse into the expected (tolerance, unit) pair."""
    expected = (expected_tol, expected_unit)
    parsed = parse_tolerance(input_str)
    assert parsed == expected
45+
46+
47+
@pytest.mark.parametrize("plex_name", TMT_PLEXES)
def test_tmt_label_inference_full_plexes(plex_name):
    """The complete label table of a plex must be inferred as that very plex."""
    all_labels = TMT_PLEXES[plex_name]
    inferred = infer_tmtplex(all_labels)
    assert plex_name == inferred
50+
51+
52+
@pytest.mark.parametrize("plex_name", TMT_PLEXES)
def test_tmt_label_inference_from_incomplete_plexes(plex_name):
    """A plex with one of its labels missing must still be inferred as that plex."""
    # Drop the first label of the table ("TMT126", present in every plex) so
    # the input really is an incomplete label set; feeding the full table here
    # would only repeat the full-plex test.
    incomplete_labels = set(list(TMT_PLEXES[plex_name])[1:])
    assert len(incomplete_labels) == len(TMT_PLEXES[plex_name]) - 1
    assert plex_name == infer_tmtplex(incomplete_labels)

0 commit comments

Comments
 (0)