Skip to content

Commit 9cb4917

Browse files
committed
Fix usage of mkdssp>=4
1 parent cd77d62 commit 9cb4917

File tree

4 files changed

+86
-18
lines changed

4 files changed

+86
-18
lines changed

.github/workflows/test_and_deploy.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,6 @@ jobs:
209209
miniforge-version: latest
210210
- name: Install distribution
211211
run: pip install ./dist/*.whl
212-
- name: "TEMP: Skip DSSP tests"
213-
# TEMP: Omit DSSP tests for now until conda-forge DSSP is functional
214-
# (https://github.com/conda-forge/dssp-feedstock/pull/4)
215-
run: mamba uninstall dssp
216212
- name: Run tests
217213
# Running NCBI BLAST and SRA takes too long
218214
# The tests on the NCBI Entrez database are not reliable enough

environment.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ channels:
77
- anaconda
88
- conda-forge
99
- bioconda
10-
- salilab
1110

1211
dependencies:
1312
- python =3.11
@@ -33,7 +32,7 @@ dependencies:
3332
# Interfaced software in biotite.application (can also be installed separately)
3433
- autodock-vina
3534
- clustalo
36-
- dssp =3
35+
- dssp =4
3736
- mafft
3837
- muscle =3
3938
- sra-tools =3

src/biotite/application/dssp/app.py

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,13 @@
1111
import numpy as np
1212
from biotite.application.application import AppState, requires_state
1313
from biotite.application.localapp import LocalApp, cleanup_tempfile, get_version
14-
from biotite.structure.io.pdbx.cif import CIFFile
14+
from biotite.structure.error import BadStructureError
15+
from biotite.structure.filter import filter_amino_acids
16+
from biotite.structure.io.pdbx.cif import CIFCategory, CIFColumn, CIFFile
17+
from biotite.structure.io.pdbx.component import MaskValue
1518
from biotite.structure.io.pdbx.convert import set_structure
19+
from biotite.structure.repair import create_continuous_res_ids
20+
from biotite.structure.residues import get_residue_starts
1621

1722

1823
class DsspApp(LocalApp):
@@ -49,17 +54,19 @@ class DsspApp(LocalApp):
4954
>>> app.start()
5055
>>> app.join()
5156
>>> print(app.get_sse())
52-
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'C' 'C'
53-
'C' 'C']
57+
['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P' 'P'
58+
'P' 'C']
5459
"""
5560

5661
def __init__(self, atom_array, bin_path="mkdssp"):
5762
super().__init__(bin_path)
5863

59-
# mkdssp requires also the
60-
# 'occupancy', 'b_factor' and 'charge' fields
61-
# -> Add these annotations to a copy of the input structure
64+
if not np.all(filter_amino_acids(atom_array)):
65+
raise BadStructureError("The input structure must contain only amino acids")
6266
self._array = atom_array.copy()
67+
# DSSP also requires the
68+
# 'occupancy', 'b_factor' and 'charge' fields
69+
# -> Add these placeholder values
6370
categories = self._array.get_annotation_categories()
6471
if "charge" not in categories:
6572
self._array.set_annotation(
@@ -73,6 +80,10 @@ def __init__(self, atom_array, bin_path="mkdssp"):
7380
self._array.set_annotation(
7481
"occupancy", np.ones(self._array.array_length(), dtype=float)
7582
)
83+
# DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
84+
# if `seq_id` does not start at 1
85+
self._array.res_id = create_continuous_res_ids(self._array)
86+
7687
try:
7788
# The parameters have changed in version 4
7889
self._new_cli = get_version(bin_path)[0] >= 4
@@ -86,6 +97,9 @@ def __init__(self, atom_array, bin_path="mkdssp"):
8697
def run(self):
8798
in_file = CIFFile()
8899
set_structure(in_file, self._array)
100+
in_file.block["pdbx_poly_seq_scheme"] = _create_pdbx_poly_seq_scheme(
101+
self._array, in_file.block["atom_site"]["label_entity_id"].as_array(str)
102+
)
89103
in_file.write(self._in_file)
90104
self._in_file.flush()
91105
if self._new_cli:
@@ -157,3 +171,46 @@ def annotate_sse(atom_array, bin_path="mkdssp"):
157171
app.start()
158172
app.join()
159173
return app.get_sse()
174+
175+
176+
def _create_pdbx_poly_seq_scheme(atom_array, entity_ids):
177+
"""
178+
Create the ``pdbx_poly_seq_scheme`` category, as required by DSSP.
179+
180+
Parameters
181+
----------
182+
atom_array : AtomArray
183+
The atom array to create the category from.
184+
entity_ids : ndarray, dtype=str
185+
The entity ID of each atom.
186+
187+
Returns
188+
-------
189+
pdbx_poly_seq_scheme : CIFCategory
190+
The ``pdbx_poly_seq_scheme`` category.
191+
"""
192+
res_start_indices = get_residue_starts(atom_array)
193+
chain_id = atom_array.chain_id[res_start_indices]
194+
res_name = atom_array.res_name[res_start_indices]
195+
res_id = atom_array.res_id[res_start_indices]
196+
ins_code = atom_array.ins_code[res_start_indices]
197+
hetero = atom_array.hetero[res_start_indices]
198+
entity_id = entity_ids[res_start_indices]
199+
200+
poly_seq_scheme = CIFCategory()
201+
poly_seq_scheme["asym_id"] = chain_id
202+
poly_seq_scheme["entity_id"] = entity_id
203+
poly_seq_scheme["seq_id"] = res_id
204+
poly_seq_scheme["mon_id"] = res_name
205+
poly_seq_scheme["ndb_seq_num"] = res_id
206+
poly_seq_scheme["pdb_seq_num"] = res_id
207+
poly_seq_scheme["auth_seq_num"] = res_id
208+
poly_seq_scheme["pdb_mon_id"] = res_name
209+
poly_seq_scheme["auth_mon_id"] = res_name
210+
poly_seq_scheme["pdb_strand_id"] = chain_id
211+
poly_seq_scheme["pdb_ins_code"] = CIFColumn(
212+
ins_code, np.where(ins_code == "", MaskValue.MISSING, MaskValue.PRESENT)
213+
)
214+
poly_seq_scheme["hetero"] = np.where(hetero, "y", "n")
215+
216+
return poly_seq_scheme

tests/application/test_dssp.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# information.
44

55
from os.path import join
6-
from subprocess import SubprocessError
76
import numpy as np
87
import pytest
98
import biotite.structure as struc
@@ -13,21 +12,38 @@
1312
from tests.util import data_dir, is_not_installed
1413

1514

15+
@pytest.mark.parametrize(
16+
"pdb_id",
17+
[
18+
"1aki", # Single chain
19+
"1igy", # Multiple chains
20+
"5eil", # Contains non-canonical amino acid
21+
],
22+
)
1623
@pytest.mark.skipif(is_not_installed("mkdssp"), reason="DSSP is not installed")
17-
def test_multiple_chains():
24+
def test_annotation(pdb_id):
25+
"""
26+
Check if the DSSP annotation has the correct length and reasonable values.
27+
"""
1828
atoms = pdbx.get_structure(
19-
pdbx.BinaryCIFFile.read(join(data_dir("structure"), "1igy.bcif")), model=1
29+
pdbx.BinaryCIFFile.read(join(data_dir("structure"), f"{pdb_id}.bcif")), model=1
2030
)
21-
atoms = atoms[struc.filter_canonical_amino_acids(atoms)]
31+
atoms = atoms[struc.filter_amino_acids(atoms)]
2232
sse = DsspApp.annotate_sse(atoms)
23-
assert np.all(np.isin(sse, ["C", "H", "B", "E", "G", "I", "T", "S"]))
33+
34+
assert np.all(np.isin(sse, ["C", "H", "B", "E", "G", "I", "T", "S", "P"]))
35+
# One SSE per residue
2436
assert len(sse) == struc.get_residue_count(atoms)
2537

2638

2739
@pytest.mark.skipif(is_not_installed("mkdssp"), reason="DSSP is not installed")
2840
def test_invalid_structure():
41+
"""
42+
Check that an exception is raised if the input structure contains non-amino-acid
43+
residues.
44+
"""
2945
array = strucio.load_structure(join(data_dir("structure"), "5ugo.bcif"))
3046
# Get DNA chain -> Invalid for DSSP
3147
chain = array[array.chain_id == "T"]
32-
with pytest.raises(SubprocessError):
48+
with pytest.raises(struc.BadStructureError):
3349
DsspApp.annotate_sse(chain)

0 commit comments

Comments
 (0)