-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 3Di sequences for existing test structure
- Loading branch information
Showing
7 changed files
with
302 additions
and
12,662 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Structural alphabet sequences | ||
============================== | ||
|
||
This directory contains structural alphabet sequences for the test structure files | ||
from the `tests/structure/data/` directory, generated with the respective reference | ||
implementation. | ||
|
||
3Di sequences | ||
------------- | ||
|
||
The 3Di sequences in `i3d.fasta` were generated with `foldseek`: | ||
|
||
.. code-block:: console | ||
$ foldseek createdb --chain-name-mode 1 tests/structure/data/*.cif /tmp/biotite_3di | ||
$ foldseek convert2fasta /tmp/biotite_3di tests/structure/data/alphabet/i3d.fasta |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,4 @@ | |
5eil | ||
4p5j | ||
1crr | ||
7gsa | ||
8crb | ||
3bww | ||
7gsa |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import re | ||
from pathlib import Path | ||
import pytest | ||
import biotite.structure as struc | ||
import biotite.structure.io.pdbx as pdbx | ||
import biotite.sequence.io.fasta as fasta | ||
import biotite.structure.alphabet as strucalph | ||
import biotite.structure.io.pdb as pdb | ||
from tests.util import data_dir | ||
|
||
|
||
def _get_ref_3di_sequence(pdb_id, chain_id):
    """
    Get the reference 3Di sequence for the first model of the structure with the
    given PDB ID and chain ID.

    Parameters
    ----------
    pdb_id : str
        The PDB ID of the structure, as it appears at the start of the headers
        in the reference FASTA file.
    chain_id : str
        The ID of the chain to get the reference sequence for.

    Returns
    -------
    ref_3di_sequence : I3DSequence
        The reference 3Di sequence of the requested chain.

    Raises
    ------
    ValueError
        If the reference FASTA file contains no entry for the requested chain.
    """
    ref_3di_file = fasta.FastaFile.read(
        Path(data_dir("structure")) / "alphabet" / "i3d.fasta"
    )
    # The IDs are matched literally, hence they are escaped.
    # The negative lookahead prevents a chain ID from matching a header whose
    # chain ID merely starts with it (e.g. 'A' must not match 'AA').
    header_pattern = re.compile(
        rf"^{re.escape(pdb_id)}(_MODEL_\d+)?_{re.escape(chain_id)}(?![A-Za-z0-9])"
    )
    for header, seq_string in ref_3di_file.items():
        # The first model of a structure is also the first sequence to appear
        # and thus to be matched
        if header_pattern.match(header):
            return strucalph.I3DSequence(seq_string)
    raise ValueError(
        f"Reference 3Di sequence not found for {pdb_id} chain {chain_id}"
    )
|
||
|
||
@pytest.mark.parametrize(
    "path",
    # Sort the files, as the order in which `glob()` yields them is arbitrary
    # and the test collection order should be reproducible
    sorted(Path(data_dir("structure")).glob("*.bcif")),
    ids=lambda path: path.stem,
)
def test_to_3di(path):
    """
    Check if the 3Di sequence of each chain is correctly generated, by comparing
    the result to a reference sequence generated with *foldseek*.

    Only the amino acid residues of the first model are considered.
    Structures without any amino acid residues are skipped.
    """
    pdbx_file = pdbx.BinaryCIFFile.read(path)
    atoms = pdbx.get_structure(pdbx_file, model=1)
    atoms = atoms[struc.filter_amino_acids(atoms)]
    if len(atoms) == 0:
        # Report the case explicitly instead of letting the test pass silently
        pytest.skip("Structure contains no amino acid residues")
    test_3di, chain_starts = strucalph.to_3di(atoms)

    # The chain ID at each chain start identifies the reference sequence
    chain_ids = atoms.chain_id[chain_starts]
    ref_3di = [
        _get_ref_3di_sequence(path.stem, chain_id) for chain_id in chain_ids
    ]

    for test, ref, chain_id in zip(test_3di, ref_3di, chain_ids):
        assert str(test) == str(ref), (
            f"3Di sequence of chain {chain_id} does not match"
        )
|
||
|
||
def test_missing():
    """
    Test if missing or non-peptide residues within a chain are correctly handled.
    """
    # Placeholder: skip explicitly, so that the missing test does not show up
    # as a passed test in the report
    pytest.skip("Test is not implemented yet")
|
||
|
||
def test_empty():
    """
    Test if an empty structure is correctly handled.
    """
    # Placeholder: skip explicitly, so that the missing test does not show up
    # as a passed test in the report
    pytest.skip("Test is not implemented yet")