Skip to content

Commit

Permalink
Add 3Di sequences for existing test structure
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Oct 15, 2024
1 parent 63c9366 commit ff3831d
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 12,662 deletions.
2,898 changes: 0 additions & 2,898 deletions tests/structure/data/3bww.pdb

This file was deleted.

9,696 changes: 0 additions & 9,696 deletions tests/structure/data/8crb.pdb

This file was deleted.

16 changes: 16 additions & 0 deletions tests/structure/data/alphabet/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Structural alphabet sequences
==============================

This directory contains structural alphabet sequences for the test structure files
from the ``tests/structure/data/`` directory, generated with the respective reference
implementation.

3Di sequences
-------------

The 3Di sequences in ``i3d.fasta`` were generated with ``foldseek``:

.. code-block:: console

    $ foldseek createdb --chain-name-mode 1 tests/structure/data/*.cif /tmp/biotite_3di
    $ foldseek convert2fasta /tmp/biotite_3di tests/structure/data/alphabet/i3d.fasta
216 changes: 216 additions & 0 deletions tests/structure/data/alphabet/i3d.fasta

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions tests/structure/data/ids.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,4 @@
5eil
4p5j
1crr
7gsa
8crb
3bww
7gsa
65 changes: 0 additions & 65 deletions tests/structure/test_3di.py

This file was deleted.

69 changes: 69 additions & 0 deletions tests/structure/test_i3d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import re
from pathlib import Path
import pytest
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.sequence.io.fasta as fasta
import biotite.structure.alphabet as strucalph
import biotite.structure.io.pdb as pdb
from tests.util import data_dir


def _get_ref_3di_sequence(pdb_id, chain_id):
    """
    Get the reference 3Di sequence for the first model of the structure with the
    given PDB ID and chain ID.

    Parameters
    ----------
    pdb_id : str
        The PDB ID the FASTA header must start with.
    chain_id : str
        The chain ID the FASTA header must contain after the PDB ID
        (and the optional ``_MODEL_<n>`` infix).

    Returns
    -------
    ref_3di_sequence : I3DSequence
        The sequence of the first FASTA entry whose header matches.

    Raises
    ------
    ValueError
        If no header in the reference FASTA file matches the given IDs.
    """
    ref_3di_file = fasta.FastaFile.read(
        Path(data_dir("structure")) / "alphabet" / "i3d.fasta"
    )
    # The IDs are literal text, so they are escaped before being put into the
    # pattern.
    # The negative lookahead requires the chain ID to be matched completely:
    # otherwise chain 'A' would also match an entry of chain 'AA'
    header_pattern = re.compile(
        rf"^{re.escape(pdb_id)}(_MODEL_\d+)?_{re.escape(chain_id)}(?![A-Za-z0-9])"
    )
    for header, seq_string in ref_3di_file.items():
        # The first model of a structure is also the first sequence to appear
        # and thus to be matched
        if header_pattern.match(header):
            return strucalph.I3DSequence(seq_string)
    raise ValueError(
        f"Reference 3Di sequence not found for {pdb_id} chain {chain_id}"
    )


@pytest.mark.parametrize(
    "path",
    # Sorted, so the test collection order does not depend on the file system
    sorted(Path(data_dir("structure")).glob("*.bcif")),
    ids=lambda path: path.stem,
)
def test_to_3di(path):
    """
    Check if the 3Di sequence of each chain is correctly generated, by comparing the
    result to a reference sequence generated with *foldseek*.
    """
    pdbx_file = pdbx.BinaryCIFFile.read(path)
    atoms = pdbx.get_structure(pdbx_file, model=1)
    atoms = atoms[struc.filter_amino_acids(atoms)]
    if len(atoms) == 0:
        # Report the case explicitly instead of letting the test silently pass
        pytest.skip("Structure contains no amino acid residues")
    test_3di, chain_starts = strucalph.to_3di(atoms)
    chain_ids = atoms.chain_id[chain_starts]

    ref_3di = [_get_ref_3di_sequence(path.stem, chain_id) for chain_id in chain_ids]

    # 'zip()' would silently truncate to the shortest input,
    # hiding missing or surplus sequences
    assert len(test_3di) == len(ref_3di)
    for test, ref, chain_id in zip(test_3di, ref_3di, chain_ids):
        assert str(test) == str(ref), f"3Di sequence of chain {chain_id} does not match"


def test_missing():
    """
    Placeholder for a test checking that missing or non-peptide residues within a
    chain are correctly handled.
    """
    # TODO: Not implemented yet, the test passes without checking anything


def test_empty():
    """
    Placeholder for a test checking that an empty structure is correctly handled.
    """
    # TODO: Not implemented yet, the test passes without checking anything

0 comments on commit ff3831d

Please sign in to comment.