Skip to content

Commit

Permalink
Merge pull request #707 from alex-sbaq/pdb_space_group
Browse files Browse the repository at this point in the history
Add support for space group information when reading and writing PDB files
  • Loading branch information
padix-key authored Dec 22, 2024
2 parents a70ada1 + ded3998 commit 8ad4856
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 0 deletions.
61 changes: 61 additions & 0 deletions src/biotite/structure/io/pdb/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
__all__ = ["PDBFile"]

import warnings
from collections import namedtuple
import numpy as np
from biotite.file import InvalidFileError, TextFile
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
Expand Down Expand Up @@ -53,6 +54,8 @@
# Fixed-width column ranges expressed as 0-based, end-exclusive slices.
# NOTE(review): _alpha/_beta/_gamma presumably cover the unit cell angle
# fields of the CRYST1 record -- confirm against the code that uses them.
_alpha = slice(33, 40)
_beta = slice(40, 47)
_gamma = slice(47, 54)
# Space group field of a CRYST1 line (11 characters wide)
_space = slice(55, 66)
# Z value field of a CRYST1 line (4 characters wide)
_z = slice(66, 70)


class PDBFile(TextFile):
Expand Down Expand Up @@ -545,6 +548,38 @@ def get_structure(

return array

def get_space_group(self):
    """
    Extract the space group and Z value from the CRYST1 record.

    Only the first line starting with ``CRYST1`` is considered.

    Returns
    -------
    space_group : str
        The extracted space group.
        The text is returned exactly as stored in the fixed-width
        column range, i.e. it is not stripped of padding whitespace.
    z_val : int
        The extracted Z value.

    Raises
    ------
    InvalidFileError
        If the file contains no CRYST1 record or if the Z value of the
        record cannot be parsed as an integer.

    Notes
    -----
    Both values are returned together as a ``SpaceGroupInfo`` named
    tuple with the fields ``space_group`` and ``z_val``.
    """
    SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])

    # CRYST1 is a one-time record, so the first match is the only one
    # that needs to be inspected
    for line in self.lines:
        if not line.startswith("CRYST1"):
            continue
        try:
            return SpaceGroupInfo(
                space_group=str(line[_space]),
                z_val=int(line[_z]),
            )
        except ValueError as err:
            # An empty or non-numeric Z field ends up here
            raise InvalidFileError(
                "File does not contain valid space group and/or Z values"
            ) from err

    # Without this, a missing record would surface as an
    # UnboundLocalError on the return statement
    raise InvalidFileError("File does not contain a 'CRYST1' record")

def set_structure(self, array, hybrid36=False):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
Expand Down Expand Up @@ -700,6 +735,32 @@ def set_structure(self, array, hybrid36=False):

self._index_models_and_atoms()

def set_space_group(self, info):
    """
    Update the CRYST1 record with the provided space group and Z value.

    Only the first line starting with ``CRYST1`` is modified.
    If the file contains no such line, the file is left unchanged.

    Parameters
    ----------
    info : tuple(str, int) or SpaceGroupInfo
        Contains the space group and Z-value.
        Either an object with the attributes ``space_group`` and
        ``z_val`` or a plain ``(space_group, z_val)`` tuple.

    Raises
    ------
    AttributeError
        If `info` does not provide a space group string and a Z value
        in one of the accepted forms.
    """
    for i, line in enumerate(self.lines):
        if not line.startswith("CRYST1"):
            continue
        try:
            try:
                space_group, z_val = info.space_group, info.z_val
            except AttributeError:
                # Plain tuples have no named fields -> unpack by position
                space_group, z_val = info
            # Format the replacement fields to their fixed column widths
            space_group_str = space_group.ljust(11)
            z_val_str = str(z_val).rjust(4)
        except (ValueError, AttributeError, TypeError) as e:
            # Keep the exception type callers already expect
            raise AttributeError(
                f"Failed to update CRYST1 record. "
                f"Line: {line.strip()} | Error: {e}"
            ) from e
        # Pad short records so that the new fields always start at
        # column index 55, then keep whatever followed the old fields
        prefix = line[:55].ljust(55)
        self.lines[i] = prefix + space_group_str + z_val_str + line[70:]
        break

def list_assemblies(self):
"""
List the biological assemblies that are available for the
Expand Down
39 changes: 39 additions & 0 deletions tests/structure/io/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,45 @@ def test_array_conversion(path, model, hybrid36, include_bonds):
assert array1.coord.tolist() == array2.coord.tolist()


@pytest.mark.parametrize(
    "path",
    glob.glob(join(data_dir("structure"), "*.pdb")),
)
def test_space_group(path):
    """
    Check that the structure and the space group information obtained
    from a PDB file are the same after transferring them into a newly
    created :class:`PDBFile`.

    Parameters
    ----------
    path : str
        Path to the PDB file.
    """
    source_file = pdb.PDBFile.read(path)
    print(f"Testing file: {path}")

    # Any error raised while reading propagates and fails the test
    ref_structure = source_file.get_structure(model=1)
    ref_space_group = source_file.get_space_group()

    # Transfer structure and space group into a fresh file object
    target_file = pdb.PDBFile()
    target_file.set_structure(ref_structure)
    target_file.set_space_group(ref_space_group)

    # Read both back from the fresh file object
    test_structure = target_file.get_structure(model=1)
    test_space_group = target_file.get_space_group()

    assert ref_structure == test_structure, (
        "Structure mismatch after writing and reading."
    )
    assert ref_space_group == test_space_group, (
        "Space group mismatch after writing and reading."
    )


@pytest.mark.parametrize(
"path, model",
itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]),
Expand Down

0 comments on commit 8ad4856

Please sign in to comment.