From d89e5abe9699647d4fd1510447febd5b3896e29e Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Tue, 15 Oct 2024 18:33:23 +0200 Subject: [PATCH 1/2] Make `dihedral_backbone()` more robust against missing atoms --- .../scripts/structure/protein/ramachandran.py | 10 +- doc/tutorial/structure/measurement.rst | 4 +- src/biotite/structure/geometry.py | 173 ++++++------------ src/biotite/structure/util.py | 32 +++- tests/structure/test_geometry.py | 45 +++-- 5 files changed, 116 insertions(+), 148 deletions(-) diff --git a/doc/examples/scripts/structure/protein/ramachandran.py b/doc/examples/scripts/structure/protein/ramachandran.py index 806ac283f..0f17bdbf7 100644 --- a/doc/examples/scripts/structure/protein/ramachandran.py +++ b/doc/examples/scripts/structure/protein/ramachandran.py @@ -21,12 +21,14 @@ # Download and parse file file = rcsb.fetch("3vkh", "cif", gettempdir()) atom_array = strucio.load_structure(file) +# Extract first peptide chain +peptide = atom_array[struc.filter_amino_acids(atom_array)] +chain = peptide[peptide.chain_id == "A"] # Calculate backbone dihedral angles # from one of the two identical chains in the asymmetric unit -phi, psi, omega = struc.dihedral_backbone(atom_array[atom_array.chain_id == "A"]) -# Conversion from radians into degree -phi *= 180 / np.pi -psi *= 180 / np.pi +phi, psi, omega = struc.dihedral_backbone(chain) +phi = np.rad2deg(phi) +psi = np.rad2deg(psi) # Remove invalid values (NaN) at first and last position phi = phi[1:-1] psi = psi[1:-1] diff --git a/doc/tutorial/structure/measurement.rst b/doc/tutorial/structure/measurement.rst index a5bd94d6f..63eeb73aa 100644 --- a/doc/tutorial/structure/measurement.rst +++ b/doc/tutorial/structure/measurement.rst @@ -98,8 +98,8 @@ simple *Ramachandran* plot for the first frame of *TC5b*. ) ax.set_xlim(-180,180) ax.set_ylim(-180,180) - ax.set_xlabel("$\phi$") - _ = ax.set_ylabel("$\psi$") + ax.set_xlabel(r"$\phi$") + _ = ax.set_ylabel(r"$\psi$") Surface area ------------ diff --git a/src/biotite/structure/geometry.py b/src/biotite/structure/geometry.py index cc5c59f4e..8f64fbfb8 100644 --- a/src/biotite/structure/geometry.py +++ b/src/biotite/structure/geometry.py @@ -25,10 +25,12 @@ import numpy as np from biotite.structure.atoms import AtomArray, AtomArrayStack, coord from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal -from biotite.structure.chains import chain_iter -from biotite.structure.error import BadStructureError -from biotite.structure.filter import filter_peptide_backbone -from biotite.structure.util import norm_vector, vector_dot +from biotite.structure.filter import filter_amino_acids +from biotite.structure.util import ( + coord_for_atom_name_per_residue, + norm_vector, + vector_dot, +) def displacement(atoms1, atoms2, box=None): @@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs): def dihedral_backbone(atom_array): """ - Measure the characteristic backbone dihedral angles of a protein - structure. + Measure the characteristic backbone dihedral angles of a chain. Parameters ---------- - atom_array: AtomArray or AtomArrayStack - The protein structure. A complete backbone, without gaps, - is required here. - Chain transitions are allowed, the angles at the transition are - `NaN`. - The order of the backbone atoms for each residue must be - (N, CA, C). + atoms: AtomArray or AtomArrayStack + The protein structure to measure the dihedral angles for. + For missing backbone atoms the corresponding angles are `NaN`. Returns ------- phi, psi, omega : ndarray - An array containing the 3 backbone dihedral angles for every - CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega' - are not defined at the C-terminus. In these places the arrays - have *NaN* values. If an :class:`AtomArrayStack` is given, the - output angles are 2-dimensional, the first dimension corresponds - to the model number. - - Raises - ------ - BadStructureError - If the amount of backbone atoms is not equal to amount of - residues times 3 (for N, CA and C). - - See Also - -------- - dihedral - - Examples - -------- - - >>> phi, psi, omega = dihedral_backbone(atom_array) - >>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T) - [[ nan -56.145] - [ -43.980 -51.309] - [ -66.466 -30.898] - [ -65.219 -45.945] - [ -64.747 -30.346] - [ -73.136 -43.425] - [ -64.882 -43.255] - [ -59.509 -25.698] - [ -77.989 -8.823] - [ 110.784 8.079] - [ 55.244 -124.371] - [ -57.983 -28.766] - [ -81.834 19.125] - [-124.057 13.401] - [ 67.931 25.218] - [-143.952 131.297] - [ -70.100 160.068] - [ -69.484 145.669] - [ -77.264 124.223] - [ -78.100 nan]] + An array containing the 3 backbone dihedral angles for every CA atom. + `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the + C-terminus. + In these places the arrays have *NaN* values. + If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional, + the first dimension corresponds to the model number. """ - bb_filter = filter_peptide_backbone(atom_array) - backbone = atom_array[..., bb_filter] - - if ( - backbone.array_length() % 3 != 0 - or (backbone.atom_name[0::3] != "N").any() - or (backbone.atom_name[1::3] != "CA").any() - or (backbone.atom_name[2::3] != "C").any() - ): - raise BadStructureError( - "The backbone is invalid, must be repeats of (N, CA, C), " - "maybe a backbone atom is missing" - ) - phis = [] - psis = [] - omegas = [] - for chain_bb in chain_iter(backbone): - phi, psi, omega = _dihedral_backbone(chain_bb) - phis.append(phi) - psis.append(psi) - omegas.append(omega) - return ( - np.concatenate(phis, axis=-1), - np.concatenate(psis, axis=-1), - np.concatenate(omegas, axis=-1), - ) + amino_acid_mask = filter_amino_acids(atom_array) + # Coordinates for dihedral angle calculation + coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue( + atom_array, + ("N", "CA", "C"), + amino_acid_mask, + ) + n_residues = coord_n.shape[-2] -def _dihedral_backbone(chain_bb): - bb_coord = chain_bb.coord # Coordinates for dihedral angle calculation # Dim 0: Model index (only for atom array stacks) # Dim 1: Angle index # Dim 2: X, Y, Z coordinates # Dim 3: Atoms involved in dihedral angle - if isinstance(chain_bb, AtomArray): - angle_coord_shape = (len(bb_coord) // 3, 3, 4) - elif isinstance(chain_bb, AtomArrayStack): - angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4) - phi_coord = np.full(angle_coord_shape, np.nan) - psi_coord = np.full(angle_coord_shape, np.nan) - omega_coord = np.full(angle_coord_shape, np.nan) - - # Indices for coordinates of CA atoms - ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1 + if isinstance(atom_array, AtomArray): + angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4) + elif isinstance(atom_array, AtomArrayStack): + angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4) + coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32) + coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32) + coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32) + # fmt: off - phi_coord [..., 1:, :, 0] = bb_coord[..., ca_i[1: ]-2, :] - phi_coord [..., 1:, :, 1] = bb_coord[..., ca_i[1: ]-1, :] - phi_coord [..., 1:, :, 2] = bb_coord[..., ca_i[1: ], :] - phi_coord [..., 1:, :, 3] = bb_coord[..., ca_i[1: ]+1, :] - psi_coord [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :] - psi_coord [..., :-1, :, 1] = bb_coord[..., ca_i[:-1], :] - psi_coord [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :] - psi_coord [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :] - omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1], :] - omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :] - omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :] - omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :] + coord_for_phi[..., 1:, :, 0] = coord_c[..., 0:-1, :] + coord_for_phi[..., 1:, :, 1] = coord_n[..., 1:, :] + coord_for_phi[..., 1:, :, 2] = coord_ca[..., 1:, :] + coord_for_phi[..., 1:, :, 3] = coord_c[..., 1:, :] + + coord_for_psi[..., 0:-1, :, 0] = coord_n[..., 0:-1, :] + coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :] + coord_for_psi[..., 0:-1, :, 2] = coord_c[..., 0:-1, :] + coord_for_psi[..., 0:-1, :, 3] = coord_n[..., 1:, :] + + coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :] + coord_for_omg[..., 0:-1, :, 1] = coord_c[..., 0:-1, :] + coord_for_omg[..., 0:-1, :, 2] = coord_n[..., 1:, :] + coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:, :] # fmt: on phi = dihedral( - phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3] + coord_for_phi[..., 0], + coord_for_phi[..., 1], + coord_for_phi[..., 2], + coord_for_phi[..., 3], ) psi = dihedral( - psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3] + coord_for_psi[..., 0], + coord_for_psi[..., 1], + coord_for_psi[..., 2], + coord_for_psi[..., 3], ) - omega = dihedral( - omega_coord[..., 0], - omega_coord[..., 1], - omega_coord[..., 2], - omega_coord[..., 3], + omg = dihedral( + coord_for_omg[..., 0], + coord_for_omg[..., 1], + coord_for_omg[..., 2], + coord_for_omg[..., 3], ) - return phi, psi, omega + return phi, psi, omg def centroid(atoms): diff --git a/src/biotite/structure/util.py b/src/biotite/structure/util.py index 43816adfe..a2579d929 100644 --- a/src/biotite/structure/util.py +++ b/src/biotite/structure/util.py @@ -17,7 +17,7 @@ ] import numpy as np -from biotite.structure.atoms import AtomArray +from biotite.structure.atoms import AtomArrayStack from biotite.structure.error import BadStructureError from biotite.structure.residues import get_residue_masks, get_residue_starts @@ -105,7 +105,7 @@ def matrix_rotate(v, matrix): return v -def coord_for_atom_name_per_residue(atoms, atom_names): +def coord_for_atom_name_per_residue(atoms, atom_names, mask=None): """ Get the coordinates of a specific atom for every residue. @@ -118,39 +118,51 @@ def coord_for_atom_name_per_residue(atoms, atom_names): atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n) The atom array or stack to get the residue-wise coordinates from. atom_names : list of str, length=k + The atom names to get the coordinates for. + mask : ndarray, shape=(n,), dtype=bool, optional + A boolean mask to further select valid atoms from `atoms`. Returns ------- coord: ndarray, shape=(k, m, r, 3) or shape=(k, r, 3) The coordinates of the specified atom for each residue. """ + is_multi_model = isinstance(atoms, AtomArrayStack) residue_starts = get_residue_starts(atoms) all_residue_masks = get_residue_masks(atoms, residue_starts) - if isinstance(atoms, AtomArray): + if is_multi_model: coord = np.full( - (len(atom_names), len(residue_starts), 3), + (len(atom_names), atoms.stack_depth(), len(residue_starts), 3), np.nan, dtype=np.float32, ) else: coord = np.full( - (len(atom_names), atoms.stack_depth(), len(residue_starts), 3), + (len(atom_names), len(residue_starts), 3), np.nan, dtype=np.float32, ) for i, atom_name in enumerate(atom_names): - atom_mask_for_name = atoms.atom_name == atom_name - all_residue_masks_for_specified_atom = all_residue_masks & atom_mask_for_name + specified_atom_mask = atoms.atom_name == atom_name + if mask is not None: + specified_atom_mask &= mask + all_residue_masks_for_specified_atom = all_residue_masks & specified_atom_mask number_of_specified_atoms_per_residue = np.count_nonzero( all_residue_masks_for_specified_atom, axis=-1 ) if np.any(number_of_specified_atoms_per_residue > 1): raise BadStructureError(f"Multiple '{atom_name}' atoms per residue") residues_with_specified_atom = number_of_specified_atoms_per_residue == 1 - coord[i, ..., residues_with_specified_atom, :] = atoms.coord[ - ..., atom_mask_for_name, : - ] + coord_of_specified_atoms = atoms.coord[..., specified_atom_mask, :] + if is_multi_model: + # Swap dimensions due to NumPy's behavior when using advanced indexing + # (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing) + coord[i, ..., residues_with_specified_atom, :] = ( + coord_of_specified_atoms.transpose(1, 0, 2) + ) + else: + coord[i, residues_with_specified_atom, :] = coord_of_specified_atoms return coord diff --git a/tests/structure/test_geometry.py b/tests/structure/test_geometry.py index dc5ecf1af..715cdeb3e 100644 --- a/tests/structure/test_geometry.py +++ b/tests/structure/test_geometry.py @@ -39,28 +39,23 @@ def test_dihedral(): assert struc.dihedral(coord1, coord2, coord3, coord4) == pytest.approx(0.5 * np.pi) -@pytest.mark.parametrize("multiple_chains", [False, True]) -def test_dihedral_backbone_general(multiple_chains): +@pytest.mark.parametrize("multi_model", [False, True]) +def test_dihedral_backbone_general(multi_model): stack = strucio.load_structure(join(data_dir("structure"), "1l2y.bcif")) n_models = stack.stack_depth() n_res = stack.res_id[-1] - if multiple_chains: - stack2 = stack.copy() - stack2.chain_id[:] = "B" - stack = stack + stack2 - n_res = n_res * 2 - array = stack[0] + + if multi_model: + atoms = stack + expected_shape = (n_models, n_res) + else: + atoms = stack[0] + expected_shape = (n_res,) # Test array - phi, psi, omega = struc.dihedral_backbone(array) - assert phi.shape == (n_res,) - assert psi.shape == (n_res,) - assert omega.shape == (n_res,) - _assert_plausible_omega(omega) - # Test stack - phi, psi, omega = struc.dihedral_backbone(stack) - assert phi.shape == (n_models, n_res) - assert psi.shape == (n_models, n_res) - assert omega.shape == (n_models, n_res) + phi, psi, omega = struc.dihedral_backbone(atoms) + assert phi.shape == expected_shape + assert psi.shape == expected_shape + assert omega.shape == expected_shape _assert_plausible_omega(omega) @@ -72,13 +67,17 @@ def _assert_plausible_omega(omega): assert omega.tolist() == pytest.approx([np.pi] * len(omega), rel=0.6) -def test_dihedral_backbone_consistency(): +@pytest.mark.parametrize("multi_model", [False, True]) +def test_dihedral_backbone_consistency(multi_model): """ Check if the computed dihedral angles are equal to the reference computed with MDTraj. """ pdbx_file = pdbx.BinaryCIFFile.read(join(data_dir("structure"), "1l2y.bcif")) atoms = pdbx.get_structure(pdbx_file, model=1) + if multi_model: + # Use models with equal coordinates + atoms = struc.stack([atoms] * 2) test_phi, test_psi, test_ome = struc.dihedral_backbone(atoms) @@ -105,6 +104,14 @@ def test_dihedral_backbone_consistency(): [-1.363, np.nan, np.nan], ]) # fmt: skip + if multi_model: + assert np.array_equal(test_phi[1], test_phi[0], equal_nan=True) + assert np.array_equal(test_psi[1], test_psi[0], equal_nan=True) + assert np.array_equal(test_ome[1], test_ome[0], equal_nan=True) + test_phi = test_phi[0] + test_psi = test_psi[0] + test_ome = test_ome[0] + assert test_phi == pytest.approx(ref_dihedrals[:, 0], abs=1e-3, nan_ok=True) assert test_psi == pytest.approx(ref_dihedrals[:, 1], abs=1e-3, nan_ok=True) assert test_ome == pytest.approx(ref_dihedrals[:, 2], abs=1e-3, nan_ok=True) From e0be07613366dc7c9e5d75b82dcc61e587c4de9b Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Tue, 15 Oct 2024 18:34:35 +0200 Subject: [PATCH 2/2] Add Protein Blocks alphabet --- doc/apidoc.json | 6 +- src/biotite/sequence/align/matrix.py | 46 ++++++ .../sequence/align/matrix_data/PB.license | 21 +++ src/biotite/sequence/align/matrix_data/PB.mat | 18 +++ src/biotite/structure/alphabet/__init__.py | 1 + src/biotite/structure/alphabet/pb.license | 21 +++ src/biotite/structure/alphabet/pb.py | 143 ++++++++++++++++++ tests/sequence/align/test_matrix.py | 3 +- tests/structure/data/alphabet/1ay7.bcif | Bin 0 -> 213446 bytes tests/structure/data/alphabet/README.rst | 9 +- tests/structure/data/alphabet/pb.fasta | 2 + tests/structure/test_i3d.py | 2 + tests/structure/test_pb.py | 76 ++++++++++ 13 files changed, 344 insertions(+), 4 deletions(-) create mode 100644 src/biotite/sequence/align/matrix_data/PB.license create mode 100644 src/biotite/sequence/align/matrix_data/PB.mat create mode 100644 src/biotite/structure/alphabet/pb.license create mode 100644 src/biotite/structure/alphabet/pb.py create mode 100644 tests/structure/data/alphabet/1ay7.bcif create mode 100644 tests/structure/data/alphabet/pb.fasta create mode 100644 tests/structure/test_pb.py diff --git a/doc/apidoc.json b/doc/apidoc.json index 55371866d..7432602f4 100644 --- a/doc/apidoc.json +++ b/doc/apidoc.json @@ -404,10 +404,12 @@ }, "biotite.structure.alphabet" : { "Structural alphabets": [ - "I3DSequence" + "I3DSequence", + "ProteinBlocksSequence" ], "Conversion Function": [ - "to_3di" + "to_3di", + "to_protein_blocks" ] } } diff --git a/src/biotite/sequence/align/matrix.py b/src/biotite/sequence/align/matrix.py index 7c80fa1fc..9e4324ac9 100644 --- a/src/biotite/sequence/align/matrix.py +++ b/src/biotite/sequence/align/matrix.py @@ -65,6 +65,7 @@ class SubstitutionMatrix(object): - Structural alphabet substitution matrices - **3Di** - For 3Di alphabet from ``foldseek`` :footcite:`VanKempen2024` + - **PB** - For Protein Blocks alphabet from *PBexplore* :footcite:`Barnoud2017` A list of all available matrix names is returned by :meth:`list_db()`. @@ -408,6 +409,7 @@ def std_3di_matrix(): """ Get the default :class:`SubstitutionMatrix` for 3Di sequence alignments. + :footcite:`VanKempen2024` Returns ------- @@ -418,3 +420,47 @@ def std_3di_matrix(): from biotite.structure.alphabet.i3d import I3DSequence return SubstitutionMatrix(I3DSequence.alphabet, I3DSequence.alphabet, "3Di") + + @staticmethod + @functools.cache + def std_protein_blocks_matrix(unknown_match=200, unkown_mismatch=-200): + """ + Get the default :class:`SubstitutionMatrix` for Protein Blocks sequences. + + The matrix is adapted from *PBxplore* :footcite:`Barnoud2017`. + + Parameters + ---------- + unknown_match, unknown_mismatch : int, optional + The match and mismatch score for undefined symbols. + The default values were chosen arbitrarily, but are in the order of + magnitude of the other score values. + + Returns + ------- + matrix : SubstitutionMatrix + Default matrix. + + References + ---------- + + .. footbibliography:: + + """ + from biotite.structure.alphabet.pb import ProteinBlocksSequence + + alphabet = ProteinBlocksSequence.alphabet + unknown_symbol = ProteinBlocksSequence.unknown_symbol + matrix_dict = SubstitutionMatrix.dict_from_db("PB") + # Add match/mismatch scores for undefined symbols residues + for symbol in alphabet: + if symbol == unknown_symbol: + continue + matrix_dict[symbol, unknown_symbol] = unkown_mismatch + matrix_dict[unknown_symbol, symbol] = unkown_mismatch + matrix_dict[unknown_symbol, unknown_symbol] = unknown_match + return SubstitutionMatrix( + alphabet, + alphabet, + matrix_dict, + ) diff --git a/src/biotite/sequence/align/matrix_data/PB.license b/src/biotite/sequence/align/matrix_data/PB.license new file mode 100644 index 000000000..688633bfa --- /dev/null +++ b/src/biotite/sequence/align/matrix_data/PB.license @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 Poulain, A. G. de Brevern + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/biotite/sequence/align/matrix_data/PB.mat b/src/biotite/sequence/align/matrix_data/PB.mat new file mode 100644 index 000000000..abb8dc293 --- /dev/null +++ b/src/biotite/sequence/align/matrix_data/PB.mat @@ -0,0 +1,18 @@ +# PB substitution matrix, adapted from PBxplore + a b c d e f g h i j k l m n o p +a 516 -59 113 -105 -411 -177 -27 -361 47 -103 -644 -259 -599 -372 -124 -83 +b -59 541 -146 -210 -155 -310 -97 90 182 -128 -30 29 -745 -242 -165 22 +c 113 -146 360 -14 -333 -240 49 -438 -269 -282 -688 -682 -608 -455 -147 6 +d -105 -210 -14 221 5 -131 -349 -278 -253 -173 -585 -670 -1573 -1048 -691 -497 +e -411 -155 -333 5 520 185 186 138 -378 -70 -112 -514 -1136 -469 -617 -632 +f -177 -310 -240 -131 185 459 -99 -45 -445 83 -214 -88 -547 -629 -406 -552 +g -27 -97 49 -349 186 -99 665 -99 -89 -118 -409 -138 -124 172 128 254 +h -361 90 -438 -278 138 -45 -99 632 -205 316 192 -108 -712 -359 95 -399 +i 47 182 -269 -253 -378 -445 -89 -205 696 186 8 15 -709 -269 -169 226 +j -103 -128 -282 -173 -70 83 -118 316 186 768 196 5 -398 -340 -117 -104 +k -644 -30 -688 -585 -112 -214 -409 192 8 196 568 -65 -270 -231 -471 -382 +l -259 29 -682 -670 -514 -88 -138 -108 15 5 -65 533 -131 8 -11 -316 +m -599 -745 -608 -1573 -1136 -547 -124 -712 -709 -398 -270 -131 241 -4 -190 -155 +n -372 -242 -455 -1048 -469 -629 172 -359 -269 -340 -231 8 -4 703 88 146 +o -124 -165 -147 -691 -617 -406 128 95 -169 -117 -471 -11 -190 88 716 58 +p -83 22 6 -497 -632 -552 254 -399 226 -104 -382 -316 -155 146 58 609 \ No newline at end of file diff --git a/src/biotite/structure/alphabet/__init__.py b/src/biotite/structure/alphabet/__init__.py index 1e1678822..baafab6e1 100644 --- a/src/biotite/structure/alphabet/__init__.py +++ b/src/biotite/structure/alphabet/__init__.py @@ -10,3 +10,4 @@ __author__ = "Martin Larralde, Patrick Kunzmann" from .i3d import * +from .pb import * diff --git a/src/biotite/structure/alphabet/pb.license b/src/biotite/structure/alphabet/pb.license new file mode 100644 index 000000000..688633bfa --- /dev/null +++ b/src/biotite/structure/alphabet/pb.license @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 Poulain, A. G. de Brevern + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/src/biotite/structure/alphabet/pb.py b/src/biotite/structure/alphabet/pb.py new file mode 100644 index 000000000..e2c527cca --- /dev/null +++ b/src/biotite/structure/alphabet/pb.py @@ -0,0 +1,143 @@ +# This source code is part of the Biotite package and is distributed +# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further +# information. + +""" +Conversion of structures into the *Protein Blocks* structural alphabet. +""" + +__name__ = "biotite.structure.alphabet" +__author__ = "Patrick Kunzmann" +__all__ = ["ProteinBlocksSequence", "to_protein_blocks"] + +import numpy as np +from biotite.sequence.alphabet import LetterAlphabet +from biotite.sequence.sequence import Sequence +from biotite.structure.chains import get_chain_starts +from biotite.structure.geometry import dihedral_backbone + +# PB reference angles, adapted from PBxplore +PB_ANGLES = np.array( + [ + [41.14, 75.53, 13.92, -99.80, 131.88, -96.27, 122.08, -99.68], + [108.24, -90.12, 119.54, -92.21, -18.06, -128.93, 147.04, -99.90], + [-11.61, -105.66, 94.81, -106.09, 133.56, -106.93, 135.97, -100.63], + [141.98, -112.79, 132.20, -114.79, 140.11, -111.05, 139.54, -103.16], + [133.25, -112.37, 137.64, -108.13, 133.00, -87.30, 120.54, 77.40], + [116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51], + [0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98], + [119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88], + [130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01], + [114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82], + [117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16], + [139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74], + [-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19], + [-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90], + [-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23], + [-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91], + ] +) # fmt: skip + + +class ProteinBlocksSequence(Sequence): + """ + Representation of a structure in the *Protein Blocks* structural alphabet. + :footcite:`Brevern2000` + + Parameters + ---------- + sequence : iterable object, optional + The *Protein Blocks* sequence. + This may either be a list or a string. + May take upper or lower case letters. + By default the sequence is empty. + + See also + -------- + to_protein_blocks : Create *Protein Blocks* sequences from a structure. + + References + ---------- + + .. footbibliography:: + + """ + + alphabet = LetterAlphabet("abcdefghijklmnopZ") + unknown_symbol = "Z" + + def get_alphabet(self): + return ProteinBlocksSequence.alphabet + + +def to_protein_blocks(atoms): + """ + Encode each chain in the given structure to the *Protein Blocks* structural + alphabet. + :footcite:`Brevern2000` + + Parameters + ---------- + atoms : AtomArray + The atom array to encode. + May contain multiple chains. + + Returns + ------- + sequences : list of Sequence, length=n + The encoded *Protein Blocks* sequence for each peptide chain in the structure. + chain_start_indices : ndarray, shape=(n,), dtype=int + The atom index where each chain starts. + + References + ---------- + + .. footbibliography:: + + Examples + -------- + + >>> sequences, chain_starts = to_protein_blocks(atom_array) + >>> print(sequences[0]) + ZZmmmmmnopjmnopacdZZ + """ + sequences = [] + chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True) + for i in range(len(chain_start_indices) - 1): + start = chain_start_indices[i] + stop = chain_start_indices[i + 1] + chain = atoms[start:stop] + sequences.append(_to_protein_blocks(chain)) + return sequences, chain_start_indices[:-1] + + +def _to_protein_blocks(chain): + phi, psi, _ = dihedral_backbone(chain) + + pb_angles = np.full((len(phi), 8), np.nan) + pb_angles[2:-2, 0] = psi[:-4] + pb_angles[2:-2, 1] = phi[1:-3] + pb_angles[2:-2, 2] = psi[1:-3] + pb_angles[2:-2, 3] = phi[2:-2] + pb_angles[2:-2, 4] = psi[2:-2] + pb_angles[2:-2, 5] = phi[3:-1] + pb_angles[2:-2, 6] = psi[3:-1] + pb_angles[2:-2, 7] = phi[4:] + pb_angles = np.rad2deg(pb_angles) + + # Angle RMSD of all reference angles with all actual angles + rmsda = np.sum( + ((PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180) % 360 - 180) ** 2, + axis=-1, + ) + # Where RMSDA is NaN, (missing atoms/residues or chain ends) set symbol to unknown + pb_seq_code = np.full(len(pb_angles), ProteinBlocksSequence.alphabet.encode("Z")) + pb_available_mask = ~np.isnan(rmsda).any(axis=0) + # Chose PB, where the RMSDA to the reference angle is lowest + # Due to the definition of Biotite symbol codes + # the index of the chosen PB is directly the symbol code + pb_seq_code[pb_available_mask] = np.argmin(rmsda[:, pb_available_mask], axis=0) + # Put the array of symbol codes into actual sequence objects + pb_sequence = ProteinBlocksSequence() + pb_sequence.code = pb_seq_code + return pb_sequence diff --git a/tests/sequence/align/test_matrix.py b/tests/sequence/align/test_matrix.py index ca10133bb..2745af8cb 100644 --- a/tests/sequence/align/test_matrix.py +++ b/tests/sequence/align/test_matrix.py @@ -14,7 +14,7 @@ [ entry for entry in align.SubstitutionMatrix.list_db() - if entry not in ["NUC", "GONNET", "3Di"] + if entry not in ["NUC", "GONNET", "3Di", "PB"] ], ) def test_matrices(db_entry): @@ -45,6 +45,7 @@ def test_structural_alphabet_matrices(matrix_name, alphabet): "std_protein_matrix", "std_nucleotide_matrix", "std_3di_matrix", + "std_protein_blocks_matrix", ], ) def test_default_matrices(method_name): diff --git a/tests/structure/data/alphabet/1ay7.bcif b/tests/structure/data/alphabet/1ay7.bcif new file mode 100644 index 0000000000000000000000000000000000000000..3ce454e2efa7a3ff3de0bcafdaecc79b329776a3 GIT binary patch literal 213446 zcmeD^2Vfi3*|#)3vI7Z(nhhn(&SHaXE4GPaJC-vXphC7}TSS(OB!`#|Hl+h76euk% zh3vgZQUYO>)dFdu?GFv4rDc^pGiyQ|aV`tFogXPe*SarsK%O<4>4S+msP^*KH6RZ}NVn>=;B-R8HIyF9H+e0Sfp zy2D|EFRm;#HP2e#YV$kVJzl57x1X4O^Gdg^)3Ms>aQnSIYg#?7?oPMwZhUmP)4mej z<)*dVgPrd7yKh~&#Obzgu=u_3$>jCgdN!gf%xGZf{7u~Xb>%&N2X}U*zo*MVulI4+ zSHhLt5H@?-+I$Yb@U36o%zbNBrNia7t%i}cJKe+@pSQIMefbvd6`ENIosV1`*Y|^7Uy*iN^u;_8vI<}^Ay2wGkcyP;Ads0&9~&4 zwO-G)_>Oq?o2|BPyVGxN^|;$S-cA8FFjxe!b#`Z~--_O)9YmWuJ7G1gUG|n`li?{j z0IcFTU|}F^x)m;Ojt+mjU*e(#tu!s(=z zo>X3XJOh$nYdri--0)$G*!-|wLeBna) zlkts4E2E}Jm1u(2V8BxZiHVYcr~<4J{U0Qv9toIkMU1A>S~_*=)Y4g{)2B|&0oXEy zim?3yqO(zCwzal8e29w<0Vr>Tp<4mOQ%h|61#QPAZ)#!(LuBP#HGX3HsS`+GMsIqf9wO z*^41aqXr|8l#Ym0QG+R+GF7xY{5Ge{C&X$02=*all3@PfP_$FIL{7|N`gS%VG z1X1O268y+r$|ceDF>Kd%c|A*=$S#8TghX0N*~+or>*(_MoQPG*a9@chX4=+a%jBpj5BGU`<5EF)9?)JTe_{ZY`W zlu?6}Ky+Wr0_^Q5LB@HTzogaE+2wND+^vpEVCL}A<#KR_R`e0sDS;ogIlSP>aQGS> zZBDnN6O_Ud2?vO(T)ytkPFogOw^n(RibAQD7&mOx*b$f}>tH*OWQ4}cC6XW=CP7as zk|5sIBkJq@Qz0d}JgToJl*{$W>>U-k98Vd9axw-z2S}v)Q64fdNQTic(mj*~GL_UU zhBlefOF_I=K&zy%%fia0{pL6R~h%#D(=WROCI6-xlR zZ)wy~R)!qf%dFt7i904zGtrrbs8f`z;cxnS+^rp6kKYS!1u&I3tnmbGGEb^9PLWiM zwxS@WAx58i+6hr=#P4B|8ZmescVcQJ&fn(;f-0H?e~T~LfR6&99<(brvGr|5qq#QY z&{CluBD5%i7CxXZA=(#HiC&71teq!LMzJaa+z9emQYHo zuPyBelH5oM{v<>^8; zWNVVfsKip-(56A6cEo$hHu1q^wr&8SS8$BogSROcIXp;|J0dFZWQ${It8>Z3lFKH~ z_qN#F&Ly6uwuvRRlPh6f4)4U0#gi98EQrTFv1HC<%jAqx#>A?tvDnCmz9M^ijQI&g zkS?<-N0ul-H#?>=5pj66dY!e^37Rx0O9Ik7gpno_2`X#4yw0pRe-dlaIu(|-%J1}N zfQ|37Hac28?a18hahG`7N*e2s+stAsY3V8PcQ{I%?ha=Q3Ti27v3Y@Ic+W4vHX9VC z2-l-)I5(VQSqXULN+-`MF@e|32T4WfmYLrRi9u>r0qN$e!Qc)NtX5k~3q)KEk`N`P z*aoVjW zOVeEWtWpjkTWu<@Xf(@1CzXhDYF6LG_@Pz74nf9LH<`_z|L&dGsoL8(kzy7Dy^ zOI_Nhzi#Sz5J@|I`tFdZq zH0ID2qcP!=r6~c_8@gNOI_!f2{I37r8$-l)aN?inu7H~kKKUjw@7oQjGNq+cXHBsn z#7201-nmn!PM=#qN}s&ivQ?BmL4T2IF1%-XgCLv^-mSq?^M1lrGUOS} zJt0@_5lteqI{mzH03Ok!N*&BvbgBwq1zsc5Qnx~kzS|FRDh{u;zOkYvj@u*5SshJK`Ql~M(&|ub(%I3-BH7E@^ zHNjeC$dYi1ja?pBPp89sEqH*v8JG1`#v^*|X+ozF;2E2Kz8-QNt=RA>7dHmB^Hegu zk_l2#nS}(Zt2=FOx6Pje@T7Td7?jT#{M?GguB3BMjs&jmJEv;Csjj(plBL01Q8T-y zqNK5=yuNOJMXlLnF_&0O<)%hUlc_N~xU9lIL|kTw??jO~mN;xi{;+PB&3diF+1??y zM)05=bk&l-5Rdfi7qcPy(|!t37FNNJ0_?InJ1eDHk8xPx?(S>>Hs@)BAWsNj?{+!l zE~|UexNZ3TPLfYaN3w<+iKBH0Z1%O&CYMewn>Be_4s3TlpKS%~2y0mb?E68<#KYCbPwEbo=~vtIO$L0uGzx=wKS9eU({A;JV!H$pKhW z0&d2bS1~L*J#G&ud3jh4uri}EEU|Qq5}iD5ht=iqgUXyt2)z=uN=?sCjHCOWs9a#F zYFbcHYcbU}G?-0Il})vEwKXP7WmUuMhGxsW%BlqmX3wi@G+P$bHZ;w%G*>k@SDTxg z7SuE~RGAu^8mpQb8*6Hss^Gix%$3c?=BBF3>Sj~ZY)frT<-DefIde=4YU--Y^J*F` zHI~ZS>UmAIbLN<9;d$of<|=btZKbJpVP!>aQ`N!+=H`al1-0gdjq_%k=UL{=t8FsZ zRhiJU=Pj7sXg19;nT=Hyl{HN@HMN$E?=EG&8?Dtz6Sl>|_KG3>dpK!X0VNO4;SWQG%@zLV-X23idBeo=JcQmM&g;TsIIThI2TPkZ<=3jWPilgKGDA; zdb)W8Lb!#J$^{jW_jD&x0sApMy}COYtEGyWm!AlE71b;?Tdn}xsf;Ws<@-l<(}el{ zgBrDHNYE>qe|hSv2Va%-5t3Xa9rtnZmejw)SBm;YE2D|^;Na=O_sWvT606Sxv3d0s53QIRo7T*n=MU^4YEC;v^G&m zJmK1mP9s8z)Bysd=}{QHcB5EnGR%-64GZO%BjFH3F(FMT7@;-rda&>tIzkjsAx)r< z)lebhfr3Lug6?AuhmCzqkkKD3I6xKbVM)|gIOI8P?S8fZIOD}(?RWwL}mFU==F$Oh9zl3 z@zW8xg0Xn2*cD(nocfZBa&Xwhfmq`XleDh!zo6aZ-B@9pD1P>kty90E7gzkB; z53~`E=)W7RhKjL$2e3aV-2yBH)F&jy=0xD83cZvF8b<-4*(`z{-gL{uL~@bL?0^!B z-~#h>_CQUGa$Bp@Vz?i>m}l%opjey7jZohLB^Yv7ln-13-+>lo@*i2dTq1?Nf4fcht<2>vLA zPp-nelDQ&kJWQh<<>(R_u`^FZ{nX+e^BU^lG}I?!#YyP=h@GjeLq%uWH%%(Pm)M}} zg9mzg!Zbu-zQoqqnOQoV~g> zG3Gx3&7mZ1YDIsGvYN^VK!Jl&ACcPfQ8lk+$lQ$e?WNE`)Rt_la(?d)ZtvpsYZ3OX z4wnlBN#=#MtO#o|Mcn3ZZOw~m*$~qnjU_qIzIaGP)*S^}p^V^gkO!iZCpbrf;2?6J zCpg(4I8VnC90YjC)%04EY|^eIwB^-Aw|!lCkKbYPdTl*i{*2t0wxh%2^Fvv{_PkTa z=WKU7?1Km%NzpKZl|&K#Lr+QEYW6|r8LzXo!^w5wa%NlsRMw)R2?P`|T+shwf=;mh zCxSW|{f{aesQyPv^uI*dq&V8m;y^(Q84}*U4H8Vq0VF6;1S?i8q_EA4SXmLVuxcR! zKCfCNk#e0}$i`L?=y~sI7CAePaTH;nclIS>gOcdR^a@pZkDqV8JKKV-4E>>S^w5Yf zSi7jUbW(Zkd~*(nFk+Zej2w&o0wQ6_`vtN{)UlaPgniyGkX2<_CcNkU0uYCr72$nf z31_E^;(fOZb&dsdk6uW%;u;F~$-Pz4#ua_zeiis3-r8>oZ>rE;J=4wS)x z!Z=VZ2P)w}DJUq90|j%SBn}kGftoo`NC&FuKzSS}m;-fnWLD-Br#L1-RJvkR0;8O7 zFS=t;k(|-knT{HYj*`Zxr%;Z!pXyOb)%r0iPeZy==hRN(OY)YWinmU8yX+&>S(}84 zVL|;?*}|khvMe#!M-1a*7>tCDBngrt38NJFo4EXYRKGX*@+s+F1#QArc%&!epiP_s z4i)>knm<~?0FcE{9bh5t(Wsyr_d^PA2m+I|WGd;UxZ{)BDk`Wa-6|V^sMXcfnSrp? zaIcmIQ=hOB z!RUU4p5j2v?eS1y1mnzT#TBG65zvFEPM~d#ckvp&W6U5;D@S4@E0-Q*q#OCQifJX) zvr6hp>dPu-mQYd!yImPqqVc7p31e}Av9Qo+EM^MeozYlSz%y!8?a>IodMTUkzXK7EHc50K zBCpU4G#XVg8&P?ld^Tb&L9o}e*S9qgH*9)yAHDGP-`Mui#0|s^H+_hX|M^1n9{s!W zee{{V-f%ua+<)<9vxytJ_q_7YmuI45{#s@=al`-JilHH@2BYyl+lfou3dKjQD2}+( zA!rG|jt;IMWxQ268v*;Z=h17Lwg1r zQ8(zJJ%#mX5%RUCJzi(K)14<$X|6rJf!iQ?^va`GMoOC_yNMNrk*Cn+C9e<2^?{8D zs!q2LJT!L4AT1ueO+4*D9b(ofTs3|$)+m*}3xYWc*_C&T=BU;AlKz8FUP)NepYN~3 zH`bBa9{5K0Ycghgcq)y4I*GAwc}1m6J4?UYY5=|!9{3D#19`F35SEJVAt{Kd5E5+l zrRbO@*eUg7>5l}abL5h>&aEy(6cGc=4C&Djqg`#K=*j$BaGh z_;KS;m~c|b$)}up+L>ofoHV&?`iz;g&OQI4OHAbzmFC%()y}PNnAg}ezj^T$S6Xc? zt!?cc&Z|4!UDx=!mtMQy_y=LvkO`Eqrx?|_A-Fu$Y7M%E<)6Y2jyXQ=qS~~5#3og9)Qs_oBr_Qoq;iAjG zca`0-#O3k&d%pjJ8*cvbPk(mT1Hb$I@|Ek>Z`iVR+aDf#{D~L@PFYjVQv?}BNLe|| zLo^nmnbwB}L#4#j?xE)&k;y0-)`yY;s)`sR5e9N9;20CqLFq%vY(=UMJq7xZ=s~d?J}Zob-#zi8sUUN^{-fdP{`~TIiW7=9uPs8qJO7*>g=5 z8JFWV%A8#Ep=bz=K_iJ_iD@jhxfPnSMaG6wi-mO8vBB_cVf_rpe+ zWVF&DMw}Q<#*Y#w<#w*TK+B+(keH%uXm$EIT#xro4=@8I4|tnz$v^=Q9g+C}a>S6A zL2W|2(`}O%nwn>2oJy%|ZIMM#gs@M5ImeVJS?hAR+re9ElhG*NSsfqjWEa%uld<|7!yERF zy+k-oC$p%eGONRj3X#B_fS7z!Xkp+jLv`S-10A?g0I4FxHFd)2OhbHw&E4*DSZyxg z!s!`1eM`2+z8;Ug#o-@vL+8QXZtLt!)7Y7<*#i5j#ny((av8Fy5KZo?=NYYxK;gWw zZZ~RNVYMxD4mJ*3c+35gwCh$03(>iu}+zwxt zt<_;|_j!mol+2uw!<-%$ z)otsB{uo(-$I#wZST5Yil@0Sb{C+@zv}4TC0HJL)Zojq3*5Y#bP;1{7hgS|JMUjr? zsr`D9+RN$KYAci!@QZ>vC=e*W{n#a9tAVC>cF9HKB4UR& zTuy(FwbNnCOE1#41|)Ae*eXD1uFdc7a9k^gQj*FjS)VOA_fcU2T@G(6*b>0nEw_NE z2oOA?n?>BjIJ3d-@U?oKT}jw%RUUNI3cQcTZeAqjT`TNhzJS4ceq$?G1P8T~Gev`s z20>sY)com6J9j!7AhQuc#+qC)nWKF(V`{g&t6GLRsuaIpW7zC$(R-X3-c@ad80U6} zZ0xBUjgJbf=ez-#!ur)+)g`kgPt5^QRdl?H99%A4uCdykZEaq+z?1;Dkt&n2SZV8Q zL49hiN`$vxwsty5s8Un<4(Xlpgi4|RmLRC6E=b6cV=Ipw2<%*$P611JtKB+`JE=-8 zYlYjVomOqAnQN*tmo(Hu#gH7>j(mE5)R4++j>(iiO3ZvW%r!MOLh%z*V?}iiPafAJ zjg|nnf@L%PTx(6~dFRcXR9cz?fZz-J#bLpkO!1>Nb88wK>l-aOId?hD*(T5&ug&h{ zLL~&Eb3bNHX(BqCuC*<7@c!kvN&zaHi=$7^GjbaWHeN%*<6+w?d?W)nx;x zP^UFc*G}0g_Q>2ESV@uxhTIB1>tjkKeT>yw;<=6TLI^cd4a`9y5VpHVnyIELYpQjsavBHY- zmcTS|w$=FNG8m<<_;0W@*Hu(E);Bfggc~Ni!t~STx#hJ}D$SPWi6zyhx#ja4t4hkv zrn#taV9BCsxgb}5Cyu;efeAPI^7XipbmI4-BIwQxNZ8xsA(R6&cccnXa2Epzv{6fH_$`yYRd(-Eu1ZEs4@dALPSSzbvb=<_K-U18`{E_oC~e7LXLvV?GxC~ z?=hV?cEcAQ_q6yN-lYyZIN5dZ878TaJF}(Wdb9zHU9NDD$5G{Pd0EB zwT`>$d1lO#SSNvSADb($Xe-6A><~gfx;&6?2k~$U5Kj)*_tGC_3t6^tf=VM4)o*uL zYX&m|$sm=;^W<#J0E=3Us1)r|o2y$cYO1b4u8bdR5^jedpby!Dphp0YOb?6POJ@2rBeIDyVr^^RK-{JE+!DKt&wdj}8zxt=_q7uqOb=z(3)*c!8R7ItJhd(7-qu-4E z@0{m3RqtVcnSK3pk*FPCE+KE?C1k6wBR(H*J;!vdLyz08!vi0LnA5Mp@70ajLYH{< z70>9C!}z`QhuNCM#td+-z2x+44L?J?D~mID6hfTcQO-3e38G)Jd3tu&Ak9SoyV)9j z0-~R?Yj9{AX|O_?31-Hbd%&R1gl{*Li1DsOW^ zp@o={zyfgEL!5;z41%+z4E5eV*})xUnF-t1YqH;H2( zdOG(`7`tG6#a_3;7%ux=0EtyFa}g|9QrrTwXtcK3TKy2$7%wWRvLne5l?<=1F3*Q=B&(0bpY652(G3)9uegN=bXz3NrYp3d8*h^RM=1OUBq_b0hb6`Q@co!vFLP{)?;+ zzU<1&%ZBXWr(JpZv@{L=t*k>H&?ZfgKlMnAJxzemnk`k_u7L4FZR|TCv~nq`0?-Lv z;aqv4yg#8l7F3+HYh9vHsVOh*0{4AP30f_AXerxuusa4+sa7w9f1=Psryc6OWu#)? z>e-EEb4g^n^cI0?+4%(SD`+G_L3 zJp`#!tD^zup5!3cXq}Bb5I(4r!gV{!m;}GN(NtGip9^J8)TD;(96(Uk19%UuX3KnQ zW7=u?(SSIAh=&u0M0sHOd%7HaqZ^@O+a>{@0G;O6=0T(YqKy&|#r5jQ>%yQ5%T$6GyK zufv7g+qHTe5cWCSgOs4z@@`Uj84J0Po>qsKqB}n2oqRON7g&81%zsPm*IJ>%T8jf} z9owDGJO@?aps^4`bq?|nTUXxGk1akHV!jS2O}E4bIp=vCQ;RNsEJo@_jd1gEYYwhY zmlC7gBavwvtrgyGXX_GQht0jjTG=4arq8oV#47il5rbez`{r0=9(Y`1xccDAj1Yegpco*@`^cDa=63y;7J%ezDPF zKPiAJZhI)jrstJm0ND-!WWxT)LrNuObe90B=swKDN(EMr^JyG*aMFA2;B@9-1_n?^ z-p#C}&h3gO6!^M3tsOmfuczH14-3x|c|_zNkA;;RtWkM*DT>utTFyInI&Hos&&YL* zje?HR0kJSbU7KI$K^7ODx3;yX6{>y4H@Hbzep*>;vgAUX8She$n>ntj!Lu@(-vh-V zP%jooe9Bk9PQlkSu^EZD_C(BW-Q56Uh-9<1#)G-vd&?`aV=HRRy((r_Mq;% zBj$#@=S1!)>GzIL$x=PH$K28Hfu7fcy5F7JSePC|1r__x0n0+&XgMX_Do|1g4+l55 z-z!j4z9$JM@W@v;0rB8<@>P`%m*2M9<8`(>-O+rRo3Y8VvD;niaJTzAL{%KUu{f`D zj83LH5u=E~j3hDhEt2T6ws_ojtGx?sQ|&+&qi-DTUDhraSggG0U`Gq;-vv>;;PU6R z(JpIeEB95WAHBujM(=&m!_dZvyD2)1ZUz1#1kWyOYljnp#)MBk2$M!tG~6xlpw4cW zU;MDFqo;#=2zP3k^aDRMxD+1gx3#x(QaSh0;un*6=a z?pO+(%MnLiRBb*9%8iT@Q7r+6QR|()`EF3E-46R7MYL3$8-)jd7C{Un{>e5IYuKiO z(VEd3m+pf1{!Kj2&Lw`#62vv79m5}Z^3}_yujD3p6Bt2TJ$AqZ<$PkEr;R2E3jRrc z;3F1tpea2M8$PC0wnbGz)UR2L$|!&+BE)KlAUl!#6JaO*M7$p=0HEQYho}dYoSw+w+-TnbefcBep`h9H1yH8Qd8ZR>V(f%W;# zAW#w&QYlIhKua*k6(AesrRgG5qKG^i6RaJmBeAUo#k4>kniZM_c%91xG5L4+nq$|i zxdF-d%hFg(LR675UDPf*@-)>mi0U{A z25Blil5Lo$ukJ{;@q7r@kKnNd%B2iZY=MyvQQDHe5;a-F^DP`FvMz;OMUQvLVNC4k zhZ@$zj(+HHB~jKRu^hLC&(jygyZerH@otsh>35|+M|$6BjWy-txFs|P^;Q5_A>@c6pA%Dc6IViT(*P& zmO&bqlvjb4c+wshX!Tj+&((38# zaygde0Fb?LK$h>$jl%AWR(wOfq0r*{>36@Wx-3 zr>gAl=thdMs3uty@Q7`i9FpXOJu^T#j-lX3WEKpjV;cl!0jXmGuXq5Tisu#S#}yU& zfN=#82MNhtV{fq{>5@Asne)J?K^hsD%Ah5bb;bO;h8&=+v;arhj%$I~D*I4?5m( zY@v+}Q)kRN@7#vc(izjIW_&LtUUDs8pH4=UwsNqAy1wmhYl{QIEoGq1I?0UL>Nu|1 z=eWk|fv8BA!w+3thRWXG_eA9aOI6c?idu`QwxPjnYN~9it*fmuSt_d4Q&9!qooB9WHa0g^RaQ5f znr2&SYbxh8Rm_=VT2ND0Wu8~lXsNMO)>hAJs+}{(Tno=LH#b+A>uM`awF@gNYMZJS zE-*JY)GnwsFKnDQ+dR)QXI^cSxvt8Do;_~?lwLN?F`1236_qtjH8r)C96(sxVnt$r zo4^7wfE=?n2ws)p>j0y{vdT?!O60=(OCo+KGYcN}5`l}4AKfAmxS+o0HG2oHnRHQKEG7Va_O|OCn%n zj{!u^L*h&PYUd(lSZ*rMI5ZQB27-}DCK_x)d{>kVWOO#``#EN%vGpoud{(C$+7t{8 zwO~jy1We~8$l-`fGgCs2tCF$Fh$$4xBtf*j^-0Nq964ZodE|h66fs^<8JLWOzKW5u zgOwIAa^w{@cl){)C=^)X>2~`Sjf{z@vWmjC5$VB#x|UN!8CMIF4llp$vbyb}sF7!%gUlUN7F`d#Cj6*O6Sji0Ptb;iC^^txz>51;+Cx)aL1#Mu-QX>^I_y`rf)7eWPTe&JC@ zt5lN3p@L}PlT~UHgWwTU#x>By@}?N2AOpY}M64j|QzqxA%!-F%wd^KX8s)Z6aw?+A z<0ShhhZMT#-Wi~cPiQHEsSE-*;9C-Y%EVC0Ew5q}P&|>rLJAK6(bbfUO(8KTR%7o_ z4s1QOwnggz3o=D@E1EO!U`R}K)Tl1sm=JG-bP%8j zeRw!TABSxs8AF0UAlO1#7&})x*)EPh2XNvz6`a`2x4E=KogAktUmH@XHsp2>75;sn9Kn zIJ?F)v3z31#LR~Tv-rvOIw^Cm!KltovMPM`V?fJ6P~i4;H$!I zL`SV$y8fVDT6|$vgSfkkeJ7yle)?jw7UdqFkk3aB@$e%a49Vm zT}ukiDyfAM)q7tl1~mUthCfZiFVpeM4E!<^zsTcpQ=FGR8T?V1ml#n76`FOTCQp)D z>B?aPF^YrkaJZby;>|2d(S)gGc%e zT4@eRkv}maQh5%f1eLbbn&&q()>qXw*VLLDs?9RpGDSSmW(ZGcUJ2)xA>3g}fqUu=NU5t@K@%vhsz zh&wqS4H_qxGLf7oKSS%UV#buLe+~r*Py}Y2iEQWsIYe34@PlQfZ76ha$Pq$Qt*NTE zuBvuE(8BnLpPVOz97BpFgwbWl6GEZif*4JiIJR2Ic!?*3QNRwE5GJL5CF0`(AuRf) zc=YHI@q`eKGfxPWsv)ZoV^J2B)B#U{5ON%oi)zRVgt+<6kq`)@tB@yy=_G?&kensP z?Bt?2xD^q=p|<~yIyoe|3iYXMyxCZ^+3R@C zR>vx}16RVpzAR^)c>)^;ftDC$ls zQ{?#f^NdbLqGMpnQK$@TzvY&y9H7z^4+4NdOcFBA%9p~`=JLn82=i13Q6VX?QZUb} zAZczo6r+57kkT_Q)Pyk$dXu~DHZQ7#mrQ)cu>r8nJgoR^gtY4%IiHBZk>j#sp2-Uk z$kbjL3QS&rK@v>0Q8n= zdA%i3`{^VjhmH-B&NtzKGCS6#HdnVi`hm}=SU=mj%wccvINko$ZMIgw$GeyQNQnPB z8E06=q+jmUeJh+EO!$xJiYNQa>my^l8o1geH+U;llXKvobYvrXEaxLU^3h|^1VWV`=cC8UmBAz5U611uZ@8UuD_11V z+UBtNyS)x!o9!pY^AW$P)ds~k*_29(V`32?$Nek(>@%fzb#$$A;0a^z;R6O!zWd6BZ-YEclWB@I=@p@ck0Fz zGVO-^zQmJ1CU@b6bh+#{=7Ztv>r|v&EOM?rc70Fr1o>DdH!9FLCh5(Ey(f2tN5B2?fhJk3gB}#j&!^<96ggCKjJX9nGuZ^JUk9>D$rn z@%E&hsBEp|lBJ;+NlA;>X>ZR3C=BQAst~NN(!*zm;B-TM>bkrhzqQrri#h~G@(uyc zhZz^quapR$T2)zIDV5!giS@}e5)Wax3&nO*vAQ1#RL^-H(T_P>30~P_5vjNslt!NE&6U#} z0+*(XaV1;J1}QQh381ckV*NRweTd>!OuQs+iYyI|h=ZvT%udLflqFqtUet;kVWOx- zr5PO4TM4ha)G|IKV0skrehhi3A8bfKKBsBec216!c#z^iszd;?N5Uj)o~K~WcDKVW zM)oH*3kW6sW$Gecb40Od+}YsY)x88aq}&BZ-}B zg{izE<3gB9zaX(i0kq2!(3ys}Qe#pwmTw-8{WOhBCz42jnNxh29*pgS7CYW-28z)F zjoW>GFI1=+NQrAIrLCueqS%TI*bqZKl1x&|BLpgMJ!pgw$sLU;+M_Y#4yTa`?uYw? zs4hea2VYMp{iIK$Z0eKj)?6cf8bJ0~bYBl!u-yUBw3f>Bc_nxy#l*!@;>z$M@jlcL z1j)wG<>=7lrUs>CLz7GgKo!YCf`&Q8G;o)^0UFFT zl16E(d=9s7uvwEzsFIYxlL%EL;NIC~^V(c4hs);nJ0}SrGA;tCusKl#q6vMGu-8a} z<++|nfi;R0m?IAY1>R2#PYVt1X9pzJD^)w>2(cF0MbZ>zmwYHfd@}?k+)a%F>lFdT zQ(!$Jz+@y=IL^T~ttyj{Zb0Zr2%TE1Yb>V5DwCz5w%G#xT4ftB#U6!8A?g6M6q+w> z537NGLNQH6C99E=?>pTaP-R|{CCM%K2197bn9Y|=x7O79?xR+H3CgYZXm-%DBWxw%>NBF%N_g=%UF|2SgF5V|vM9 zI(xT^5(^k@Xui`0B8Q+M$e2^KwP>CkrqnD=P7b4hRm=`ml1y~T*l0wlS{a2!$z;LX z!;L$po)S+E(Lklnw6v>ad2;Br^>8LqK88s~4x{}-lZ&oaLZ#FcG!;~mP{W-^t9%$; zj66L|Cp}D}d26<%!)V{oBfJlfd8s7268TM-)=fD04zAggNklW69$UyB!}XQG#ns*F zw{p%~Ye$R6ZI9a0#-zoTmc|HRW&6BTjmY~mAc8`aS(V{66#W@Hu@^|}^_EnfYF?7- z$F`Iq_r@c+5aY=&J4M78>|}t=71pebxze4!Wr3-dx1ZLU(rzu#CgbN)F*3$j3pApG zLuM+FEkvd`b5aNfeF<_H{HYJmRG7^jAcgpamCMymyqvnF7 z5MYQ|Pm}iGig>~Wj3(FnG0$cEDAFc|%f!Mp&t$x<7Uy-2>@%6zb;$FWl(at?k9kz! zF_K_&nXy;Nl$cc(!wVT=zqcVKM%T$)LQ3#V$}}sQgj5)vD8ivgbGt2`z{vo6gehDi z1yCZmtpm{+jLjfSXB@UFQlto5hM! z8~+m%T_6rJFCMRV`r?*9G36p+DOVB!j^YvwN%1&$xkoHq^UOm5y;EWyvGC3FjyU3x zI5JnrO#&tzYg*yO-u~J$Dv4=XVjZH_Lku3~@CepNuOW zl_+>Van0+MrKaXt85gq>GXe%Pg1A~^TM^~)bVI-j4;h*Ik~#!(D9%4R`vEdSl23Tt z?DKScTOD~A_0#2MyW8XL^0<0BA&D-JOA0j&d0g5GqIS2#oyVqr%cSq>u=$|qZad^+ zT0L!6Xf5Y}<{^VkZc0p_j|S@W7=jh5i|BB=QTn^H&54^QrJa^P(wqaVjJL)_iTN`k z$L9-~J%QsKpzEI#3^xw0-80l#b^DIZU-PB*vfx+Es zvsA~5%Y96p73Y#qTC#k29r(#Zt#PHq&ooTll#{qCojk7vW? zejY!2#r!bgR(oXIakJD^6o0RLyH9BMxf$2@!nHN?Fp7yJ7exLoJ}1vP97u1JC!(iL zk5-jhXUv$91Dlc$Pf;Vt*k2S^LMe3s3?XhL&-G8kTKoFgIEWO|a$ae{x0BUVSSW7p534Freo1a*cGi zOe6Jn`s}z4TvQ(&r>2i86$OZvT)<`F`U!BJkjA)HL4GI66Q!Q|es$o)!Es5AP{rnr zM@W$qq~l9Xu~zbBBo%TZ#%N4XWFn*_Mq^_|c|}#7xx!RmZ!XRFw$v$(hY`axbOcH_ zhw21e9Lxk@a**{L;YxFmVUi^XCPIecFBzm_Y$K`Z^cg}N6}O@U__-F@Jd!Mk$L3^> z1xEs#pMaJkkm=u0ZH6@es@giJePS^~!hdsBZJJa5jcFuyBKo70e}jPgs8|_7b^?m@ zgnGQPTu?aSgDAiwFa}(=aQIBt48EhmpFN zCMKzwPcpP)RY<*L$L5e3pCT~piMkl?QDe~zT5%K=tjrI1a;t}a9t5Jfys0oH<4uNE zC~>i?M>4PQCXQ$jB006hNT}Rlsfm}*FBr5rMTuZ! zS_autQB_e{TCupI5`>Y;vdq`ZRW6bc6+`p{g$lOPyaz}khtMn)(gX=Y8=^w+HyrLy z4jHWWl%YLyl*u7ULN)bN;Y2J@!X}P+IYholN@OJm6WL!gv}5yfh^ictBP*1b zLu}6vN)DOEDF{&+Xn;E_t5kd6DIkW_qyJ?U=F+gucN&1D%k zkrXE|krbm$7We@P1)^m|O9_}sXp#)l&}5GcGkpP&1Pdef^k8^{Q7HWE7G7GhfOgo6Jb{7ic$dn&UvI6gANa7$ko z6X^A8pA9}3vFNG-mlD5?$!Xn=C+)X++o4~`pyqBACa2g5ig{c(J(wmx3Fa;uT0S7g z5kVb6hgZ&jqLzMl-2Wziw5sX^VubeNz8ItT7Cb$>W2w{TcY54uS=ZOYPY-l%Nv-Q0 z%z?n=#9p?5JR8>aV|F?5O7IokFMAKr2DVxpeI((dNQt&p} zx;};{?(;*yy3LESX$A>_epffOf&t3bWpQn3BEkenf{G0LYQ$F*8KHCs$9@qejG=(g zjsY7mH@KYcC6L_F=CZZR*7XQ6JfpHqE|pK~G9R&?*z7DM(iu6kvj$Ah43_=HFo6je z#Jgl+0)CIz2fIj!5%@K}Dn4ReNluDJ2qYuxDc3RLv&bM-UslN~#Q=bG7AV5xnj&M0 z(d1T(Z57~3@OBHhN1GZOkgL=@AF89v)LW&9kf=96&E3Rt$0JjOCJl|R`;nHDLZ6{$ zekVzuZ1D705;QNP$4c~Iz_Jl3k&Q&DQ3Yl3X44R7kBDtuu~8a@FB@Fy%p$4vR*`!p zn=YrE$Vt~T+bH!9=RuDoM?~{fmS0KjNgDJkx;mUPh$pAXy#ChKlq6o|-O6h+;FA9E ze_x!jS@)NTQVZmYE?;^Ezjw^X86Etu_c34ZA2yK>KomtNKHGnfOoZxkborfjhm~u+ zD+p4*Qxl{Hm#^2a)HAqS;Yhh;W4ctHC82FA;)Mt$CJ_}Pk`Wpc?=c!P47FU7%q9Po z%Ot2HG5Pq!Rg8xHMvAE{Ee}T%gqKJ+acbdPD$kOjQ4X=lC=N-ySIP<@29^*@pU-|< z;&j{B^_zYlOO6KX+)a!}DFDo%P!i)g*~e8WJcQ$JkgR$RX*(2wQ^2CS zPU5T*Wg^4NYFt)Hfta0;S*u}wecJsUg4iKu{S0P(b~!tt!dDl#UwNM(&K^#WNTl5_ zj4e^@fTDk0c^RZ1o;<)>$=^g|{?7s|_-p~8$k7Re5p9ErTz?YgritGuNS0@Tn4gDzj8oLlkpqow=;q3T}(H-BvO`{)qN;1u#*eDWlcJdD3_>i@>~y?=Ppn|*KvIf*3y1}_uQd-U@fzWR*5 zeei)Ik2{_u-z87_EB}D0i!0IC;&I8?S?ZqRBnkUoQibkGgu_*J74_AY^y*}p>J9|KFes$+1ro?L zT4{taw2prddF&98gmw_*gL7x$J%O%3{q51!uwlfjNhapI{FO%GUT~#>dk_0e_%?dw z?q`I1!aMGs47@K=de2=cj6?@lAw?360`@O|Z!~@?IwF2P__+`y0oQBb+Cu(&I=EJP zkC<8t+qv4{Hm|1>;uTvn6Wf*~5{^`+eG(L0o>S3nBZ)gm4Y7#S5+>3Bk#1U|M7qy0 zq7+l2R7MqVGDP%EDl)b`C^SOlnd0lIJF*G5h8t z?SfM-Vs!~n6*v_Ev;)uzL}j2})B=!v4>CO`(aXg7ER_8{Re5G2**X<*bgpm%2nQ%Z z68(TU4y7FU4>YzFyg;4{okq}MnjwXE8d^*1C_M$M4kEjFlq!h%R0v{;aIQ%59==T# zORt2_!^HE^?`Wvu)Cg)M%n9U}xNo0GjUy*eCsE&_N+efmBzHbpavY5jRGChjZwc>-5wU-;{?@c{ zG|^PNm$WM)M~qVaS%nFdPC)!>D0T*lwM?hRnsl8R>(UjV$nqL?Ms(`xa->xc6`i^< z83t3XxFk`Xx*Y9Skgz!^8^ms;FN3teRbHVK^lEfYS+lkyDMk1}S||F^ggI3me?Wh~ zmYf*@pFfv=X1)msAIE)69`}jx0pA6L9r8ojA%=V(9XXx+7s*IYmB6t>UyLm1d;he* zN8mmBIUB!le_xYadv@<3UY(X41pC6|e~=SD=CooP(1!dCs z(1SY(FmMnB@Qb8m^vb}mk$(jP5hs1?_+#i59Rm{zp#g1o7`%?=f01;Jz6%J%N$u!f zND4>NJGxFdhhAYp#b*iEqw9`<&YIL>tI!t7TIZI|T2B0o#P$ya`9{fm^!Y;WGdho~ z7I<}vW!H?Avfk|IxJj){VsGao@+_nDZ$Fhmh}#dqj5Ru%U>#DxW>g_f3Z97-Qahf2 zL?N|fh4d*L4diq?{Z5<9>UFd^-HuLZL5G_6I9xKPSA4dga>x{oAqR4%EF>6i?DY4b zrYf%Z63U58tSZlvZ0H|Coc>G8 z1<&0wpj%Iq-2%<6t=nv6( zB(>D{CSGa)(NVF~(~gp*hIzy-^-x4vm`5C5Da(Tb;hR5t%l2Jo|0o7!cg4`ur4B@I z;t6Vw!$)xvLv4{2ClR;8IlB^a31&T(U{GL%=SgD`Q{y8*>MRoAJklkNmCSiFho+ zXZ{s`6%P-8Q#w>MGVu%iqtOw5`23}4eyF8{IMdi$NWRCv6~-dG!hE4g6P~*;{MdV7 zi10%Zr^bIDj|Pp4ADJ*N;WOME{qWbpAkdQFN5)+$-dq?q_ZoAQ`wo9v9J%BQJQwb& zxTA1R{0a<7;UWBpcsRs9T?80^7D1jzndtn`RD~7d&<9^4Tncv-rXRg9_A%%J>lXx>DwUAS!iB1# zg8dMgR=!2rxWYKCF7dB5jHbY0i8^<}0(R)aJgUAq+Wbut~Efb0! z(HP0E;gj%}q##5Dofq!KJ&C`sa2IVM`9<^&+&CEWTmF08XwbLNi1_=Wr$hZw>Qng< z`V(PHa3vnI4qiXTGvwwZ%#wfQ#t^+1T!^mbM`VN)nlMDEL3j@LBs?2#L;|#gIpC2A zvxHCloiH?or=an1^WbNIK1>cE*xVl4l`^;1%M($IXc!uXMGea1dsLR#G}6>3d;(=x*F~QfNpZ7V`uN zE=XXFa8n#5cP|(nUR&WQfXTzj_jxSg!y@1sj5P>*iuw2Na&Rn$bMTIPHjg{eCt!S$ zLLoZy=vA?B$D@s&#h*p^zzpQr5IcSja_A}}jFcN&%)2P=AXq5v(C8I_|8h)}&Lwt# zq7la253Rrm@*oKjKJ>rv%E_pN6iZ}RbMptfg5V{`b~uJ;3HpYxp6E*cGZ;05J&smQ z*eQ~c!1pEii=KcXDBv7VKk#$Kvpg0_p(_0z8cB4waNmutJ_i?mCX)4)jyn1r9*bn7 zk&ZZShWz~{xG2C4trvzmw>~273uBewnRrfsB#d+<`9G4l{$eTTPbHWB$S{7pB3qQQ z{cIG#P%#RM(Lvz01_K;ti}MhtlN1YR39{z(7vQ(T$Lp`ZULQSzjw=14FJ6EBiLn== zgEjGou0MfW-oeyf00~SHrhEj#A|&aa&WpLo9gjXg!GG5BpEam~B>bSBlIZ+zc=1@z zfBrMPq95V&csP%3j=b!dv2^)_CNimbDcT+Pl2<9hM3yjbX@JysT>S=;;UQ-pmB6kw zL3%u7&O;eX$eG7gaUho-QtKg+9`ffQXCBh&W%6T+n<>Ky^DmHBLAU9Ro`7M&mvtjf&}q=!jPP`Wq1aFW&!)5mFm?h)I^2k^ z(&|TyG#IonYBU}g0pv&+V3b<83pzHkn9!s7q9=`n-a2TEdiWL^zoBSk@i3TEA3l+H^6*lTDOp|`TaY5li%_3cM!D8*@ z`YZHT>c3~O>aNm}BPjzk7uRWEtLqp&5EP@nprBA&q%YPE(+}5=Fpe~g(!ux*V-3d{ zj@OUVjMu~X4HF7s{7i`f#;-qBf12iW{TanDe(l-%@9NHBCKki^4O0rI>Pq!xx@r39 zMKg+L7R)l7TX3G?e8UCW3k??;F4kY7yVPLP!}v9o2D5gy4#sb&(Op(Br=YfIt_H@h zX)w%V8nqTZj9<4vzfiwO->f4rR-}=)4@W%tH4Wz3Pr-Z4lecj>wHd98pV;i9=nte6 zaZ>6J%&i#TJFy!rjZ4NA!@3jXF1%E0Dr{cA+qz6fzj$#XK1ggY{gF^WNwGK60ca9sDr#IJG@^DfJ(E(V_W~1EJ6KYpH#~Uk6_|ei*cBe@o_2-r zIQWOqoS-3OFMRv(<-y}a-wVz(zDzp~KTk{yT^c?=xXQSZ3Wl%N{OizV;nL97;N^uU z(dNLokRdcZ_*~FYFqioUWei#l_XM|uE(jhr{Dqv&uBT`B{xx)N_zw8?anwch6}@jo z^x?|jAA@@eE11>2RN={A-WJ>#TpoPf_*~$!;L6B^@NJs0%ss(sb^$p(ye)VgQA>>p zU0bN9Mn{_2!(ZIO>H5I)tP$k6=Q)9{H z#5lT=TpjouaTfVv@V(H?!snTvP*d1;O)+~9!P9~rtT8lGN3%DOr_hf?+9P(| zMTdSxzNaaqh=X5{w%+qc{6I4g8ae%xc~&!v-Wpk@y)R;-ANq00Qc*huCD>O5j);10Txp#$I1{F{86d7hXS{7>Kka&wRke#+hx`pww=%-50I=-uR- zWQe|%e1`q|7cWMhqZbgfLQ6s=`fsRZq3g&E%qV&@ap~9NLzjgwAbuQ~P5mnPPwoHu zYQob)zX>>rX0RC&>1j=ucQt}c2aY~{!oqPw};ONzrnss z+}8Il(Fs*Y4hO4)m+Qu{%YrZKpE>lRzAflvPa@ZZCXvg+?`!rGm(erWxy1X#4a}9j zKi6O0>t*&67ZMNCpG6i%uGTkF-yu&R%+zS&3&KyG@b$0QubHc;gWpoYXTq1VH-wtm zg~zR6PGMc4zv`ce6l(6G1EIZrH?q~57wMS?yn$QT#i8Ft%%Sb|sq=?^vRl zna*6?dop_$(M6reY$3Yo&4Js=-7xp{;n~58@O$B(gdfulBg52N%uePm;+S5PIDvu1^e%BSBIxE&d7v-E_gwBL+CG&+sSVN#V1Z?u4n%nzJ)nJyc3#AJg3)_ z?bO|0y&n$uO$yf-c4=l4A8PKP-Uwf-i@et!_->?^nh?30x;bQ{?xJ1fkBMi#c__R( z^lE4o-OcPdT&gV%Um5yg=ne82M#~&He3R~puYM7#kJPi?@Tl-P%y{zB@b18xh%fT{ zz!}7w!H0;qBZq_C5i?O-{4!fZewVb9-NEg|3XPjS>EP19EbREUas z_TCv9U+EHPT^k<9{)?G-7W|@vXm{k(K3%9Z z_)pDkhem3@8@w~L;Dj6K2iVcXyP7-t3fMb?k4A1bexIC9zxCx`g1;p0C2#K?9{CUX zr@*81+kw}~xAc42Spk2zvEVw*tgp1eGl;*^&cL&QD}##|FFl)R4F5j(E&W64!tf8+ zPv~31w?;momQhQ>mxh*WKR-|%S<7C;P7aPImIc_*=j2mGH?fm6|HnSl_Ypnj@UmfF zGRM;+B7Nj*WGCg_s5xsLq5;N@&Hb$~o6yfd^RypjnW zGK~C`e4eeQU!&#|&xW63F8a11GK^^@)=&g<6;llK=ym$5@P)w_bdHFbxjFJO{W#l0 zE`q9Avx&&Je~&!EUKsvcczNVOqKVqCJ&X7-avHe-c12lu342=P>c~0d4WW_jq+lic zZ^KW??|(BlSO@TbIam>#qMLZ|>yej9ou zyquX3F@;O$<;=p~w?Z$m>jDi1EA=Pl3S#-8nZXOm=Hc6!*CIcs9;Pk>UU4~lK3(|P z&mwz5??f&li-I=>&t@(>Je^q^To^oI&Jb> zbP+CU4cSFo7z6u|rt0&>#IvCfh;zty0&hijhl*&L-O~5Bz+=I$gUgAz$6Y`_KszE= zkza&rXj9~R?eE!l=vTv+QqAONu555F1e zDY&=y?(l>F5q{3-4HwcUhYN^rdanvx87>VrX(y6rM#_oHBM&pTvv06(66Z%u#4GG+ znm5_sGoLUs$fqJJm@(`x*jEUTCQPq5^d9>F@owa0O$Vc4KM3y7EFtI6PX*s2|IXgZ zR5BZS`+@airi1yquPZb`^LF@d;%T~pS{&{RJsbLC zXs`Aw!yBZRx|(>4xSjbbaNBdI_O6{Z|MPppUw-v?-zUb6k>W|Cp0$5>_J?()-~ZdW zV?70q_g_Dw^RqFd?SEM@d~EnX2Y>j1=gUir9vS&|!QuCQVc5*BD!;s7TkjVuZ<}}b zUnal*dF{Vm37qhD!MYEJm;B?!tIw(Uy!4Do8=oq>`=f0avoAX@xL}9*vvM_^=#pV|Cs&Gna%C*zSlU;$ z?6T)JZi;MM9A16>;(tB4==dv7IXyB_^S8(Dn0V8lFAi5PT1>sZX350Sr9XV|P)pg~ z?&Dtm=+`G$KPoxlhbJEleCLCTukS4M>_5424Rh*0yR~;czU<<0Z|{0{PQjn9eDZ_a zKE1hUKY3x_f>B={-tgil6EAs1Tk#`(QRJ_>clT_)_>Itp(BI$OJ-J8wz-Mdsj+^

ohF`pOW^HHqo~M3N^UCv56CkhG^`uL$oA#?r>vbQ7 ztn~lry$4|2rN=@#l%@@EIgNDT#ANAPL#cc7Jo z0v^~o9~nyMACX(hf0I_ag}9DAJ-9Rc6Y_n{CA8z8Mcai zivEAG+|JuYxF^(Rdw-FB!Wd_VP4?|+N0(CnvP3Rmm?P3>Tf?C08f#HK(A zt7os#%%smausYBT67w^?Aa8}fIPL_^2=b@Qhkf$`4dGGR2Z)i89ibbjzmZMMoW4c+ z2l`HC{*SybR1`Tcczk$C;Y8w{1BU|pgFggHdp=N|*Ms+HuL-?Dv4;=#T@_pzzKhyK z-bOweu`<114-cQqP7ME&`7q)RJRQEJV6*1_KIg&x0fzVqtqpDrEM>k9pUM6&cvo2}kq;t2Exeb0K+{K94nH0E!SMfY?>m61O18Dv>b>_lhaf=& zF=x>+J33flbMtw!`b#Sxfm?6*rrmF-bpU z7F7yK;k;tZhXzIwtZB@GT4v?^lUb4$#zs=!7{a^58NC@ahq9PnJkJL zwQP4Cx7A3?BiyJ~Q=(kW)!b|YmJ>hlLTUn%MJ4b2TpwinH7D2x1`2oI^O!7hL2YXU zegG}!vCOS?fw&yIEM%nnmebCQv*t}}b+(;(!#kHD1GGYFyuG1ON$aX^Qx7P$gpc{X z{++0ld$$q`p2`ZBOCD4=;f3r99`)2H;b6m+Xsn~1ltblqSr%TTW%3Pr3?E=|8MkfG z*$KKd9vUYshM89^H-vFsjx=?zrRL9iD|y4rV}u#c4BfIblRS;{ytSiN`AdY0%&aY1{TW~@}p>s!PFqosLTO=9Qu zO!rETIEo7QV}I4bUV$~Eidj`AVGX^hY-1diplnpni*ZI-Teqy^Mk;PsTjz9F_F+xD zZ%*b%h`D#qR+L?rjpZJG9!t^rUb(n>T29tlD}8e!WMNTVHiut5!nAqvvDww~0M3~m zt*LsntfEO}QBH27pK((SwLeiu8tsgZ`6^IE)Wv+m?x~AZsSk0>Z}e9utLa(!@UD7S zneJSGZpu@Uq)J??B%f(8DjQv1ag*nx1 z;O3*oi^G;C{G4U7h*d^>tVWTtpE=B@|*?y;&2t6q%*2Rw6TB$}{er$2Z_{ zGEput{cxsPo;8JjwzFAbK4&t9_ze5xpW7o{^@~nVH`^L^C}wS}c2m}=^iHCS+huLO^Ok#~&ja<3 z=6A+gOC5!qPnG_{YAmuv>j~y&c3qw4e62W)n=DwVs&>xq%kH@1Y<`%=A~dJ6Fe{ZU z)*5JqmE&q-$0WWIfX}jgWuYupEtpe>rKr^$C)H5aLd{ebXI16F?40w8vco{1dMa1D ztNC&QjP)>A33k=ZHAmgbZm37?5&Wp7D!<6?vZktAPFFt1=wMr|jAG-p8%j{tOjgzm zWtGei9&XxpWq~@^9;14T6=)i3*>0N69n|8EuOdh*p)BW*OedCPV{oHmBp+@3XuG7@ zSXEw^Ewwz(cwl}*IOq}Qei^KL5T?`?W|^uvRa50U;4;zdt}isldUWTfpoSXc{En~G zKbS3z5L-T_iCAxGV{W#T5|>$QW)VJE*F;PAuDqTsELV$`vZ74o2YEleH!omBSd)1% z#o&;rS?`V#AWydYTIQEdhWxt zk;+$bKwXp>2+Or*N=SBoy#{XwP4o~b!`JK6i%d5M^WJQVGDK?*VPD7dRpNV*NBEh= z*hc5sTxCU^kp!o#FL-;)inL?KG0{yF6Dj&+a5HLp-4GY_JTTg=o7P0%;(k{D(K^|@ ztW0$d_>!*2inijuNY^)WtL$xdkXH#aidGYiZh6*HF5HrD@HnZhQYLAam02RxVt=+@ z_cceFk(TSqM)^tk+3{H{Gu!I(2!p6@7K1&e26N4P@D67X0u*Viv#h`(6%Z zWumfK)NKPlq1037y6hrBlrg&VMV=veH!H+Ivg{4ry0G^~9(I<$GRGV9L}}hzi^BDC z8TK+QXo_)SmZ`g0w9TZF*e25uhm8VI!_9@Im~Xm^@RI_3n`y{h_}pA$?X1mV8{rUS z8g;P_hKd@B$u?={*-cSJ+edSArP;x_3T5F5tLcn^9;yr0U^{&@*so+AVfho6MRx{FPje zSze>GgJ4%|d<@exa%q@dzb}fj>i~?GgO%8wphJqeDBuqL#h#q zbC{2#HbtYKmFJcj5T`FSE9OcRMeM$}TYynvO4<>x)~A{UEGo4A$p$efpoVpbcl-Co zM@ug6vL)D)S2>k_%35zm-Y8Xb;x94A&LXV~-M0N|(H}pu=XSm@_)U& zxy`BcNr|q2ZJ9RNz;|6E|Fvc6rem^qaHgX~fh+MFzQ1m1@~L{E1GiVb+1~Jg zrkp4$hW>->)*pG@PybVSU6rW+)x54Sz;)H3X35=##Td>P;#jXQ>ISw?CPOc6t0O?$ z#NKL+lu!1p`UuYf+Ix0|7q+$~^vcS7jk?BVYccdu(-numC16VaLPG)H|tF`K`AU|unm-bozJ08Do|c!PrTOW;aAx?U$SeytJ~07ZEeu zN+==npuA)Wf|ozm(}}3(ZFzYc$JD)w_aK%;pnhBO{+@Pk7A^i&6Ni8Ln(` zH1w40q`4b5TC)3%a8mv%n z8JmC^ZNS&fPgKs9dSN)o2f;2q!tACLb5E8})E4S`ql0l*EK$66T?K=liUQjIM1X!9`(HwM{E*J&B+I_US4a#b9aZLAycJRfXH$n=xt^_OBH|4Dx> z?z5+Q5Wbc+-c(!?tvKcFJioe1xg=g0ezF`@Q{{#< zhcO;!;bq5I5y2zf$76YOC6DzOqTYjY{Efx_InqqW{pxGyL!JXuu((Rr-cb(ovghV^ z>SdDzo3X1~u=+v0qFga9sSahIS}&)E`kICDBbI)I*gaR;XU8iwAh+2KFUVVJh`qGi z411hv%DdK%P+3{Xj`Jc^PmDKhP+gR_I9NxerJU{ZHGkrQm2$=gSkz){rR=6`5*5XC zR@L0iN{PE7o^8Y@T1O?${z1R0<+e=I#)EDSQMPMc_1;P&2*YtofSkZPsx@Up{V7uI zlJC*Km^ST-doQKFm?@6>J)U3G)rS(^zgo{R{mrgw5PrjV>4Dtc7OyRm{>nZ6 zRIh8h|Fwp37Qa^y<*Y_;K1i_>>!S{P=qQ6ZdH^*n6f&k+n%REDcz(pVVd>2y$>;Zg z(b^=ZuPueS#B*7}I-OW=lc1JbTit0q_>>}kVDq4{V;sewLFN+7s||D%%(J zDRbFH)s%In%kP+cE0Wnnr8Z7r?aj=bC88955MFvOu@~C#df1qsgrlN|*Au8>CX1Qw z6L2M3MK7foMc@62?){S1OAV}VKSvfX-fcd+VGPErt^(!_VhOCag@^+p1DcC|#&>3b zQP0wuw==3)mO3{poiz`>l&72ixB|Uc8Co~_c_w~~?Zr+W!ZcnVpIer*m&$7l=TfQY zoJn=u)5Au$fIl^KyX84~`&fpsu&Wcel?Pj)5yTpn*Cb4h$GM1>#)_zoU zF<%6mgWa~-Bi+5!FxoYyfL>MY!3K+-B2hW7O;u*+Jk$G%AC%n=KfgZOC$>&4=X}GX zL^st<=_^a~ErePIyTkP7*(H_g{H5$vu}|Gso!`YX*n zk5L~-vNBH2BaCR#!))MP5Rd5&Gsh#){!qUQ+myY|dwMhV9xjKedOLHHdxSPn|7=Om zIH^C91K4(+Bxjnftoik+aDjJWsjd(FSEar@v513TS89d)HUJC5}^9 zIW~FJ(z3-2bB|@YtZGT*=Z&=@O-+`OF3mV(9H9(-g*SuObsj#ZqZA}g-` zjE3=%)EV84@)n_F^6~6C)>1Pv6%iqRp=>A^2dal14G2NcP)orEXPg*e-iNK;IqEuW z%9g@58H6499qb~Ci`{s}OeRImUMyJ)VnyLxT2FH-hQJzU0r860^sLXUgcQBf-|2Jk zmi~)RvTBpP*hg$nS!6I_mnm;oiGq9$mJ+eNy%DFMHr9HcGjFp+Vw~SXRwZ&@Y@-Cn$5YC%gG~PmAXzK-c zFs~?~qTH-+;+-Jbwgx@S7^rM}DyMO$su|DKbv#w>%Y0{?l$HptU z$f%mP9uzjF%X@}XKP9>o{~{B5!_T6JQq*WKdI%q^tvys@&bRRoazO~7d5Bz9Nw*gqTopu!Hlg%J+J^?2{@uvdL9zZyY!T1|Hx zww5+D=T*(yna*Z-A5~Mt6m(Mqv`}f)KS0SX|4MQ_Nh=eirrg17@TV#8Q0< zuybk`cEPnsIp-MczCxuOP7QS&gzt?XV768b9vM4r;Oe3=Yb(=c4HCAGJ;*LBha_Xa z-qbQ5z86SUM~lr*l$QHgA-yk7G-ip>)9em?>$l8&9!+sIe{9)L6;%h-r+w9vvgOBidKEcP$;;QkMOhRk zU;}(FraIe+W6D>IBJNN>y`YGchWAaSh$v0I`-IX^yWu?J9^-7L%})DHtc6P`9Lov! znh7h3bFvZFYrAovBiryKi{Qh``%KhKNLL1ktAqw7KtOugRgQJ$Y`Evb+-K2GyF+;D4IE}xFzQ;TS(DXw zVmk|Fo?3|MZ=WS5@NjhJXIOKV&IhQOyb@!MUfe_F^0MPcimuj(k;VzLC0kUmmD);~ zgKI<+zQi)fTyF6eS7kldid<`Q9Rz$j;1 z8+Q#C+x1W8Qfs_=P;jke#sK|-Xrc7w$t=ckVrXRkHo2E2dDmZZC+1xOWb)YsBPvw$ zy>mFO_{+e$F^e1DyE^Y#^WZ?ezIAzp;UD+$GABp6=hnxzyY%tyJDXom+a*WD!;Zy% zcs=Z6vtHj{38r+(GIAGh_cEeDrKDjWnm;^Rd-{tu_p<^U`<8ZF;s5qZdf(Rm7ZdVl z1|~oAI-VJ>dJl+?l=X^O%aos+Q$rh)b?8>9V#(Q%l6&XV zp#Jqr@2TCr{+EGqfxaD{yvwOFdFIj0Mzw&O`jgzNoy#;e=AFG?-M41&-Icx8rH=NS zq0NndV$>+@exga~T+sBJfi_pbl3ORdww9{CKbPO-w2Hl6eD3^io9E>C+4-M-XaQH6 zlzg%^Yvz+C9@m`~$KO}%_Kh4cAbeE6umS(=n%zGO_WfVU=ln~`{(rKQ$M4DK)jIB_ zG}ZmAW^tF&YN1}w|LWUaZqRJ**XdjSrOPPhFC5gvKlB+ob|gu;_X!ytK6LzlH?ajA{`&I&77?`z-V|-HqxOw;gG^@Pt6i=mQ)ZVwkco4R7kHSt)I-j?>Jj~^ zoF(tbJG{L78Ro%EoCqD{bJGpR5M#O=#PfykRHk z7{Y$y7kGbJ5aNiSFdatVBT-Lr(`!O9{0QGboM;RsNO$E0ypV^XlITzGdnM<>d%Ot_ z+0Z;CW8j(`DD&YZ$>b3k4ZXk+wXi*2l#Ru0C`el?<&zQ63Es+gqBDhL)u6Ooh?%UY ze5R+%wOAXbiwK#8okexpwkeVg#lGe<{1xNyGV97a67B9Vd?aFHxEgHPB04Ktda4X% zVb}-;%Jya_mZ&(0)_cuWQ)ZGdeIMQhuEI4+I!-acTi^iN)+vJ$^VPDq+{Pp12_;2D za{;mVBVAoRso)NB2VcZ{*^FI-HcGmqHQi|^)>lRoQRsmT=lR(h7VE4hw@5oYAlhgV zF`li5exjEw4+U6NF8evQEt zpJh#Qq=#UoJc><}5i}Yr^h(tAwhCg!HJC+buOX*E9cIe*Vga;)ParU=mE+_a7$GNMhNz9r>1n(1_i`426lLTkSP0*NDaXSZ z_#z+ig|G{I$)998WXMji3vSE(ye3qIS1?rGBiGmtwt>ng(Y)f|CA$)NxRzW^tD!aY z#7ji0z9cJASbv)6ztzn-@-%G45k$L-ftm6w3B$aCL?VeTlxwKa-5S=jGj!J(1Sk** ziv1wpK{rT()_fg}`yiYqr}K$oFx-PCa+pYx4`s6as<qb9xJ7OmejA`A9ob>sv&1G>O?c7R8~MC^hO^)BiYB@qhA zW%_G5ol>yI>=3&`be(2q_w4i135jB?G-M)KlO1r>%neOw{ah5=lp;8W=<$2t2ckDd zit^A5667J=DObTK*lb42>QDf$imq@N^3b*o5j1ZyXiTTcv2Y#Iq!r4*S-BM|VG^|B zy|b+b{orp0(WR9 zcVI`_!Ntw!O*X+Lmxu{8;zi(?u+dK+TKyq3qJ!aEQ2?G(dij#~CgF>MXp@gbVQNXQ zOIG7DL_=BLh{$nGq@SEd^J@>*Q;M3Y>>6)|Vd5ktnC`Y~#tl{QQL-515*w6UMikw} zT$V|2Rz)@eF36fZTXrI%$G1>Ow4=SBRXGzK^0_*~Og0#Jn$0D|g@kgz{D}L!@J@ z7Dpt~ePX^V1tw!Tu<;`79JaH>kWC-TJ3Bv$^6FTAQa)l{vcGsL&xj++WoQATWIi($ z3z26?qIUR=`W=}eZ^$0-EqlZL*j%EAzL#0zA%2h-l_CU)0yKgJWLK9!11iT&6dU25 zm_c^pE&QTnyB`tFtTRQoPY2`&TqqK-I5i{W=XYfpFsM;uBaT#7N}CZ#9&b7~Gdm|;L9Iou^@XqA-vN_!!sPTpX(EJ;_^8FTUD@}0~}*KdQZ zPtz}}GsFU&8|cu+T>m_ankXt@nf zi%2y{T$ImcD`T&$j$4T$m5Pl-SJ@6j@V;n)AK9>cwaEqtsX){t_ z23ch$%E>hP-PjN8ETltgnI<#EOXZoF;))>aUKB>q_+P>o`~ZzjeIkD^#4;j_N5W9@ z7i;ke7F24nIa;EbsPxPnBwLYv%}4OCyLuL@$w!V*TH$@c9|}=qlb2)??Bk{N-VhBNr)Oj5La*Ldhd7rQYZ;wqJUs zhl3|Hmra;m9p)HBGbxO1PZL9sH&l*gz8B&b|w9= z6hs4kD04Hb^bjj#duYZSJdf1LKhLA+qa^I2`Eic==%3Q_NU+RNmy##nD7&Nw;~+Q+ znz%$!ZF{YkYnpY6zCqn$CYfVlDJxDh@Q@I)9ot0BDF@j?C_&NLK{F6{vMtn_5hX8> zzy2yW<0dtTfa+(z6@cmaKb5ik2Z}Bdla>FL8glw!C9^lJ>pYlBW1Nhg2(U?%ci}!{ieh*a zg5)4M00Y>0kr#iW_6unYC7-qg3Xs3_gtp*E9*Z_0qH9vgqSXd3r~;dyFqEcO3)46T z(){TLlPMZPdc_SQ=o2l-l2jod-spGzg3=I7cBD7drC6E4Z>^8MbgW7+fkv|;_IE%?rEf+Vjy!e?xIhVfrdcEf0b8 zz91T>LG&qv?sV-1Xhmtz8A9O^DK+GyxHW}lrw5Hm3LPgChd_TClV$%PTKboLf(DHo z6H5EP3>eaH%;5jiJtzJrT9p562K`~uxR8cp#`K%?`vx$7Xmk8iGw6YS;p2w%`8W58 zv$TKNldJv?1H`hlKl{Z0-F@QysD?FwdO$*O5Dvy6I24EBa2$c56ugbZQ8=3TiDM}h z4#)90fkL`T_!CCpWJ*D&;xwF&GjJv$`m-^Tc)xRT9?r)F6x{xdi*PY6Au^18z>UNr+>BdrD{jN>l(6r_UAPq4D{Xw5I+oGOm9CX?_#z`hDxxK4IaZ|5SzlcY`p$ VpZ+RoivOX274x@!h)5#pKL8v)J3RmZ literal 0 HcmV?d00001 diff --git a/tests/structure/data/alphabet/README.rst b/tests/structure/data/alphabet/README.rst index 957108467..50fe5bc94 100644 --- a/tests/structure/data/alphabet/README.rst +++ b/tests/structure/data/alphabet/README.rst @@ -15,4 +15,11 @@ The 3Di sequences in `i3d.fasta` were generated with `foldseek` according to $ foldseek createdb --chain-name-mode 1 tests/structure/data/*.cif /tmp/biotite_3di $ foldseek lndb /tmp/biotite_3di_h /tmp/biotite_3di_ss_h - $ foldseek convert2fasta /tmp/biotite_3di_ss tests/structure/data/alphabet/i3d.fasta \ No newline at end of file + $ foldseek convert2fasta /tmp/biotite_3di_ss tests/structure/data/alphabet/i3d.fasta + +Protein Blocks sequences +------------------------ + +Only one sequence is available in `pb.fasta`, that is taken from +`https://pbxplore.readthedocs.io/en/latest/PBassign.html`. +`1ay7.bcif` contains the corresponding structure. \ No newline at end of file diff --git a/tests/structure/data/alphabet/pb.fasta b/tests/structure/data/alphabet/pb.fasta new file mode 100644 index 000000000..3d7cb3f8c --- /dev/null +++ b/tests/structure/data/alphabet/pb.fasta @@ -0,0 +1,2 @@ +>1ay7 +ZZdddfklpcbfklmmmmmmmmnopafklgoiaklmmmmmmmmpacddddddehkllmmmmnnommmmmmmmmmmmmmnopacddddZZ \ No newline at end of file diff --git a/tests/structure/test_i3d.py b/tests/structure/test_i3d.py index e08d94544..402554608 100644 --- a/tests/structure/test_i3d.py +++ b/tests/structure/test_i3d.py @@ -84,6 +84,7 @@ def test_missing_residues(): atoms = pdbx.get_structure(pdbx_file, model=1) atoms = atoms[struc.filter_amino_acids(atoms)] + # Randomly delete some backbone atoms rng = np.random.default_rng(1) del_backbone_residue_ids = rng.choice( np.unique(atoms.res_id), N_DELETIONS, replace=False @@ -94,6 +95,7 @@ def test_missing_residues(): ] test_sequences, _ = strucalph.to_3di(atoms) + # Apply the same deletions to the reference sequence ref_sequence = _get_ref_3di_sequence(PDB_ID, atoms.chain_id[0]) for res_id in del_backbone_residue_ids: seq_index = res_id - atoms.res_id[0] diff --git a/tests/structure/test_pb.py b/tests/structure/test_pb.py new file mode 100644 index 000000000..5ecb57348 --- /dev/null +++ b/tests/structure/test_pb.py @@ -0,0 +1,76 @@ +from pathlib import Path +import numpy as np +import pytest +import biotite.sequence.io.fasta as fasta +import biotite.structure as struc +import biotite.structure.alphabet as strucalph +import biotite.structure.io.pdbx as pdbx +from tests.util import data_dir + + +@pytest.fixture +def reference_sequence(): + """ + Get the reference Protein Blocks sequence for the alphabet example structure. + """ + _, seq_string = next( + fasta.FastaFile.read_iter(Path(data_dir("structure")) / "alphabet" / "pb.fasta") + ) + return strucalph.ProteinBlocksSequence(seq_string) + + +@pytest.fixture +def reference_chain(): + pdbx_file = pdbx.BinaryCIFFile.read( + Path(data_dir("structure")) / "alphabet" / "1ay7.bcif" + ) + atoms = pdbx.get_structure(pdbx_file, model=1) + atoms = atoms[struc.filter_amino_acids(atoms)] + chain = atoms[atoms.chain_id == "B"] + return chain + + +def test_to_protein_blocks(reference_chain, reference_sequence): + """ + Test the structure conversion to protein blocks based on a reference example from + the PBexplore documentation + (https://pbxplore.readthedocs.io/en/latest/intro_PB.html). + """ + test_pb_sequences, _ = strucalph.to_protein_blocks(reference_chain) + + assert len(test_pb_sequences) == 1 + assert str(test_pb_sequences[0]) == str(reference_sequence) + + +def test_missing_residues(reference_chain, reference_sequence): + """ + Like, `test_to_protein_blocks()`, but in some residues backbone atoms are missing. + Expect that these and adjacent residues get the unknown symbol 'Z' in the + PB sequence. + """ + N_DELETIONS = 5 + # The 'Z' symbol + UKNOWN_SYMBOL = strucalph.ProteinBlocksSequence.unknown_symbol + + # Randomly delete some backbone atoms + rng = np.random.default_rng(1) + del_backbone_residue_ids = rng.choice( + np.unique(reference_chain.res_id), N_DELETIONS, replace=False + ) + reference_chain = reference_chain[ + ~np.isin(reference_chain.res_id, del_backbone_residue_ids) + | ~np.isin(reference_chain.atom_name, ("N", "CA", "C")) + ] + + # Apply the same deletions to the reference sequence + for res_id in del_backbone_residue_ids: + seq_index = res_id - reference_chain.res_id[0] + # Convert the PB symbol for residue and adjacent ones to 'Z' + start_index = max(0, seq_index - 2) + end_index = min(len(reference_sequence), seq_index + 2) + reference_sequence[start_index : end_index + 1] = UKNOWN_SYMBOL + + test_pb_sequences, _ = strucalph.to_protein_blocks(reference_chain) + + assert len(test_pb_sequences) == 1 + assert str(test_pb_sequences[0]) == str(reference_sequence)