From a3844590bb2c177ca0167ce563b6c9b1b3a99b20 Mon Sep 17 00:00:00 2001 From: Simon Mathis Date: Sat, 19 Oct 2024 09:02:39 +0100 Subject: [PATCH] Allow loading computationally predicted cif files, which may not have certain custom annotations (#670) * feat: allow missing extra fields when reading cif files; warn user if values are being inferred * fix: explicitly note that masked values are set to 0 * chore: ruff --- src/biotite/structure/io/pdbx/convert.py | 45 +++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index 936016de3..26e268299 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -480,16 +480,53 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields): array.set_annotation("element", atom_site["type_symbol"].as_array(str)) if "atom_id" in extra_fields: - array.set_annotation("atom_id", atom_site["id"].as_array(int)) + if "id" in atom_site: + array.set_annotation("atom_id", atom_site["id"].as_array(int)) + else: + warnings.warn( + "Missing 'id' in 'atom_site' category. 'atom_id' generated automatically.", + UserWarning, + ) + array.set_annotation("atom_id", np.arange(array.array_length())) extra_fields.remove("atom_id") if "b_factor" in extra_fields: - array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float)) + if "B_iso_or_equiv" in atom_site: + array.set_annotation( + "b_factor", atom_site["B_iso_or_equiv"].as_array(float) + ) + else: + warnings.warn( + "Missing 'B_iso_or_equiv' in 'atom_site' category. 'b_factor' will be set to `nan`.", + UserWarning, + ) + array.set_annotation("b_factor", np.full(array.array_length(), np.nan)) extra_fields.remove("b_factor") if "occupancy" in extra_fields: - array.set_annotation("occupancy", atom_site["occupancy"].as_array(float)) + if "occupancy" in atom_site: + array.set_annotation("occupancy", atom_site["occupancy"].as_array(float)) + else: + warnings.warn( + "Missing 'occupancy' in 'atom_site' category. 'occupancy' will be assumed to be 1.0", + UserWarning, + ) + array.set_annotation( + "occupancy", np.ones(array.array_length(), dtype=float) + ) extra_fields.remove("occupancy") if "charge" in extra_fields: - array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0)) + if "pdbx_formal_charge" in atom_site: + array.set_annotation( + "charge", + atom_site["pdbx_formal_charge"].as_array( + int, 0 + ), # masked values are set to 0 + ) + else: + warnings.warn( + "Missing 'pdbx_formal_charge' in 'atom_site' category. 'charge' will be set to 0", + UserWarning, + ) + array.set_annotation("charge", np.zeros(array.array_length(), dtype=int)) extra_fields.remove("charge") # Handle all remaining custom fields