From 942b6c9a1497672721474bd6b7cd9e369b143525 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Wed, 2 Nov 2022 17:50:31 +0100 Subject: [PATCH 1/9] =?UTF-8?q?Bump=20version:=200.0.3-dev=20=E2=86=92=200?= =?UTF-8?q?.0.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- docs/source/conf.py | 2 +- setup.cfg | 2 +- src/moffragmentor/version.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 074cd8e..d11ddbe 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.3-dev +current_version = 0.0.3 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/docs/source/conf.py b/docs/source/conf.py index 0c17edf..3ed9780 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ author = "Kevin Maik Jablonka" # The full version, including alpha/beta/rc tags -release = "0.0.3-dev" +release = "0.0.3" # -- General configuration --------------------------------------------------- diff --git a/setup.cfg b/setup.cfg index a2b5930..ac5916a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = moffragmentor -version = 0.0.3-dev +version = 0.0.3 description = Splits MOFs into metal nodes and linkers. author = Kevin Maik Jablonka author_email = mail@kjablonka.com diff --git a/src/moffragmentor/version.py b/src/moffragmentor/version.py index 5a2a4a4..c1c13b8 100644 --- a/src/moffragmentor/version.py +++ b/src/moffragmentor/version.py @@ -14,7 +14,7 @@ "get_git_hash", ] -VERSION = "0.0.3-dev" +VERSION = "0.0.3" def get_git_hash() -> str: From 1c5a2cf25c0841de6f4518ad9b0e499d2b3696ca Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Wed, 2 Nov 2022 17:50:41 +0100 Subject: [PATCH 2/9] =?UTF-8?q?Bump=20version:=200.0.3=20=E2=86=92=200.0.4?= =?UTF-8?q?-dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- docs/source/conf.py | 2 +- setup.cfg | 2 +- src/moffragmentor/version.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d11ddbe..1425f5d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.3 +current_version = 0.0.4-dev commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? diff --git a/docs/source/conf.py b/docs/source/conf.py index 3ed9780..898b80e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -24,7 +24,7 @@ author = "Kevin Maik Jablonka" # The full version, including alpha/beta/rc tags -release = "0.0.3" +release = "0.0.4-dev" # -- General configuration --------------------------------------------------- diff --git a/setup.cfg b/setup.cfg index ac5916a..9187a96 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = moffragmentor -version = 0.0.3 +version = 0.0.4-dev description = Splits MOFs into metal nodes and linkers. author = Kevin Maik Jablonka author_email = mail@kjablonka.com diff --git a/src/moffragmentor/version.py b/src/moffragmentor/version.py index c1c13b8..da56be8 100644 --- a/src/moffragmentor/version.py +++ b/src/moffragmentor/version.py @@ -14,7 +14,7 @@ "get_git_hash", ] -VERSION = "0.0.3" +VERSION = "0.0.4-dev" def get_git_hash() -> str: From 32463d2822c9d52a8e42d342aea965d4b4987dd3 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sat, 5 Nov 2022 12:22:35 +0100 Subject: [PATCH 3/9] feat: also break nodes if it is the full MOF --- src/moffragmentor/fragmentor/__init__.py | 7 +++++++ src/moffragmentor/fragmentor/nodelocator.py | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/moffragmentor/fragmentor/__init__.py b/src/moffragmentor/fragmentor/__init__.py index a3ec970..f634a28 100644 --- a/src/moffragmentor/fragmentor/__init__.py +++ b/src/moffragmentor/fragmentor/__init__.py @@ -119,6 +119,13 @@ def run_fragmentation( ) linker_collection = LinkerCollection(is_linker) + if len(linker_collection) == 0 and len(is_capping) > 0: + logger.warning( + "No linkers found, but capping molecules. Assigning capping molecules to linkers." + ) + linker_collection = LinkerCollection(is_capping) + is_capping = [] + capping_molecules = LinkerCollection(is_capping) # Now handle the case of the the frameworks that have linkers without core (e.g. H-COO) diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py index 7eed80e..1b24408 100644 --- a/src/moffragmentor/fragmentor/nodelocator.py +++ b/src/moffragmentor/fragmentor/nodelocator.py @@ -258,7 +258,10 @@ def break_organic_nodes(node_result, mof): """If we have a node that is mostly organic, we break it up into smaller pieces.""" new_nodes = [] for node in node_result.nodes: - if check_node(node, node_result.branching_indices, mof) or might_be_porphyrin( + if len(node) == len(mof): + logger.debug('Breaking node as full MOF is assigned as node.') + new_nodes.extend(break_rod_node(mof, node)) + elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin( node, node_result.branching_indices, mof ): new_nodes.append(node) From 54109d03d498194b2e6bb5d97b42476986ca6027 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sat, 5 Nov 2022 15:46:44 +0100 Subject: [PATCH 4/9] refactor: simplify SBU constructor and prepare passing of dummy structure --- src/moffragmentor/fragmentor/linkerlocator.py | 15 +-- src/moffragmentor/fragmentor/nodelocator.py | 12 +- src/moffragmentor/sbu/node.py | 124 ++++++++++++++---- src/moffragmentor/sbu/sbu.py | 54 +------- 4 files changed, 114 insertions(+), 91 deletions(-) diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py index a13e0fc..2373433 100644 --- a/src/moffragmentor/fragmentor/linkerlocator.py +++ b/src/moffragmentor/fragmentor/linkerlocator.py @@ -81,23 +81,13 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals | set(mof.metal_indices) & all_node_indices # some metals might also be in the linker, e.g., in porphyrins ) - - # potential_linker_indices = set(list(range(len(mof.structure)))) - not_linker_indices - # get terminal indices we need to keep in the linker - - # terminal_indices = [] - # for linker_index in potential_linker_indices: - # for neighbor in mof.get_neighbor_indices(linker_index): - # if mof._is_terminal(neighbor): - # terminal_indices.append(neighbor) - graph_ = mof.structure_graph.__copy__() graph_.structure = Structure.from_sites(graph_.structure.sites) graph_.remove_nodes(not_linker_indices) # Second step: extract the connected components # return all as molecules - mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules( + _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules( graph_, return_unique=False, filter_in_cell=False, @@ -121,15 +111,12 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals molecule_graph=graphs[linker_index], center=center, graph_branching_indices=branching_indices, - closest_branching_index_in_molecule=branching_indices, binding_indices=identify_linker_binding_indices( mof, node_location_result.connecting_paths, idx, ), - coordinates=coords_, original_indices=idx, - connecting_paths=[], molecule_original_indices_mapping=mapping, ) frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass) diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py index 1b24408..b7345de 100644 --- a/src/moffragmentor/fragmentor/nodelocator.py +++ b/src/moffragmentor/fragmentor/nodelocator.py @@ -182,9 +182,9 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod node = Node.from_mof_and_indices( mof=mof, node_indices=node_indices, - branching_indices=node_location_result.branching_indices & node_indices, - binding_indices=node_location_result.binding_indices & node_indices, - connecting_paths=node_location_result.connecting_paths & node_indices, + branching_indices=node_location_result.branching_indices, + binding_indices=node_location_result.binding_indices, + connecting_paths=node_location_result.connecting_paths, ) nodes.append(node) @@ -217,7 +217,7 @@ def break_rod_nodes(mof, node_result): """Break rod nodes into smaller pieces.""" new_nodes = [] for node in node_result.nodes: - if get_sbu_dimensionality(mof, node) == 1: + if get_sbu_dimensionality(mof, node) >= 1: logger.debug("Found 1- or 2-dimensional node. Will break into isolated metals.") new_nodes.extend(break_rod_node(mof, node)) else: @@ -258,8 +258,8 @@ def break_organic_nodes(node_result, mof): """If we have a node that is mostly organic, we break it up into smaller pieces.""" new_nodes = [] for node in node_result.nodes: - if len(node) == len(mof): - logger.debug('Breaking node as full MOF is assigned as node.') + if len(node) == len(mof): + logger.debug("Breaking node as full MOF is assigned as node.") new_nodes.extend(break_rod_node(mof, node)) elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin( node, node_result.branching_indices, mof diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py index b2a418f..9ff664e 100644 --- a/src/moffragmentor/sbu/node.py +++ b/src/moffragmentor/sbu/node.py @@ -10,8 +10,10 @@ from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.core import Molecule, Structure from structuregraph_helpers.create import get_nx_graph_from_edge_tuples +from structuregraph_helpers.subgraph import get_subgraphs_as_molecules from moffragmentor.fragmentor.molfromgraph import wrap_molecule +from moffragmentor.utils import remove_all_nodes_not_in_indices from .sbu import SBU from ..fragmentor.splitter import _sites_and_classified_indices_from_indices @@ -23,12 +25,100 @@ __all__ = ["Node"] +_BINDING_DUMMY = "Xe" +_BRANCHING_DUMMY = "Kr" + + +def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, mof): + # first, create a molecule and molecule graph from those indices + # that intersect with the node indices + branching_in_node = [i for i in all_branching_indices if i in node_indices] + binding_in_node = [i for i in all_binding_indices if i in node_indices] + graph_ = mof.structure_graph.__copy__() + remove_all_nodes_not_in_indices( + graph_, list(node_indices) + branching_in_node + binding_in_node + ) + _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules( + graph_, + return_unique=True, + filter_in_cell=True, + disable_boundary_crossing_check=False, + prune_long_edges=True, + ) + assert len(_mols) == 1 + mol, mapping = wrap_molecule(idxs[0], mof) + + # now, also create one molecule from the node indices, where we replace + # binding and branching indices with dummy atoms + node_metal_atoms = [i for i in node_indices if i in mof.metal_indices] + binding_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], [])) + binding_to_node_metal = binding_to_node_metal.intersection(all_binding_indices) + branching_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], [])) + binding_neighbors = sum([mof.get_neighbor_indices(i) for i in binding_to_node_metal], []) + branching_to_binding = set(binding_neighbors).intersection(all_branching_indices) + + # Now, make a copy of the structure and replace the indices with dummy atoms + dummy_structure = Structure.from_sites(mof.sites) + for i in binding_to_node_metal: + dummy_structure.replace(i, _BINDING_DUMMY) + for i in branching_to_binding + branching_to_node_metal: + dummy_structure.replace(i, _BRANCHING_DUMMY) + + to_delete = [ + i + for i in range(len(dummy_structure)) + if i + not in list(node_indices) + + list(binding_to_node_metal) + + list(branching_to_binding) + + list(branching_to_node_metal) + ] + graph_w_dummy = mof.structure_graph.__copy__() + graph_w_dummy.structure = dummy_structure + graph_w_dummy.remove_nodes_from(to_delete) + ( + _mols_w_dummy, + graphs_w_dummy, + idxs_w_dummy, + centers_w_dummy, + coordinates_with_dummy, + ) = get_subgraphs_as_molecules( + graph_w_dummy, + return_unique=True, + filter_in_cell=True, + disable_boundary_crossing_check=False, + prune_long_edges=True, + ) + assert len(_mols_w_dummy) == 1 + mol_w_dummy, mapping_w_dummy = wrap_molecule(idxs_w_dummy[0], mof) + + +def _find_node_hidden_indices(node_indices, all_binding_indices, all_branching_indices, mof): + # find the binding indices bound to node + node_neighbors = sum([mof.get_neighbor_indices(i) for i in node_indices], []) + relevant_binding_indices = [i for i in node_neighbors if i in all_binding_indices] + relevant_binding_indices_neighbors = sum( + [mof.get_neighbor_indices(i) for i in relevant_binding_indices], [] + ) + relevant_branching_indices = [ + i for i in relevant_binding_indices_neighbors if i in all_branching_indices + ] + [i for i in all_branching_indices if i in node_neighbors] + hidden_indices = relevant_branching_indices + relevant_binding_indices + return set(hidden_indices) + def node_from_mof_and_indices( # pylint:disable=too-many-locals, too-many-arguments - cls, mof, node_indices, branching_indices, binding_indices, connecting_paths + cls, mof, node_indices, all_branching_indices, all_binding_indices, all_connecting_paths ): """Create a node from a MOF and a list of indices of different types.""" - node_indices = node_indices | connecting_paths # This should actually not be necessary + branching_indices = node_indices & all_branching_indices + binding_indices = node_indices & all_binding_indices + connecting_paths = all_connecting_paths & node_indices + + hidden_indices = _find_node_hidden_indices( + node_indices, all_binding_indices, all_branching_indices, mof + ) + graph_ = mof.structure_graph.__copy__() graph_.structure = Structure.from_sites(graph_.structure.sites) to_delete = _not_relevant_structure_indices(mof.structure, node_indices) @@ -66,31 +156,21 @@ def node_from_mof_and_indices( # pylint:disable=too-many-locals, too-many-argum molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict) center = np.mean(molecule.cart_coords, axis=0) graph_branching_indices = branching_indices & node_indices - closest_branching_index_in_molecule = [] - - graph = get_nx_graph_from_edge_tuples(sites_and_indices.edges) - for branching_index in branching_indices: - if branching_index not in relevant_indices: - - # now we need to find the closest neighbor in the - # set of vertices that are in the molecule - # ToDo: this is currently an assumption that it is terminal and the - # next partner then already is in the molecule, - # we could recursively call or get all the paths and then get the shortest - new_candidates = get_neighbors_from_nx_graph(graph, branching_index)[0] - closest_branching_index_in_molecule.append(new_candidates) - else: - closest_branching_index_in_molecule.append(branching_index) + idx = [i for i in relevant_indices if i in node_indices] - mol_w_hidden, mapping = wrap_molecule(idx + sites_and_indices.hidden_vertices, mof) + # Add the branching indices here? + mol_w_hidden, mapping = wrap_molecule( + idx + sites_and_indices.hidden_vertices + list(hidden_indices), mof + ) + + # perhaps than delete here the hidden ones again to to get the mol without the hidden ones node = cls( molecule=mol_w_hidden, molecule_graph=molecule_graph, center=center, graph_branching_indices=graph_branching_indices, - closest_branching_index_in_molecule=closest_branching_index_in_molecule, binding_indices=binding_indices, original_indices=idx + sites_and_indices.hidden_vertices, persistent_non_metal_bridged=sites_and_indices.persistent_non_metal_bridged_components, @@ -136,9 +216,9 @@ def from_mof_and_indices( # pylint:disable=too-many-arguments cls, mof, node_indices, - branching_indices, - binding_indices, - connecting_paths=connecting_paths, + all_branching_indices=branching_indices, + all_binding_indices=binding_indices, + all_connecting_paths=connecting_paths, ) def __repr__(self) -> str: diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py index 897f911..652f434 100644 --- a/src/moffragmentor/sbu/sbu.py +++ b/src/moffragmentor/sbu/sbu.py @@ -55,11 +55,6 @@ class SBU: * graph_branching_indices: are the branching indices according to the graph-based definition. They might not be part of the molecule. - * closest_branching_index_in_molecule: those are always part of the molecule. - In case the branching index is part of the molecule, - they are equal to to the graph_branching_indices. - Otherwise they are obtained as the closest vertex of the original - branching vertex that is part of the molecule. * binding_indices: are the indices of the sites between the branching index and metal * original_indices: complete original set of indices that has been selected @@ -90,13 +85,12 @@ class SBU: >>> sbu_object.search_pubchem() """ - def __init__( # pylint:disable=too-many-arguments + def __init__( self, molecule: Molecule, molecule_graph: MoleculeGraph, center: np.ndarray, graph_branching_indices: Collection[int], - closest_branching_index_in_molecule: Collection[int], binding_indices: Collection[int], original_indices: Collection[int], persistent_non_metal_bridged: Optional[Collection[int]] = None, @@ -115,22 +109,9 @@ def __init__( # pylint:disable=too-many-arguments center (np.ndarray): Center of the SBU. graph_branching_indices (Collection[int]): Branching indices in the original structure. - closest_branching_index_in_molecule (Collection[int]):1 - Closest branching index in the molecule. binding_indices (Collection[int]): Binding indices in the original structure. original_indices (Collection[int]): List of all indicies in the original structure this SBU corresponds to. - persistent_non_metal_bridged (Optional[Collection[int]], optional): - components that are connected via a bridge both in the MOF structure - and building block molecule. No metal is part of the edge, i.e., - bound solvents are not included in this set. Defaults to None. - terminal_in_mol_not_terminal_in_struct (Optional[Collection[int]], optional): - Tndices that are terminal in the molecule but not terminal in the structure. - Defaults to None. - connecting_paths (Optional[Collection[int]], optional): - Paths between node atoms and branching atoms. Defaults to None. - coordinates (Optional[np.ndarray], optional): Coordinates of all atoms in the molecule. - Defaults to None. molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional): Mapping from molecule indices to original indices. Defaults to None. """ @@ -139,10 +120,7 @@ def __init__( # pylint:disable=too-many-arguments self._original_indices = original_indices self.molecule_graph = molecule_graph self._original_graph_branching_indices = graph_branching_indices - self._original_closest_branching_index_in_molecule = closest_branching_index_in_molecule - self._persistent_non_metal_bridged = persistent_non_metal_bridged - self._terminal_in_mol_not_terminal_in_struct = terminal_in_mol_not_terminal_in_struct self._original_binding_indices = binding_indices self.mapping_from_original_indices = defaultdict(list) @@ -158,15 +136,6 @@ def __init__( # pylint:disable=too-many-arguments for v in value: self.mapping_to_original_indices[v] = key self._indices = original_indices - self._original_connecting_paths = connecting_paths - self.connecting_paths = [] - self._coordinates = coordinates - for i in connecting_paths: - try: - for index in self.mapping_from_original_indices[i]: - self.connecting_paths.append(index) - except KeyError: - pass @property def center(self): @@ -255,10 +224,7 @@ def composition(self): @property def cart_coords(self): - # if self._coordinates is not None: - # return self._coordinates return self.molecule.cart_coords - # return np.array(self._coordinates) @cached_property def mol_with_coords(self): @@ -323,16 +289,6 @@ def __eq__(self, other: "SBU") -> bool: def branching_coords(self): return self.cart_coords[self.graph_branching_indices] - @cached_property - def connecting_indices(self): - indices = [] - - for p in self._original_closest_branching_index_in_molecule: - for index in self.mapping_from_original_indices[p]: - indices.append(index) - - return indices - @property def original_binding_indices(self): return self._original_binding_indices @@ -359,17 +315,17 @@ def get_openbabel_mol(self): return pm def show_molecule(self): - import nglview # pylint:disable=import-outside-toplevel + import nglview return nglview.show_pymatgen(self.molecule) def show_connecting_structure(self): - import nglview # pylint:disable=import-outside-toplevel + import nglview return nglview.show_pymatgen(self._get_connected_sites_structure()) def show_binding_structure(self): - import nglview # pylint:disable=import-outside-toplevel + import nglview return nglview.show_pymatgen(self._get_binding_sites_structure()) @@ -394,7 +350,7 @@ def smiles(self) -> str: try: canonical = Chem.CanonSmiles(smiles) return canonical - except Exception: # pylint: disable=broad-except + except Exception: return smiles def _get_boxed_structure(self): From e96f430a3b2ea57c037e0b4af65a18ae8ca0028f Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 6 Nov 2022 14:58:48 +0100 Subject: [PATCH 5/9] first refactoring done --- src/moffragmentor/fragmentor/linkerlocator.py | 16 +- src/moffragmentor/fragmentor/nodelocator.py | 19 +- src/moffragmentor/fragmentor/splitter.py | 4 +- src/moffragmentor/mof.py | 4 + src/moffragmentor/sbu/node.py | 193 ++++++------------ src/moffragmentor/sbu/sbu.py | 49 ++--- tests/conftest.py | 44 ++-- tests/fragmentor/test_locator.py | 10 +- tests/test_mof.py | 9 - 9 files changed, 133 insertions(+), 215 deletions(-) diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py index 2373433..71cdec1 100644 --- a/src/moffragmentor/fragmentor/linkerlocator.py +++ b/src/moffragmentor/fragmentor/linkerlocator.py @@ -55,16 +55,16 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals # First step: remove everything that is node or unbound solvent # bound solvent is part of the node by default all_node_indices = set() - all_persistent_non_metal_bridges = set() + # all_persistent_non_metal_bridges = set() for node_indices in node_location_result.nodes: all_node_indices.update(node_indices) - for node in node_collection: - all_persistent_non_metal_bridges.update( - _flatten_list_of_sets( - node._persistent_non_metal_bridged # pylint:disable=protected-access - ) - ) + # for node in node_collection: + # all_persistent_non_metal_bridges.update( + # _flatten_list_of_sets( + # node._persistent_non_metal_bridged # pylint:disable=protected-access + # ) + # ) not_linker_indices = ( ( @@ -109,14 +109,12 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals linker = Linker( molecule=mol, molecule_graph=graphs[linker_index], - center=center, graph_branching_indices=branching_indices, binding_indices=identify_linker_binding_indices( mof, node_location_result.connecting_paths, idx, ), - original_indices=idx, molecule_original_indices_mapping=mapping, ) frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass) diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py index b7345de..0dd8964 100644 --- a/src/moffragmentor/fragmentor/nodelocator.py +++ b/src/moffragmentor/fragmentor/nodelocator.py @@ -184,7 +184,6 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod node_indices=node_indices, branching_indices=node_location_result.branching_indices, binding_indices=node_location_result.binding_indices, - connecting_paths=node_location_result.connecting_paths, ) nodes.append(node) @@ -246,10 +245,10 @@ def check_node(node_indices, branching_indices, mof) -> bool: bool: True if the node is reasonable, False otherwise """ # check if there is not way more organic than metal - num_carbons = len(node_indices & set(mof.c_indices)) + num_organic = len(node_indices & set(mof.c_indices)) + len(node_indices & set(mof.n_indices)) num_metals = len(node_indices & set(mof.metal_indices)) branching_indices_in_node = branching_indices & node_indices - if num_carbons > num_metals + len(branching_indices_in_node): + if num_organic > num_metals + len(branching_indices_in_node): return False return True @@ -307,7 +306,6 @@ def find_nodes( node_result = find_node_clusters( mof, unbound_solvent.indices, forbidden_indices=forbidden_indices ) - if create_single_metal_bus: # Rewrite the node result node_result = create_single_metal_nodes(mof, node_result) @@ -334,16 +332,22 @@ def might_be_porphyrin(node_indices, branching_idx, mof): metal_in_node = _get_metal_sublist(node_indices, mof.metal_indices) node_indices = set(node_indices) branching_idx = set(branching_idx) + bound_to_metal = sum([mof.get_neighbor_indices(i) for i in metal_in_node], []) + branching_bound_to_metal = branching_idx & set(bound_to_metal) # ToDo: check and think if this can handle the general case # it should, at least if we only look at the metals - if (len(metal_in_node) == 1) & (len(node_indices) > 1): + if (len(metal_in_node) == 1) & (len(node_indices | branching_bound_to_metal) > 1): logger.debug( "metal_in_node", metal_in_node, node_indices, ) - num_neighbors = len(mof.get_neighbor_indices(metal_in_node[0])) - if metal_and_branching_coplanar(node_indices, branching_idx, mof) & (num_neighbors > 2): + num_neighbors = len(bound_to_metal) + if ( + metal_and_branching_coplanar(node_indices, branching_bound_to_metal, mof) + & (num_neighbors > 2) + & (len(branching_bound_to_metal) < 5) + ): logger.debug( "Metal in linker found, indices: {}".format( node_indices, @@ -358,5 +362,6 @@ def detect_porphyrin(node_collection, mof): not_node = [] for i, node in enumerate(node_collection): if might_be_porphyrin(node._original_indices, node._original_graph_branching_indices, mof): + logger.info("Found porphyrin in node {}".format(node._original_indices)) not_node.append(i) return not_node diff --git a/src/moffragmentor/fragmentor/splitter.py b/src/moffragmentor/fragmentor/splitter.py index d09c987..f5e6920 100644 --- a/src/moffragmentor/fragmentor/splitter.py +++ b/src/moffragmentor/fragmentor/splitter.py @@ -7,8 +7,8 @@ from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.core import Molecule -from .solventlocator import _locate_bound_solvent -from ..utils import ( +from moffragmentor.fragmentor.solventlocator import _locate_bound_solvent +from moffragmentor.utils import ( _get_cartesian_coords, _get_molecule_edge_label, _get_vertices_of_smaller_component_upon_edge_break, diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py index f6bd6cb..b334538 100644 --- a/src/moffragmentor/mof.py +++ b/src/moffragmentor/mof.py @@ -257,6 +257,10 @@ def h_indices(self) -> List[int]: def c_indices(self) -> List[int]: return [i for i, species in enumerate(self.structure.species) if str(species) == "C"] + @cached_property + def n_indices(self) -> List[int]: + return [i for i, species in enumerate(self.structure.species) if str(species) == "N"] + def get_neighbor_indices(self, site: int) -> List[int]: """Get list of indices of neighboring sites.""" return [site.index for site in self.structure_graph.get_connected_sites(site)] diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py index 9ff664e..a55d856 100644 --- a/src/moffragmentor/sbu/node.py +++ b/src/moffragmentor/sbu/node.py @@ -9,19 +9,11 @@ import numpy as np from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.core import Molecule, Structure -from structuregraph_helpers.create import get_nx_graph_from_edge_tuples -from structuregraph_helpers.subgraph import get_subgraphs_as_molecules from moffragmentor.fragmentor.molfromgraph import wrap_molecule -from moffragmentor.utils import remove_all_nodes_not_in_indices - -from .sbu import SBU -from ..fragmentor.splitter import _sites_and_classified_indices_from_indices -from ..utils import ( - _not_relevant_structure_indices, - _reindex_list_of_tuple, - get_neighbors_from_nx_graph, -) +from moffragmentor.fragmentor.splitter import _sites_and_classified_indices_from_indices +from moffragmentor.sbu.sbu import SBU +from moffragmentor.utils import _reindex_list_of_tuple __all__ = ["Node"] @@ -29,24 +21,56 @@ _BRANCHING_DUMMY = "Kr" +def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False): + indices = set(indices) + sites_and_indices = _sites_and_classified_indices_from_indices(mof, indices) + if not ignore_hidden_indices: + relevant_indices = indices - set(sites_and_indices.hidden_vertices) + else: + relevant_indices = indices + + selected_positions, selected_species, selected_edge_indices = [], [], [] + + # give us a quick and easy way to filter the edges we want to keep + # for this reason we will use both edge "partners" as keys + # and have a list of indices in the original edges list as the value + edge_dict = defaultdict(list) + for i, egde in enumerate(sites_and_indices.edges): + edge_partner_1, edge_partner_2 = egde + edge_dict[edge_partner_1].append(i) + edge_dict[edge_partner_2].append(i) + + for i, index in enumerate(indices): + if index in relevant_indices: + selected_positions.append(sites_and_indices.cartesian_coordinates[i]) + selected_species.append(sites_and_indices.species[i]) + edge_idxs = edge_dict[index] + for edge_idx in edge_idxs: + edge = sites_and_indices.edges[edge_idx] + if (edge[0] in relevant_indices) and (edge[1] in relevant_indices): + selected_edge_indices.append(edge_idx) + + index_map = dict(zip(relevant_indices, range(len(relevant_indices)))) + molecule = Molecule(selected_species, selected_positions) + selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices] + selected_edges = _reindex_list_of_tuple(selected_edges, index_map) + edge_dict = dict(zip(selected_edges, [None] * len(selected_edges))) + molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict) + + mol, mapping = wrap_molecule(list(relevant_indices), mof) + + return mol, molecule_graph, mapping + + def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, mof): # first, create a molecule and molecule graph from those indices # that intersect with the node indices branching_in_node = [i for i in all_branching_indices if i in node_indices] binding_in_node = [i for i in all_binding_indices if i in node_indices] - graph_ = mof.structure_graph.__copy__() - remove_all_nodes_not_in_indices( - graph_, list(node_indices) + branching_in_node + binding_in_node - ) - _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules( - graph_, - return_unique=True, - filter_in_cell=True, - disable_boundary_crossing_check=False, - prune_long_edges=True, + + mol, graph, mapping = _build_mol_and_graph( + mof, list(node_indices) + branching_in_node + binding_in_node ) - assert len(_mols) == 1 - mol, mapping = wrap_molecule(idxs[0], mof) # now, also create one molecule from the node indices, where we replace # binding and branching indices with dummy atoms @@ -58,125 +82,46 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, branching_to_binding = set(binding_neighbors).intersection(all_branching_indices) # Now, make a copy of the structure and replace the indices with dummy atoms - dummy_structure = Structure.from_sites(mof.sites) + dummy_structure = Structure.from_sites(mof.structure.sites) for i in binding_to_node_metal: dummy_structure.replace(i, _BINDING_DUMMY) - for i in branching_to_binding + branching_to_node_metal: + for i in branching_to_binding | branching_to_node_metal: dummy_structure.replace(i, _BRANCHING_DUMMY) - to_delete = [ - i - for i in range(len(dummy_structure)) - if i - not in list(node_indices) + mol_without_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph( + mof, # todo: fix with dummy mof structure + list(node_indices) + list(binding_to_node_metal) + list(branching_to_binding) - + list(branching_to_node_metal) - ] - graph_w_dummy = mof.structure_graph.__copy__() - graph_w_dummy.structure = dummy_structure - graph_w_dummy.remove_nodes_from(to_delete) - ( - _mols_w_dummy, - graphs_w_dummy, - idxs_w_dummy, - centers_w_dummy, - coordinates_with_dummy, - ) = get_subgraphs_as_molecules( - graph_w_dummy, - return_unique=True, - filter_in_cell=True, - disable_boundary_crossing_check=False, - prune_long_edges=True, + + list(branching_to_node_metal), + ignore_hidden_indices=True, ) - assert len(_mols_w_dummy) == 1 - mol_w_dummy, mapping_w_dummy = wrap_molecule(idxs_w_dummy[0], mof) + return mol, graph, mapping, mol_without_dummy, graph_w_dummy, mapping_w_dummy -def _find_node_hidden_indices(node_indices, all_binding_indices, all_branching_indices, mof): - # find the binding indices bound to node - node_neighbors = sum([mof.get_neighbor_indices(i) for i in node_indices], []) - relevant_binding_indices = [i for i in node_neighbors if i in all_binding_indices] - relevant_binding_indices_neighbors = sum( - [mof.get_neighbor_indices(i) for i in relevant_binding_indices], [] - ) - relevant_branching_indices = [ - i for i in relevant_binding_indices_neighbors if i in all_branching_indices - ] + [i for i in all_branching_indices if i in node_neighbors] - hidden_indices = relevant_branching_indices + relevant_binding_indices - return set(hidden_indices) - - -def node_from_mof_and_indices( # pylint:disable=too-many-locals, too-many-arguments - cls, mof, node_indices, all_branching_indices, all_binding_indices, all_connecting_paths -): +def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices): """Create a node from a MOF and a list of indices of different types.""" + node_indices = node_indices branching_indices = node_indices & all_branching_indices binding_indices = node_indices & all_binding_indices - connecting_paths = all_connecting_paths & node_indices - - hidden_indices = _find_node_hidden_indices( - node_indices, all_binding_indices, all_branching_indices, mof - ) - - graph_ = mof.structure_graph.__copy__() - graph_.structure = Structure.from_sites(graph_.structure.sites) - to_delete = _not_relevant_structure_indices(mof.structure, node_indices) - graph_.remove_nodes(to_delete) - - sites_and_indices = _sites_and_classified_indices_from_indices(mof, node_indices) - relevant_indices = node_indices - set(sites_and_indices.hidden_vertices) - - selected_positions, selected_species, selected_edge_indices = [], [], [] - - # give us a quick and easy way to filter the edges we want to keep - # for this reason we will use both edge "partners" as keys - # and have a list of indices in the original edges list as the value - edge_dict = defaultdict(list) - for i, egde in enumerate(sites_and_indices.edges): - edge_partner_1, edge_partner_2 = egde - edge_dict[edge_partner_1].append(i) - edge_dict[edge_partner_2].append(i) - - for i, index in enumerate(node_indices): - if index in relevant_indices: - selected_positions.append(sites_and_indices.cartesian_coordinates[i]) - selected_species.append(sites_and_indices.species[i]) - edge_idxs = edge_dict[index] - for edge_idx in edge_idxs: - edge = sites_and_indices.edges[edge_idx] - if (edge[0] in relevant_indices) and (edge[1] in relevant_indices): - selected_edge_indices.append(edge_idx) - - index_map = dict(zip(relevant_indices, range(len(relevant_indices)))) - molecule = Molecule(selected_species, selected_positions) - selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices] - selected_edges = _reindex_list_of_tuple(selected_edges, index_map) - edge_dict = dict(zip(selected_edges, [None] * len(selected_edges))) - molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict) - center = np.mean(molecule.cart_coords, axis=0) graph_branching_indices = branching_indices & node_indices - idx = [i for i in relevant_indices if i in node_indices] - - # Add the branching indices here? - mol_w_hidden, mapping = wrap_molecule( - idx + sites_and_indices.hidden_vertices + list(hidden_indices), mof + mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy = _extract_and_wrap( + node_indices=node_indices, + all_branching_indices=all_branching_indices, + all_binding_indices=all_binding_indices, + mof=mof, ) - # perhaps than delete here the hidden ones again to to get the mol without the hidden ones - node = cls( - molecule=mol_w_hidden, - molecule_graph=molecule_graph, - center=center, + molecule=mol, + molecule_graph=graph, graph_branching_indices=graph_branching_indices, binding_indices=binding_indices, - original_indices=idx + sites_and_indices.hidden_vertices, - persistent_non_metal_bridged=sites_and_indices.persistent_non_metal_bridged_components, - terminal_in_mol_not_terminal_in_struct=sites_and_indices.hidden_vertices, - connecting_paths=connecting_paths, molecule_original_indices_mapping=mapping, + dummy_molecule=mol_w_dummy, + dummy_molecule_graph=graph_w_dummy, + dummy_molecule_indices_mapping=mapping_w_dummy, ) return node @@ -195,7 +140,6 @@ def from_mof_and_indices( # pylint:disable=too-many-arguments node_indices: Set[int], branching_indices: Set[int], binding_indices: Set[int], - connecting_paths: Set[int], ): """Build a node object from a MOF and some intermediate outputs of the fragmentation. @@ -206,8 +150,6 @@ def from_mof_and_indices( # pylint:disable=too-many-arguments that belong to this node. binding_indices (Set[int]): The indices of the binding points in the MOF that belong to this node. - connecting_paths (Set[int]): The indices of the connecting paths in the MOF - that belong to this node. Returns: A node object. @@ -218,7 +160,6 @@ def from_mof_and_indices( # pylint:disable=too-many-arguments node_indices, all_branching_indices=branching_indices, all_binding_indices=binding_indices, - all_connecting_paths=connecting_paths, ) def __repr__(self) -> str: diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py index 652f434..5e82a81 100644 --- a/src/moffragmentor/sbu/sbu.py +++ b/src/moffragmentor/sbu/sbu.py @@ -9,15 +9,14 @@ import pubchempy as pcp from backports.cached_property import cached_property from loguru import logger -from openbabel import pybel as pb from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.core import Molecule, Structure from pymatgen.io.babel import BabelMolAdaptor from rdkit import Chem from scipy.spatial.distance import pdist -from ..utils import pickle_dump -from ..utils.mol_compare import mcs_rank +from moffragmentor.utils import pickle_dump +from moffragmentor.utils.mol_compare import mcs_rank def ob_mol_without_metals(obmol): @@ -59,16 +58,6 @@ class SBU: the branching index and metal * original_indices: complete original set of indices that has been selected for this building blocks - * persistent_non_metal_bridged: components that are connected - via a bridge both in the MOF structure - and building block molecule. No metal is part of the edge, - i.e., bound solvents are not included in this set - * terminal_in_mol_not_terminal_in_struct: indices that are terminal - in the molecule but not terminal in the structure. - This is for instance, the case for carboxy groups that are only - coordinated with one O. In this case, a chemically more faithful - representation might be to not include the C of the carboxy - in the node. This collection allows us to do so. .. note:: @@ -89,15 +78,12 @@ def __init__( self, molecule: Molecule, molecule_graph: MoleculeGraph, - center: np.ndarray, graph_branching_indices: Collection[int], binding_indices: Collection[int], - original_indices: Collection[int], - persistent_non_metal_bridged: Optional[Collection[int]] = None, - terminal_in_mol_not_terminal_in_struct: Optional[Collection[int]] = None, - connecting_paths: Optional[Collection[int]] = None, - coordinates: Optional[np.ndarray] = None, molecule_original_indices_mapping: Optional[Dict[int, List[int]]] = None, + dummy_molecule: Optional[Molecule] = None, + dummy_molecule_graph: Optional[MoleculeGraph] = None, + dummy_molecule_indices_mapping: Optional[Dict[int, List[int]]] = None, ): """Initialize a secondary building block. @@ -106,26 +92,27 @@ def __init__( Args: molecule (Molecule): Pymatgen molecule object. molecule_graph (MoleculeGraph): Pymatgen molecule graph object. - center (np.ndarray): Center of the SBU. graph_branching_indices (Collection[int]): Branching indices in the original structure. binding_indices (Collection[int]): Binding indices in the original structure. - original_indices (Collection[int]): List of all indicies in the original - structure this SBU corresponds to. molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional): Mapping from molecule indices to original indices. Defaults to None. """ self.molecule = molecule - self._center = center - self._original_indices = original_indices + self._mapping = molecule_original_indices_mapping + self._indices = sum(list(molecule_original_indices_mapping.values()), []) + self._original_indices = self._indices self.molecule_graph = molecule_graph self._original_graph_branching_indices = graph_branching_indices - self._original_binding_indices = binding_indices + self._dummy_molecule = dummy_molecule + self._dummy_molecule_graph = dummy_molecule_graph + self._dummy_molecule_indices_mapping = dummy_molecule_indices_mapping + self.mapping_from_original_indices = defaultdict(list) if molecule_original_indices_mapping is None: - for ori_index, index in zip(self._original_indices, range(len(molecule))): + for ori_index, index in zip(self._indices, range(len(molecule))): self.mapping_from_original_indices[ori_index].append(index) else: for k, v in molecule_original_indices_mapping.items(): @@ -135,7 +122,6 @@ def __init__( for key, value in self.mapping_from_original_indices.items(): for v in value: self.mapping_to_original_indices[v] = key - self._indices = original_indices @property def center(self): @@ -310,6 +296,8 @@ def openbabel_mol(self): return self.get_openbabel_mol() def get_openbabel_mol(self): + from openbabel import pybel as pb + a = BabelMolAdaptor(self.molecule) pm = pb.Molecule(a.openbabel_mol) return pm @@ -363,13 +351,6 @@ def _get_boxed_structure(self): ) return structure - def _get_connected_sites_structure(self): - sites = [] - s = self._get_boxed_structure() - for i in self.connecting_indices: - sites.append(s[i]) - return Structure.from_sites(sites) - def _get_binding_sites_structure(self): sites = [] s = self._get_boxed_structure() diff --git a/tests/conftest.py b/tests/conftest.py index 861403f..d35541d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1670,31 +1670,28 @@ def get_linker_object(): linker = Linker( mol, mg, - center=np.array([-502.5498308, -406.76009494, -502.5498308]), graph_branching_indices={8, 13, 17}, - closest_branching_index_in_molecule={1, 37, 46}, binding_indices=[129, 137, 108, 141, 120, 152], - original_indices=[ - 96, - 129, - 23, - 152, - 60, - 137, - 105, - 0, - 1, - 2, - 108, - 141, - 69, - 37, - 14, - 84, - 120, - 4, - ], - connecting_paths=[], + molecule_original_indices_mapping={ + 0: [96], + 1: [129], + 2: [23], + 3: [152], + 4: [60], + 5: [137], + 6: [105], + 7: [0], + 8: [1], + 9: [2], + 10: [108], + 11: [141], + 12: [69], + 13: [37], + 14: [14], + 15: [84], + 16: [120], + 17: [46], + }, ) return linker @@ -1707,7 +1704,6 @@ def get_node_object(): node_indices={32, 1, 132, 134, 72, 137, 74, 139, 108, 110, 113, 115, 27, 30}, branching_indices={1, 27, 30, 32}, binding_indices={108, 110, 113, 115, 132, 134, 137, 139}, - connecting_paths={1, 27, 30, 32, 108, 110, 113, 115, 132, 134, 137, 139}, ) return node diff --git a/tests/fragmentor/test_locator.py b/tests/fragmentor/test_locator.py index 97efd28..3a507fc 100644 --- a/tests/fragmentor/test_locator.py +++ b/tests/fragmentor/test_locator.py @@ -86,13 +86,15 @@ def test_find_rod_node_floating_mof_cluster(get_1d_node_with_floating): assert node_lengths[0] == 20 -def test_formatte_mof(get_formate_structure_and_graph): +def test_formate_mof(get_formate_structure_and_graph): s, sg = get_formate_structure_and_graph mof = MOF(s, sg) bbs = mof.fragment() - assert len(bbs.nodes) == 1 - assert len(bbs.capping_molecules) == 6 - assert bbs.capping_molecules.composition == {"C1 H1 O2": 6} + assert len(bbs.nodes) == 2 + assert ( + len(bbs.capping_molecules) == 0 + ) # since there is no linker, we assign the formate as capping molecule + assert bbs.linkers.composition == {"C1 H1 O2": 6} def test__get_solvent_molecules_bound_to_node(get_li_mof_with_floating): diff --git a/tests/test_mof.py b/tests/test_mof.py index f9f90fc..207899b 100644 --- a/tests/test_mof.py +++ b/tests/test_mof.py @@ -76,15 +76,6 @@ def test_hkust(get_hkust_mof): mof = get_hkust_mof fragments = mof.fragment() assert fragments.net_embedding.rcsr_code == "tbo" - for linker in fragments.linkers: - cs = linker._get_connected_sites_structure() - distances = _get_distances(cs) - assert max(distances) < 8 - - for node in fragments.nodes: - cs = node._get_connected_sites_structure() - distances = _get_distances(cs) - assert max(distances) < 8 @pytest.mark.slow From 7ff0cc34e607322526005565a75cd6c6501b2ad8 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 6 Nov 2022 16:19:52 +0100 Subject: [PATCH 6/9] dummy seems to work, still need to use it in the net construction and for the construction of the inputs for pormake --- .flake8 | 40 ------------------- setup.cfg | 42 ++++++++++++++++++++ src/moffragmentor/fragmentor/molfromgraph.py | 8 +++- src/moffragmentor/mof.py | 3 ++ src/moffragmentor/sbu/node.py | 39 +++++++++++++----- src/moffragmentor/sbu/sbu.py | 2 + 6 files changed, 83 insertions(+), 51 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 26860e5..0000000 --- a/.flake8 +++ /dev/null @@ -1,40 +0,0 @@ -######################### -# Flake8 Configuration # -# (.flake8) # -######################### -[flake8] -ignore = - S301 # pickle - S403 # pickle - S404 - S603 - W503 # Line break before binary operator (flake8 is wrong) - E203 # whitespace before ':' - S101 # Complaining about assert statements - D101 # Docstring missing - D102 # Docstring missing - D103 # Docstring missing - D104 # Docstring missing - D400 -exclude = - .tox, - .git, - __pycache__, - docs/source/conf.py, - build, - dist, - tests/fixtures/*, - *.pyc, - *.egg-info, - .cache, - .eggs, - data -max-line-length = 120 -max-complexity = 20 -import-order-style = pycharm -application-import-names = - moffragmentor - tests -format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s -per-file-ignores = - tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 9187a96..00a1a5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -142,3 +142,45 @@ exclude_lines = [darglint] docstring_style = google strictness = short + + +######################### +# Flake8 Configuration # +# (.flake8) # +######################### +[flake8] +ignore = + S301 # pickle + S403 # pickle + S404 + S603 + W503 # Line break before binary operator (flake8 is wrong) + E203 # whitespace before ':' + S101 # Complaining about assert statements + D101 # Docstring missing + D102 # Docstring missing + D103 # Docstring missing + D104 # Docstring missing + D400 +exclude = + .tox, + .git, + __pycache__, + docs/source/conf.py, + build, + dist, + tests/fixtures/*, + *.pyc, + *.egg-info, + .cache, + .eggs, + data +max-line-length = 120 +max-complexity = 20 +import-order-style = pycharm +application-import-names = + moffragmentor + tests +format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s +per-file-ignores = + tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209 \ No newline at end of file diff --git a/src/moffragmentor/fragmentor/molfromgraph.py b/src/moffragmentor/fragmentor/molfromgraph.py index b354ee8..978bd1a 100644 --- a/src/moffragmentor/fragmentor/molfromgraph.py +++ b/src/moffragmentor/fragmentor/molfromgraph.py @@ -12,7 +12,10 @@ def wrap_molecule( - mol_idxs: Iterable[int], mof: "mof.MOF", starting_index: Optional[int] = None # noqa: F821 + mol_idxs: Iterable[int], + mof: "mof.MOF", + starting_index: Optional[int] = None, + add_additional_site: bool = True, # noqa: F821 ) -> Molecule: """Wrap a molecule in the cell of the MOF by walking along the structure graph. @@ -30,6 +33,7 @@ def wrap_molecule( mof (MOF): MOF object that contains the mol_idxs. starting_index (int, optional): Starting index for the walk. Defaults to 0. + add_additional_site (bool, optional): Whether to add an additional site Returns: Molecule: wrapped molecule @@ -76,7 +80,7 @@ def wrap_molecule( - new_positions_cart[current_index] ) > VestaCutoffDictNN._lookup_dict[species_a][species_b] - ): + ) & add_additional_site: logger.warning( "Warning: neighbor_index {} is already in new_positions_cart, " "but the distance is too large. " diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py index b334538..d2bff13 100644 --- a/src/moffragmentor/mof.py +++ b/src/moffragmentor/mof.py @@ -71,6 +71,9 @@ def _reset(self): self._bridges = None self._nx_graph = None + def __copy__(self): + return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__()) + def dump(self, path) -> None: """Dump this object as pickle file""" pickle_dump(self, path) diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py index a55d856..ed80893 100644 --- a/src/moffragmentor/sbu/node.py +++ b/src/moffragmentor/sbu/node.py @@ -6,9 +6,8 @@ from collections import defaultdict from typing import Set -import numpy as np from pymatgen.analysis.graphs import MoleculeGraph -from pymatgen.core import Molecule, Structure +from pymatgen.core import Molecule, Site, Structure from moffragmentor.fragmentor.molfromgraph import wrap_molecule from moffragmentor.fragmentor.splitter import _sites_and_classified_indices_from_indices @@ -21,10 +20,10 @@ _BRANCHING_DUMMY = "Kr" -def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False): +def _build_mol_and_graph(mof, indices, ignore_hidden_indices=True, add_additional_site=True): indices = set(indices) sites_and_indices = _sites_and_classified_indices_from_indices(mof, indices) - if not ignore_hidden_indices: + if ignore_hidden_indices: relevant_indices = indices - set(sites_and_indices.hidden_vertices) else: relevant_indices = indices @@ -51,13 +50,16 @@ def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False): selected_edge_indices.append(edge_idx) index_map = dict(zip(relevant_indices, range(len(relevant_indices)))) + molecule = Molecule(selected_species, selected_positions) selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices] selected_edges = _reindex_list_of_tuple(selected_edges, index_map) edge_dict = dict(zip(selected_edges, [None] * len(selected_edges))) molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict) - mol, mapping = wrap_molecule(list(relevant_indices), mof) + mol, mapping = wrap_molecule( + list(relevant_indices), mof, add_additional_site=add_additional_site + ) return mol, molecule_graph, mapping @@ -88,15 +90,34 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, for i in branching_to_binding | branching_to_node_metal: dummy_structure.replace(i, _BRANCHING_DUMMY) - mol_without_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph( - mof, # todo: fix with dummy mof structure + mol_w_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph( + mof, list(node_indices) + list(binding_to_node_metal) + list(branching_to_binding) + list(branching_to_node_metal), - ignore_hidden_indices=True, + ignore_hidden_indices=False, + add_additional_site=False, ) - return mol, graph, mapping, mol_without_dummy, graph_w_dummy, mapping_w_dummy + + inverse_mapping = {v[0]: k for k, v in mapping_w_dummy.items()} + # let's replace here now the atoms with the dummys as doing it beforehand might cause issues + # (e.g. we do not have the distances for a cutoffdict) + + for i in branching_to_binding | branching_to_node_metal: + if i not in node_indices & set(mof.metal_indices): + mol_w_dummy._sites[inverse_mapping[i]] = Site( + _BRANCHING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords + ) + + for i in binding_to_node_metal: + mol_w_dummy._sites[inverse_mapping[i]] = Site( + _BINDING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords + ) + + graph_w_dummy.molecule = mol_w_dummy + + return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices): diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py index 5e82a81..53624c8 100644 --- a/src/moffragmentor/sbu/sbu.py +++ b/src/moffragmentor/sbu/sbu.py @@ -100,7 +100,9 @@ def __init__( """ self.molecule = molecule self._mapping = molecule_original_indices_mapping + self._indices = sum(list(molecule_original_indices_mapping.values()), []) + self._original_indices = self._indices self.molecule_graph = molecule_graph self._original_graph_branching_indices = graph_branching_indices From c6cf0f730b431634019b62ce5264fb697edd2818 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Mon, 7 Nov 2022 09:53:02 +0100 Subject: [PATCH 7/9] remove the format --- setup.cfg | 1 - src/moffragmentor/sbu/node.py | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 00a1a5b..dca614c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -181,6 +181,5 @@ import-order-style = pycharm application-import-names = moffragmentor tests -format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s per-file-ignores = tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209 \ No newline at end of file diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py index ed80893..da3193c 100644 --- a/src/moffragmentor/sbu/node.py +++ b/src/moffragmentor/sbu/node.py @@ -107,12 +107,16 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, for i in branching_to_binding | branching_to_node_metal: if i not in node_indices & set(mof.metal_indices): mol_w_dummy._sites[inverse_mapping[i]] = Site( - _BRANCHING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords + _BRANCHING_DUMMY, + mol_w_dummy._sites[inverse_mapping[i]].coords, + properties={"original_species": str(mol_w_dummy._sites[inverse_mapping[i]].specie)}, ) for i in binding_to_node_metal: mol_w_dummy._sites[inverse_mapping[i]] = Site( - _BINDING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords + _BINDING_DUMMY, + mol_w_dummy._sites[inverse_mapping[i]].coords, + properties={"original_species": str(mol_w_dummy._sites[inverse_mapping[i]].specie)}, ) graph_w_dummy.molecule = mol_w_dummy From a20e15b3004049fc4fc8e0d2afbc95d741d8af33 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Mon, 7 Nov 2022 18:13:47 +0100 Subject: [PATCH 8/9] chore: leave the small unitcell fix for later --- src/moffragmentor/fragmentor/linkerlocator.py | 13 +++---- src/moffragmentor/fragmentor/molfromgraph.py | 2 +- src/moffragmentor/mof.py | 1 + src/moffragmentor/net.py | 38 +++++++++++++++++- src/moffragmentor/sbu/node.py | 23 +++++++++-- src/moffragmentor/sbu/sbu.py | 39 +++++++++++++++++-- 6 files changed, 98 insertions(+), 18 deletions(-) diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py index 71cdec1..d6d45cd 100644 --- a/src/moffragmentor/fragmentor/linkerlocator.py +++ b/src/moffragmentor/fragmentor/linkerlocator.py @@ -6,9 +6,8 @@ from pymatgen.core import Structure from structuregraph_helpers.subgraph import get_subgraphs_as_molecules -from .molfromgraph import wrap_molecule -from ..sbu import Linker, LinkerCollection -from ..utils import _flatten_list_of_sets +from moffragmentor.fragmentor.molfromgraph import wrap_molecule +from moffragmentor.sbu import Linker, LinkerCollection __all__ = ["create_linker_collection", "identify_linker_binding_indices"] @@ -87,7 +86,7 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals # Second step: extract the connected components # return all as molecules - _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules( + _mols, graphs, idxs, _centers, coordinates = get_subgraphs_as_molecules( graph_, return_unique=False, filter_in_cell=False, @@ -97,13 +96,11 @@ def _create_linkers_from_node_location_result( # pylint:disable=too-many-locals # Third: pick those molecules that are closest to the UC # ToDo: we should be able to skip this - linker_indices, coords = _pick_central_linker_indices(mof, coordinates) + linker_indices, _coords = _pick_central_linker_indices(mof, coordinates) found_frac_centers = set() found_hashes = set() - for i, linker_index in enumerate(linker_indices): + for linker_index in linker_indices: idx = idxs[linker_index] - center = centers[linker_index] - coords_ = coords[i] branching_indices = node_location_result.branching_indices & set(idx) mol, mapping = wrap_molecule(idx, mof) linker = Linker( diff --git a/src/moffragmentor/fragmentor/molfromgraph.py b/src/moffragmentor/fragmentor/molfromgraph.py index 978bd1a..b29a7d1 100644 --- a/src/moffragmentor/fragmentor/molfromgraph.py +++ b/src/moffragmentor/fragmentor/molfromgraph.py @@ -33,7 +33,7 @@ def wrap_molecule( mof (MOF): MOF object that contains the mol_idxs. starting_index (int, optional): Starting index for the walk. Defaults to 0. - add_additional_site (bool, optional): Whether to add an additional site + add_additional_site (bool): Whether to add an additional site Returns: Molecule: wrapped molecule diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py index d2bff13..b249ffd 100644 --- a/src/moffragmentor/mof.py +++ b/src/moffragmentor/mof.py @@ -72,6 +72,7 @@ def _reset(self): self._nx_graph = None def __copy__(self): + """Make a a new MOF object with copies of the same structure and structure graph.""" return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__()) def dump(self, path) -> None: diff --git a/src/moffragmentor/net.py b/src/moffragmentor/net.py index f756d53..4a0482e 100644 --- a/src/moffragmentor/net.py +++ b/src/moffragmentor/net.py @@ -54,6 +54,35 @@ def is_3d_parallel(v1: np.array, v2: np.array, eps: float = 1e-5) -> bool: return np.allclose(cp, np.zeros(3), atol=eps) +def get_image_if_on_edge(frac_coords): + # exactly one --> return the -1 image + images = np.zeros(3, dtype=int) + is_exactly_one = np.argwhere(np.abs(frac_coords - 1) < 1e-4) + if is_exactly_one.size > 0: + images[is_exactly_one] = -1 + is_exactly_zero = np.argwhere(np.abs(frac_coords) < 1e-4) + if is_exactly_zero.size > 0: + images[is_exactly_zero] = 1 + return images + + +def sanitize_graph(structure_graph): + # if there is a node with only one edge, it is probably because the binding partner is at an edge or corner of the structure + # that is, at least one fractional coordinate is 0 or 1 + # in this case, we add a bond to the image of this site + new_edges = [] + for i, _ in enumerate(structure_graph.structure): + connected_sites = structure_graph.get_connected_sites(i) + if len(connected_sites) == 1: + frac_coords = connected_sites[0].site.frac_coords + images = get_image_if_on_edge(frac_coords) + if np.any(images != 0): + new_edges.append((i, connected_sites[0].index, images)) + + for edge in new_edges: + structure_graph.add_edge(edge[0], edge[1], to_jimage=edge[2]) + + class VoltageEdge: """Representation of an edge in a labeled quotient graph (LQG). @@ -268,12 +297,17 @@ def get_dummy_structure(self) -> Structure: lattice=self.lattice, species=symbols, coords=coords, coords_are_cartesian=True ) - def get_pmg_structure_graph(self, simplify: bool = True) -> StructureGraph: + def get_pmg_structure_graph( + self, simplify: bool = True, sanitize: bool = False + ) -> StructureGraph: """Return a StructureGraph object from the Net object Args: simplify (bool): Whether to simplify the graph. Defaults to True. If True, 2c vertices are removed. + sanitize (bool): Whether to sanitize the graph. Defaults to False. + If True, add edge to periodic image for sites with only one neighbor + if this neighbor is at the egde of the unit cell. Returns: StructureGraph: The StructureGraph object. @@ -292,6 +326,8 @@ def get_pmg_structure_graph(self, simplify: bool = True) -> StructureGraph: ] = None structure_graph = StructureGraph.with_edges(s, edge_dict) + if sanitize: + sanitize_graph(structure_graph) if simplify: structure_graph = _simplify_structure_graph(structure_graph) return structure_graph diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py index da3193c..ee12f95 100644 --- a/src/moffragmentor/sbu/node.py +++ b/src/moffragmentor/sbu/node.py @@ -79,7 +79,13 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, node_metal_atoms = [i for i in node_indices if i in mof.metal_indices] binding_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], [])) binding_to_node_metal = binding_to_node_metal.intersection(all_binding_indices) - branching_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], [])) + + branching_to_node_metal = set( + sum( + [mof.get_neighbor_indices(i) for i in node_metal_atoms if i in all_branching_indices], + [], + ) + ) binding_neighbors = sum([mof.get_neighbor_indices(i) for i in binding_to_node_metal], []) branching_to_binding = set(binding_neighbors).intersection(all_branching_indices) @@ -89,7 +95,7 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, dummy_structure.replace(i, _BINDING_DUMMY) for i in branching_to_binding | branching_to_node_metal: dummy_structure.replace(i, _BRANCHING_DUMMY) - + dummy_branching_sites = branching_to_binding | branching_to_node_metal mol_w_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph( mof, list(node_indices) @@ -121,7 +127,7 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, graph_w_dummy.molecule = mol_w_dummy - return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy + return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy, dummy_branching_sites def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices): @@ -131,7 +137,15 @@ def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all binding_indices = node_indices & all_binding_indices graph_branching_indices = branching_indices & node_indices - mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy = _extract_and_wrap( + ( + mol, + graph, + mapping, + mol_w_dummy, + graph_w_dummy, + mapping_w_dummy, + dummy_branching_sites, + ) = _extract_and_wrap( node_indices=node_indices, all_branching_indices=all_branching_indices, all_binding_indices=all_binding_indices, @@ -147,6 +161,7 @@ def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all dummy_molecule=mol_w_dummy, dummy_molecule_graph=graph_w_dummy, dummy_molecule_indices_mapping=mapping_w_dummy, + dummy_branching_indices=dummy_branching_sites, ) return node diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py index 53624c8..6cd9895 100644 --- a/src/moffragmentor/sbu/sbu.py +++ b/src/moffragmentor/sbu/sbu.py @@ -67,6 +67,12 @@ class SBU: To obtain the "original" coordinates, use the `_coordinates` attribute. + .. note:: Dummy molecules + + In dummy molecules the binding and branching sites are replaces by + dummy atoms (noble gas). They also have special properties that indicate + the original species. + Examples: >>> # visualize the molecule >>> sbu_object.show_molecule() @@ -84,6 +90,7 @@ def __init__( dummy_molecule: Optional[Molecule] = None, dummy_molecule_graph: Optional[MoleculeGraph] = None, dummy_molecule_indices_mapping: Optional[Dict[int, List[int]]] = None, + dummy_branching_indices: Optional[Collection[int]] = None, ): """Initialize a secondary building block. @@ -97,6 +104,13 @@ def __init__( binding_indices (Collection[int]): Binding indices in the original structure. molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional): Mapping from molecule indices to original indices. Defaults to None. + dummy_molecule (Optional[Molecule], optional): Dummy molecule. Defaults to None. + dummy_molecule_graph (Optional[MoleculeGraph], optional): Dummy molecule graph. + Defaults to None. + dummy_molecule_indices_mapping (Optional[Dict[int, List[int]]], optional): + Dummy molecule indices mapping. Defaults to None. + dummy_branching_indices (Optional[Collection[int]], optional): + Dummy branching indices. Defaults to None. """ self.molecule = molecule self._mapping = molecule_original_indices_mapping @@ -111,6 +125,7 @@ def __init__( self._dummy_molecule = dummy_molecule self._dummy_molecule_graph = dummy_molecule_graph self._dummy_molecule_indices_mapping = dummy_molecule_indices_mapping + self._dummy_branching_indices = dummy_branching_indices self.mapping_from_original_indices = defaultdict(list) if molecule_original_indices_mapping is None: @@ -120,6 +135,17 @@ def __init__( for k, v in molecule_original_indices_mapping.items(): for i in v: self.mapping_from_original_indices[i].append(k) + + if dummy_molecule: + self.dummy_mapping_from_original_indices = defaultdict(list) + if dummy_molecule_indices_mapping is None: + for ori_index, index in zip(self._indices, range(len(dummy_molecule))): + self.dummy_mapping_from_original_indices[ori_index].append(index) + else: + for k, v in dummy_molecule_indices_mapping.items(): + for i in v: + self.dummy_mapping_from_original_indices[i].append(k) + self.mapping_to_original_indices = {} for key, value in self.mapping_from_original_indices.items(): for v in value: @@ -232,9 +258,14 @@ def original_graph_branching_indices(self): @property def graph_branching_indices(self): indices = [] - for i in self.original_graph_branching_indices: - for index in self.mapping_from_original_indices[i]: - indices.append(index) + if self._dummy_branching_indices is None: + for i in self.original_graph_branching_indices: + for index in self.mapping_from_original_indices[i]: + indices.append(index) + else: + for i in self._dummy_branching_indices: + for index in self.dummy_mapping_from_original_indices[i]: + indices.append(index) return indices @cached_property @@ -275,7 +306,7 @@ def __eq__(self, other: "SBU") -> bool: @cached_property def branching_coords(self): - return self.cart_coords[self.graph_branching_indices] + return self.cart_coords[self.graph_branching_indices] if self._dummy_branching_indices is None else self._dummy_molecule.cart_coords[self.graph_branching_indices] @property def original_binding_indices(self): From 95374a4834232b4e22e2156eb8a96076c0a41427 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Mon, 7 Nov 2022 18:15:11 +0100 Subject: [PATCH 9/9] chore: remove lint --- src/moffragmentor/sbu/sbu.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py index 6cd9895..1214a9b 100644 --- a/src/moffragmentor/sbu/sbu.py +++ b/src/moffragmentor/sbu/sbu.py @@ -306,7 +306,11 @@ def __eq__(self, other: "SBU") -> bool: @cached_property def branching_coords(self): - return self.cart_coords[self.graph_branching_indices] if self._dummy_branching_indices is None else self._dummy_molecule.cart_coords[self.graph_branching_indices] + return ( + self.cart_coords[self.graph_branching_indices] + if self._dummy_branching_indices is None + else self._dummy_molecule.cart_coords[self.graph_branching_indices] + ) @property def original_binding_indices(self):