From 942b6c9a1497672721474bd6b7cd9e369b143525 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Wed, 2 Nov 2022 17:50:31 +0100
Subject: [PATCH 1/9] =?UTF-8?q?Bump=20version:=200.0.3-dev=20=E2=86=92=200?=
 =?UTF-8?q?.0.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg             | 2 +-
 docs/source/conf.py          | 2 +-
 setup.cfg                    | 2 +-
 src/moffragmentor/version.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 074cd8e..d11ddbe 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.3-dev
+current_version = 0.0.3
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 0c17edf..3ed9780 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -24,7 +24,7 @@
 author = "Kevin Maik Jablonka"
 
 # The full version, including alpha/beta/rc tags
-release = "0.0.3-dev"
+release = "0.0.3"
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/setup.cfg b/setup.cfg
index a2b5930..ac5916a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = moffragmentor
-version = 0.0.3-dev
+version = 0.0.3
 description = Splits MOFs into metal nodes and linkers.
 author = Kevin Maik Jablonka
 author_email = mail@kjablonka.com
diff --git a/src/moffragmentor/version.py b/src/moffragmentor/version.py
index 5a2a4a4..c1c13b8 100644
--- a/src/moffragmentor/version.py
+++ b/src/moffragmentor/version.py
@@ -14,7 +14,7 @@
     "get_git_hash",
 ]
 
-VERSION = "0.0.3-dev"
+VERSION = "0.0.3"
 
 
 def get_git_hash() -> str:

From 1c5a2cf25c0841de6f4518ad9b0e499d2b3696ca Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Wed, 2 Nov 2022 17:50:41 +0100
Subject: [PATCH 2/9] =?UTF-8?q?Bump=20version:=200.0.3=20=E2=86=92=200.0.4?=
 =?UTF-8?q?-dev?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg             | 2 +-
 docs/source/conf.py          | 2 +-
 setup.cfg                    | 2 +-
 src/moffragmentor/version.py | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index d11ddbe..1425f5d 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.3
+current_version = 0.0.4-dev
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3ed9780..898b80e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -24,7 +24,7 @@
 author = "Kevin Maik Jablonka"
 
 # The full version, including alpha/beta/rc tags
-release = "0.0.3"
+release = "0.0.4-dev"
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/setup.cfg b/setup.cfg
index ac5916a..9187a96 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = moffragmentor
-version = 0.0.3
+version = 0.0.4-dev
 description = Splits MOFs into metal nodes and linkers.
 author = Kevin Maik Jablonka
 author_email = mail@kjablonka.com
diff --git a/src/moffragmentor/version.py b/src/moffragmentor/version.py
index c1c13b8..da56be8 100644
--- a/src/moffragmentor/version.py
+++ b/src/moffragmentor/version.py
@@ -14,7 +14,7 @@
     "get_git_hash",
 ]
 
-VERSION = "0.0.3"
+VERSION = "0.0.4-dev"
 
 
 def get_git_hash() -> str:

From 32463d2822c9d52a8e42d342aea965d4b4987dd3 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Sat, 5 Nov 2022 12:22:35 +0100
Subject: [PATCH 3/9] feat: also break nodes if it is the full MOF

---
 src/moffragmentor/fragmentor/__init__.py    | 7 +++++++
 src/moffragmentor/fragmentor/nodelocator.py | 5 ++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/moffragmentor/fragmentor/__init__.py b/src/moffragmentor/fragmentor/__init__.py
index a3ec970..f634a28 100644
--- a/src/moffragmentor/fragmentor/__init__.py
+++ b/src/moffragmentor/fragmentor/__init__.py
@@ -119,6 +119,13 @@ def run_fragmentation(
                 )
 
     linker_collection = LinkerCollection(is_linker)
+    if len(linker_collection) == 0 and len(is_capping) > 0:
+        logger.warning(
+            "No linkers found, but capping molecules. Assigning capping molecules to linkers."
+        )
+        linker_collection = LinkerCollection(is_capping)
+        is_capping = []
+
     capping_molecules = LinkerCollection(is_capping)
 
     # Now handle the case of the the frameworks that have linkers without core (e.g. H-COO)
diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py
index 7eed80e..1b24408 100644
--- a/src/moffragmentor/fragmentor/nodelocator.py
+++ b/src/moffragmentor/fragmentor/nodelocator.py
@@ -258,7 +258,10 @@ def break_organic_nodes(node_result, mof):
     """If we have a node that is mostly organic, we break it up into smaller pieces."""
     new_nodes = []
     for node in node_result.nodes:
-        if check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
+        if len(node) == len(mof): 
+            logger.debug('Breaking node as full MOF is assigned as node.')
+            new_nodes.extend(break_rod_node(mof, node))
+        elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
             node, node_result.branching_indices, mof
         ):
             new_nodes.append(node)

From 54109d03d498194b2e6bb5d97b42476986ca6027 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Sat, 5 Nov 2022 15:46:44 +0100
Subject: [PATCH 4/9] refactor: simplify SBU constructor and prepare passing of
 dummy structure

---
 src/moffragmentor/fragmentor/linkerlocator.py |  15 +--
 src/moffragmentor/fragmentor/nodelocator.py   |  12 +-
 src/moffragmentor/sbu/node.py                 | 124 ++++++++++++++----
 src/moffragmentor/sbu/sbu.py                  |  54 +-------
 4 files changed, 114 insertions(+), 91 deletions(-)

diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py
index a13e0fc..2373433 100644
--- a/src/moffragmentor/fragmentor/linkerlocator.py
+++ b/src/moffragmentor/fragmentor/linkerlocator.py
@@ -81,23 +81,13 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
         | set(mof.metal_indices) & all_node_indices
         # some metals might also be in the linker, e.g., in porphyrins
     )
-
-    # potential_linker_indices = set(list(range(len(mof.structure)))) - not_linker_indices
-    # get terminal indices we need to keep in the linker
-
-    # terminal_indices = []
-    # for linker_index in potential_linker_indices:
-    #     for neighbor in mof.get_neighbor_indices(linker_index):
-    #         if mof._is_terminal(neighbor):
-    #             terminal_indices.append(neighbor)
-
     graph_ = mof.structure_graph.__copy__()
     graph_.structure = Structure.from_sites(graph_.structure.sites)
     graph_.remove_nodes(not_linker_indices)
 
     # Second step: extract the connected components
     # return all as molecules
-    mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
+    _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
         graph_,
         return_unique=False,
         filter_in_cell=False,
@@ -121,15 +111,12 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
             molecule_graph=graphs[linker_index],
             center=center,
             graph_branching_indices=branching_indices,
-            closest_branching_index_in_molecule=branching_indices,
             binding_indices=identify_linker_binding_indices(
                 mof,
                 node_location_result.connecting_paths,
                 idx,
             ),
-            coordinates=coords_,
             original_indices=idx,
-            connecting_paths=[],
             molecule_original_indices_mapping=mapping,
         )
         frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass)
diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py
index 1b24408..b7345de 100644
--- a/src/moffragmentor/fragmentor/nodelocator.py
+++ b/src/moffragmentor/fragmentor/nodelocator.py
@@ -182,9 +182,9 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod
         node = Node.from_mof_and_indices(
             mof=mof,
             node_indices=node_indices,
-            branching_indices=node_location_result.branching_indices & node_indices,
-            binding_indices=node_location_result.binding_indices & node_indices,
-            connecting_paths=node_location_result.connecting_paths & node_indices,
+            branching_indices=node_location_result.branching_indices,
+            binding_indices=node_location_result.binding_indices,
+            connecting_paths=node_location_result.connecting_paths,
         )
         nodes.append(node)
 
@@ -217,7 +217,7 @@ def break_rod_nodes(mof, node_result):
     """Break rod nodes into smaller pieces."""
     new_nodes = []
     for node in node_result.nodes:
-        if get_sbu_dimensionality(mof, node) == 1:
+        if get_sbu_dimensionality(mof, node) >= 1:
             logger.debug("Found 1- or 2-dimensional node. Will break into isolated metals.")
             new_nodes.extend(break_rod_node(mof, node))
         else:
@@ -258,8 +258,8 @@ def break_organic_nodes(node_result, mof):
     """If we have a node that is mostly organic, we break it up into smaller pieces."""
     new_nodes = []
     for node in node_result.nodes:
-        if len(node) == len(mof): 
-            logger.debug('Breaking node as full MOF is assigned as node.')
+        if len(node) == len(mof):
+            logger.debug("Breaking node as full MOF is assigned as node.")
             new_nodes.extend(break_rod_node(mof, node))
         elif check_node(node, node_result.branching_indices, mof) or might_be_porphyrin(
             node, node_result.branching_indices, mof
diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py
index b2a418f..9ff664e 100644
--- a/src/moffragmentor/sbu/node.py
+++ b/src/moffragmentor/sbu/node.py
@@ -10,8 +10,10 @@
 from pymatgen.analysis.graphs import MoleculeGraph
 from pymatgen.core import Molecule, Structure
 from structuregraph_helpers.create import get_nx_graph_from_edge_tuples
+from structuregraph_helpers.subgraph import get_subgraphs_as_molecules
 
 from moffragmentor.fragmentor.molfromgraph import wrap_molecule
+from moffragmentor.utils import remove_all_nodes_not_in_indices
 
 from .sbu import SBU
 from ..fragmentor.splitter import _sites_and_classified_indices_from_indices
@@ -23,12 +25,100 @@
 
 __all__ = ["Node"]
 
+_BINDING_DUMMY = "Xe"
+_BRANCHING_DUMMY = "Kr"
+
+
+def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, mof):
+    # first, create a molecule and molecule graph from those indices
+    # that intersect with the node indices
+    branching_in_node = [i for i in all_branching_indices if i in node_indices]
+    binding_in_node = [i for i in all_binding_indices if i in node_indices]
+    graph_ = mof.structure_graph.__copy__()
+    remove_all_nodes_not_in_indices(
+        graph_, list(node_indices) + branching_in_node + binding_in_node
+    )
+    _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
+        graph_,
+        return_unique=True,
+        filter_in_cell=True,
+        disable_boundary_crossing_check=False,
+        prune_long_edges=True,
+    )
+    assert len(_mols) == 1
+    mol, mapping = wrap_molecule(idxs[0], mof)
+
+    # now, also create one molecule from the node indices, where we replace
+    # binding and branching indices with dummy atoms
+    node_metal_atoms = [i for i in node_indices if i in mof.metal_indices]
+    binding_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], []))
+    binding_to_node_metal = binding_to_node_metal.intersection(all_binding_indices)
+    branching_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], []))
+    binding_neighbors = sum([mof.get_neighbor_indices(i) for i in binding_to_node_metal], [])
+    branching_to_binding = set(binding_neighbors).intersection(all_branching_indices)
+
+    # Now, make a copy of the structure and replace the indices with dummy atoms
+    dummy_structure = Structure.from_sites(mof.sites)
+    for i in binding_to_node_metal:
+        dummy_structure.replace(i, _BINDING_DUMMY)
+    for i in branching_to_binding + branching_to_node_metal:
+        dummy_structure.replace(i, _BRANCHING_DUMMY)
+
+    to_delete = [
+        i
+        for i in range(len(dummy_structure))
+        if i
+        not in list(node_indices)
+        + list(binding_to_node_metal)
+        + list(branching_to_binding)
+        + list(branching_to_node_metal)
+    ]
+    graph_w_dummy = mof.structure_graph.__copy__()
+    graph_w_dummy.structure = dummy_structure
+    graph_w_dummy.remove_nodes_from(to_delete)
+    (
+        _mols_w_dummy,
+        graphs_w_dummy,
+        idxs_w_dummy,
+        centers_w_dummy,
+        coordinates_with_dummy,
+    ) = get_subgraphs_as_molecules(
+        graph_w_dummy,
+        return_unique=True,
+        filter_in_cell=True,
+        disable_boundary_crossing_check=False,
+        prune_long_edges=True,
+    )
+    assert len(_mols_w_dummy) == 1
+    mol_w_dummy, mapping_w_dummy = wrap_molecule(idxs_w_dummy[0], mof)
+
+
+def _find_node_hidden_indices(node_indices, all_binding_indices, all_branching_indices, mof):
+    # find the binding indices bound to node
+    node_neighbors = sum([mof.get_neighbor_indices(i) for i in node_indices], [])
+    relevant_binding_indices = [i for i in node_neighbors if i in all_binding_indices]
+    relevant_binding_indices_neighbors = sum(
+        [mof.get_neighbor_indices(i) for i in relevant_binding_indices], []
+    )
+    relevant_branching_indices = [
+        i for i in relevant_binding_indices_neighbors if i in all_branching_indices
+    ] + [i for i in all_branching_indices if i in node_neighbors]
+    hidden_indices = relevant_branching_indices + relevant_binding_indices
+    return set(hidden_indices)
+
 
 def node_from_mof_and_indices(  # pylint:disable=too-many-locals, too-many-arguments
-    cls, mof, node_indices, branching_indices, binding_indices, connecting_paths
+    cls, mof, node_indices, all_branching_indices, all_binding_indices, all_connecting_paths
 ):
     """Create a node from a MOF and a list of indices of different types."""
-    node_indices = node_indices | connecting_paths  # This should actually not be necessary
+    branching_indices = node_indices & all_branching_indices
+    binding_indices = node_indices & all_binding_indices
+    connecting_paths = all_connecting_paths & node_indices
+
+    hidden_indices = _find_node_hidden_indices(
+        node_indices, all_binding_indices, all_branching_indices, mof
+    )
+
     graph_ = mof.structure_graph.__copy__()
     graph_.structure = Structure.from_sites(graph_.structure.sites)
     to_delete = _not_relevant_structure_indices(mof.structure, node_indices)
@@ -66,31 +156,21 @@ def node_from_mof_and_indices(  # pylint:disable=too-many-locals, too-many-argum
     molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict)
     center = np.mean(molecule.cart_coords, axis=0)
     graph_branching_indices = branching_indices & node_indices
-    closest_branching_index_in_molecule = []
-
-    graph = get_nx_graph_from_edge_tuples(sites_and_indices.edges)
-    for branching_index in branching_indices:
-        if branching_index not in relevant_indices:
-
-            # now we need to find the closest neighbor in the
-            # set of vertices that are in the molecule
-            # ToDo: this is currently an assumption that it is terminal and the
-            # next partner then already is in the molecule,
-            # we could recursively call or get all the paths and then get the shortest
-            new_candidates = get_neighbors_from_nx_graph(graph, branching_index)[0]
-            closest_branching_index_in_molecule.append(new_candidates)
-        else:
-            closest_branching_index_in_molecule.append(branching_index)
+
     idx = [i for i in relevant_indices if i in node_indices]
 
-    mol_w_hidden, mapping = wrap_molecule(idx + sites_and_indices.hidden_vertices, mof)
+    # Add the branching indices here?
+    mol_w_hidden, mapping = wrap_molecule(
+        idx + sites_and_indices.hidden_vertices + list(hidden_indices), mof
+    )
+
+    # perhaps than delete here the hidden ones again to to get the mol without the hidden ones
 
     node = cls(
         molecule=mol_w_hidden,
         molecule_graph=molecule_graph,
         center=center,
         graph_branching_indices=graph_branching_indices,
-        closest_branching_index_in_molecule=closest_branching_index_in_molecule,
         binding_indices=binding_indices,
         original_indices=idx + sites_and_indices.hidden_vertices,
         persistent_non_metal_bridged=sites_and_indices.persistent_non_metal_bridged_components,
@@ -136,9 +216,9 @@ def from_mof_and_indices(  # pylint:disable=too-many-arguments
             cls,
             mof,
             node_indices,
-            branching_indices,
-            binding_indices,
-            connecting_paths=connecting_paths,
+            all_branching_indices=branching_indices,
+            all_binding_indices=binding_indices,
+            all_connecting_paths=connecting_paths,
         )
 
     def __repr__(self) -> str:
diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py
index 897f911..652f434 100644
--- a/src/moffragmentor/sbu/sbu.py
+++ b/src/moffragmentor/sbu/sbu.py
@@ -55,11 +55,6 @@ class SBU:
 
         * graph_branching_indices: are the branching indices according
             to the graph-based definition. They might not be part of the molecule.
-        * closest_branching_index_in_molecule: those are always part of the molecule.
-            In case the branching index is part of the molecule,
-            they are equal to to the graph_branching_indices.
-            Otherwise they are obtained as the closest vertex of the original
-            branching vertex that is part of the molecule.
         * binding_indices: are the indices of the sites between
             the branching index and metal
         * original_indices: complete original set of indices that has been selected
@@ -90,13 +85,12 @@ class SBU:
         >>> sbu_object.search_pubchem()
     """
 
-    def __init__(  # pylint:disable=too-many-arguments
+    def __init__(
         self,
         molecule: Molecule,
         molecule_graph: MoleculeGraph,
         center: np.ndarray,
         graph_branching_indices: Collection[int],
-        closest_branching_index_in_molecule: Collection[int],
         binding_indices: Collection[int],
         original_indices: Collection[int],
         persistent_non_metal_bridged: Optional[Collection[int]] = None,
@@ -115,22 +109,9 @@ def __init__(  # pylint:disable=too-many-arguments
             center (np.ndarray): Center of the SBU.
             graph_branching_indices (Collection[int]): Branching indices
                 in the original structure.
-            closest_branching_index_in_molecule (Collection[int]):1
-                Closest branching index in the molecule.
             binding_indices (Collection[int]): Binding indices in the original structure.
             original_indices (Collection[int]): List of all indicies in the original
                 structure this SBU corresponds to.
-            persistent_non_metal_bridged (Optional[Collection[int]], optional):
-                components that are connected via a bridge both in the MOF structure
-                and building block molecule. No metal is part of the edge, i.e.,
-                bound solvents are not included in this set. Defaults to None.
-            terminal_in_mol_not_terminal_in_struct (Optional[Collection[int]], optional):
-                Tndices that are terminal in the molecule but not terminal in the structure.
-                Defaults to None.
-            connecting_paths (Optional[Collection[int]], optional):
-                Paths between node atoms and branching atoms. Defaults to None.
-            coordinates (Optional[np.ndarray], optional): Coordinates of all atoms in the molecule.
-                Defaults to None.
             molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional):
                 Mapping from molecule indices to original indices. Defaults to None.
         """
@@ -139,10 +120,7 @@ def __init__(  # pylint:disable=too-many-arguments
         self._original_indices = original_indices
         self.molecule_graph = molecule_graph
         self._original_graph_branching_indices = graph_branching_indices
-        self._original_closest_branching_index_in_molecule = closest_branching_index_in_molecule
 
-        self._persistent_non_metal_bridged = persistent_non_metal_bridged
-        self._terminal_in_mol_not_terminal_in_struct = terminal_in_mol_not_terminal_in_struct
         self._original_binding_indices = binding_indices
 
         self.mapping_from_original_indices = defaultdict(list)
@@ -158,15 +136,6 @@ def __init__(  # pylint:disable=too-many-arguments
             for v in value:
                 self.mapping_to_original_indices[v] = key
         self._indices = original_indices
-        self._original_connecting_paths = connecting_paths
-        self.connecting_paths = []
-        self._coordinates = coordinates
-        for i in connecting_paths:
-            try:
-                for index in self.mapping_from_original_indices[i]:
-                    self.connecting_paths.append(index)
-            except KeyError:
-                pass
 
     @property
     def center(self):
@@ -255,10 +224,7 @@ def composition(self):
 
     @property
     def cart_coords(self):
-        # if self._coordinates is not None:
-        #    return self._coordinates
         return self.molecule.cart_coords
-        # return np.array(self._coordinates)
 
     @cached_property
     def mol_with_coords(self):
@@ -323,16 +289,6 @@ def __eq__(self, other: "SBU") -> bool:
     def branching_coords(self):
         return self.cart_coords[self.graph_branching_indices]
 
-    @cached_property
-    def connecting_indices(self):
-        indices = []
-
-        for p in self._original_closest_branching_index_in_molecule:
-            for index in self.mapping_from_original_indices[p]:
-                indices.append(index)
-
-        return indices
-
     @property
     def original_binding_indices(self):
         return self._original_binding_indices
@@ -359,17 +315,17 @@ def get_openbabel_mol(self):
         return pm
 
     def show_molecule(self):
-        import nglview  # pylint:disable=import-outside-toplevel
+        import nglview
 
         return nglview.show_pymatgen(self.molecule)
 
     def show_connecting_structure(self):
-        import nglview  # pylint:disable=import-outside-toplevel
+        import nglview
 
         return nglview.show_pymatgen(self._get_connected_sites_structure())
 
     def show_binding_structure(self):
-        import nglview  # pylint:disable=import-outside-toplevel
+        import nglview
 
         return nglview.show_pymatgen(self._get_binding_sites_structure())
 
@@ -394,7 +350,7 @@ def smiles(self) -> str:
         try:
             canonical = Chem.CanonSmiles(smiles)
             return canonical
-        except Exception:  # pylint: disable=broad-except
+        except Exception:
             return smiles
 
     def _get_boxed_structure(self):

From e96f430a3b2ea57c037e0b4af65a18ae8ca0028f Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Sun, 6 Nov 2022 14:58:48 +0100
Subject: [PATCH 5/9] first refactoring done

---
 src/moffragmentor/fragmentor/linkerlocator.py |  16 +-
 src/moffragmentor/fragmentor/nodelocator.py   |  19 +-
 src/moffragmentor/fragmentor/splitter.py      |   4 +-
 src/moffragmentor/mof.py                      |   4 +
 src/moffragmentor/sbu/node.py                 | 193 ++++++------------
 src/moffragmentor/sbu/sbu.py                  |  49 ++---
 tests/conftest.py                             |  44 ++--
 tests/fragmentor/test_locator.py              |  10 +-
 tests/test_mof.py                             |   9 -
 9 files changed, 133 insertions(+), 215 deletions(-)

diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py
index 2373433..71cdec1 100644
--- a/src/moffragmentor/fragmentor/linkerlocator.py
+++ b/src/moffragmentor/fragmentor/linkerlocator.py
@@ -55,16 +55,16 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
     # First step: remove everything that is node or unbound solvent
     # bound solvent is part of the node by default
     all_node_indices = set()
-    all_persistent_non_metal_bridges = set()
+    # all_persistent_non_metal_bridges = set()
     for node_indices in node_location_result.nodes:
         all_node_indices.update(node_indices)
 
-    for node in node_collection:
-        all_persistent_non_metal_bridges.update(
-            _flatten_list_of_sets(
-                node._persistent_non_metal_bridged  # pylint:disable=protected-access
-            )
-        )
+    # for node in node_collection:
+    #     all_persistent_non_metal_bridges.update(
+    #         _flatten_list_of_sets(
+    #             node._persistent_non_metal_bridged  # pylint:disable=protected-access
+    #         )
+    #     )
 
     not_linker_indices = (
         (
@@ -109,14 +109,12 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
         linker = Linker(
             molecule=mol,
             molecule_graph=graphs[linker_index],
-            center=center,
             graph_branching_indices=branching_indices,
             binding_indices=identify_linker_binding_indices(
                 mof,
                 node_location_result.connecting_paths,
                 idx,
             ),
-            original_indices=idx,
             molecule_original_indices_mapping=mapping,
         )
         frac_center = mof.structure.lattice.get_fractional_coords(mol.center_of_mass)
diff --git a/src/moffragmentor/fragmentor/nodelocator.py b/src/moffragmentor/fragmentor/nodelocator.py
index b7345de..0dd8964 100644
--- a/src/moffragmentor/fragmentor/nodelocator.py
+++ b/src/moffragmentor/fragmentor/nodelocator.py
@@ -184,7 +184,6 @@ def create_node_collection(mof, node_location_result: NodelocationResult) -> Nod
             node_indices=node_indices,
             branching_indices=node_location_result.branching_indices,
             binding_indices=node_location_result.binding_indices,
-            connecting_paths=node_location_result.connecting_paths,
         )
         nodes.append(node)
 
@@ -246,10 +245,10 @@ def check_node(node_indices, branching_indices, mof) -> bool:
         bool: True if the node is reasonable, False otherwise
     """
     # check if there is not way more organic than metal
-    num_carbons = len(node_indices & set(mof.c_indices))
+    num_organic = len(node_indices & set(mof.c_indices)) + len(node_indices & set(mof.n_indices))
     num_metals = len(node_indices & set(mof.metal_indices))
     branching_indices_in_node = branching_indices & node_indices
-    if num_carbons > num_metals + len(branching_indices_in_node):
+    if num_organic > num_metals + len(branching_indices_in_node):
         return False
     return True
 
@@ -307,7 +306,6 @@ def find_nodes(
     node_result = find_node_clusters(
         mof, unbound_solvent.indices, forbidden_indices=forbidden_indices
     )
-
     if create_single_metal_bus:
         # Rewrite the node result
         node_result = create_single_metal_nodes(mof, node_result)
@@ -334,16 +332,22 @@ def might_be_porphyrin(node_indices, branching_idx, mof):
     metal_in_node = _get_metal_sublist(node_indices, mof.metal_indices)
     node_indices = set(node_indices)
     branching_idx = set(branching_idx)
+    bound_to_metal = sum([mof.get_neighbor_indices(i) for i in metal_in_node], [])
+    branching_bound_to_metal = branching_idx & set(bound_to_metal)
     # ToDo: check and think if this can handle the general case
     # it should, at least if we only look at the metals
-    if (len(metal_in_node) == 1) & (len(node_indices) > 1):
+    if (len(metal_in_node) == 1) & (len(node_indices | branching_bound_to_metal) > 1):
         logger.debug(
             "metal_in_node",
             metal_in_node,
             node_indices,
         )
-        num_neighbors = len(mof.get_neighbor_indices(metal_in_node[0]))
-        if metal_and_branching_coplanar(node_indices, branching_idx, mof) & (num_neighbors > 2):
+        num_neighbors = len(bound_to_metal)
+        if (
+            metal_and_branching_coplanar(node_indices, branching_bound_to_metal, mof)
+            & (num_neighbors > 2)
+            & (len(branching_bound_to_metal) < 5)
+        ):
             logger.debug(
                 "Metal in linker found, indices: {}".format(
                     node_indices,
@@ -358,5 +362,6 @@ def detect_porphyrin(node_collection, mof):
     not_node = []
     for i, node in enumerate(node_collection):
         if might_be_porphyrin(node._original_indices, node._original_graph_branching_indices, mof):
+            logger.info("Found porphyrin in node {}".format(node._original_indices))
             not_node.append(i)
     return not_node
diff --git a/src/moffragmentor/fragmentor/splitter.py b/src/moffragmentor/fragmentor/splitter.py
index d09c987..f5e6920 100644
--- a/src/moffragmentor/fragmentor/splitter.py
+++ b/src/moffragmentor/fragmentor/splitter.py
@@ -7,8 +7,8 @@
 from pymatgen.analysis.graphs import MoleculeGraph
 from pymatgen.core import Molecule
 
-from .solventlocator import _locate_bound_solvent
-from ..utils import (
+from moffragmentor.fragmentor.solventlocator import _locate_bound_solvent
+from moffragmentor.utils import (
     _get_cartesian_coords,
     _get_molecule_edge_label,
     _get_vertices_of_smaller_component_upon_edge_break,
diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py
index f6bd6cb..b334538 100644
--- a/src/moffragmentor/mof.py
+++ b/src/moffragmentor/mof.py
@@ -257,6 +257,10 @@ def h_indices(self) -> List[int]:
     def c_indices(self) -> List[int]:
         return [i for i, species in enumerate(self.structure.species) if str(species) == "C"]
 
+    @cached_property
+    def n_indices(self) -> List[int]:
+        return [i for i, species in enumerate(self.structure.species) if str(species) == "N"]
+
     def get_neighbor_indices(self, site: int) -> List[int]:
         """Get list of indices of neighboring sites."""
         return [site.index for site in self.structure_graph.get_connected_sites(site)]
diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py
index 9ff664e..a55d856 100644
--- a/src/moffragmentor/sbu/node.py
+++ b/src/moffragmentor/sbu/node.py
@@ -9,19 +9,11 @@
 import numpy as np
 from pymatgen.analysis.graphs import MoleculeGraph
 from pymatgen.core import Molecule, Structure
-from structuregraph_helpers.create import get_nx_graph_from_edge_tuples
-from structuregraph_helpers.subgraph import get_subgraphs_as_molecules
 
 from moffragmentor.fragmentor.molfromgraph import wrap_molecule
-from moffragmentor.utils import remove_all_nodes_not_in_indices
-
-from .sbu import SBU
-from ..fragmentor.splitter import _sites_and_classified_indices_from_indices
-from ..utils import (
-    _not_relevant_structure_indices,
-    _reindex_list_of_tuple,
-    get_neighbors_from_nx_graph,
-)
+from moffragmentor.fragmentor.splitter import _sites_and_classified_indices_from_indices
+from moffragmentor.sbu.sbu import SBU
+from moffragmentor.utils import _reindex_list_of_tuple
 
 __all__ = ["Node"]
 
@@ -29,24 +21,56 @@
 _BRANCHING_DUMMY = "Kr"
 
 
+def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False):
+    indices = set(indices)
+    sites_and_indices = _sites_and_classified_indices_from_indices(mof, indices)
+    if not ignore_hidden_indices:
+        relevant_indices = indices - set(sites_and_indices.hidden_vertices)
+    else:
+        relevant_indices = indices
+
+    selected_positions, selected_species, selected_edge_indices = [], [], []
+
+    # give us a quick and easy way to filter the edges we want to keep
+    # for this reason we will use both edge "partners" as keys
+    # and have a list of indices in the original edges list as the value
+    edge_dict = defaultdict(list)
+    for i, egde in enumerate(sites_and_indices.edges):
+        edge_partner_1, edge_partner_2 = egde
+        edge_dict[edge_partner_1].append(i)
+        edge_dict[edge_partner_2].append(i)
+
+    for i, index in enumerate(indices):
+        if index in relevant_indices:
+            selected_positions.append(sites_and_indices.cartesian_coordinates[i])
+            selected_species.append(sites_and_indices.species[i])
+            edge_idxs = edge_dict[index]
+            for edge_idx in edge_idxs:
+                edge = sites_and_indices.edges[edge_idx]
+                if (edge[0] in relevant_indices) and (edge[1] in relevant_indices):
+                    selected_edge_indices.append(edge_idx)
+
+    index_map = dict(zip(relevant_indices, range(len(relevant_indices))))
+    molecule = Molecule(selected_species, selected_positions)
+    selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices]
+    selected_edges = _reindex_list_of_tuple(selected_edges, index_map)
+    edge_dict = dict(zip(selected_edges, [None] * len(selected_edges)))
+    molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict)
+
+    mol, mapping = wrap_molecule(list(relevant_indices), mof)
+
+    return mol, molecule_graph, mapping
+
+
 def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices, mof):
     # first, create a molecule and molecule graph from those indices
     # that intersect with the node indices
     branching_in_node = [i for i in all_branching_indices if i in node_indices]
     binding_in_node = [i for i in all_binding_indices if i in node_indices]
-    graph_ = mof.structure_graph.__copy__()
-    remove_all_nodes_not_in_indices(
-        graph_, list(node_indices) + branching_in_node + binding_in_node
-    )
-    _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
-        graph_,
-        return_unique=True,
-        filter_in_cell=True,
-        disable_boundary_crossing_check=False,
-        prune_long_edges=True,
+
+    mol, graph, mapping = _build_mol_and_graph(
+        mof, list(node_indices) + branching_in_node + binding_in_node
     )
-    assert len(_mols) == 1
-    mol, mapping = wrap_molecule(idxs[0], mof)
 
     # now, also create one molecule from the node indices, where we replace
     # binding and branching indices with dummy atoms
@@ -58,125 +82,46 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
     branching_to_binding = set(binding_neighbors).intersection(all_branching_indices)
 
     # Now, make a copy of the structure and replace the indices with dummy atoms
-    dummy_structure = Structure.from_sites(mof.sites)
+    dummy_structure = Structure.from_sites(mof.structure.sites)
     for i in binding_to_node_metal:
         dummy_structure.replace(i, _BINDING_DUMMY)
-    for i in branching_to_binding + branching_to_node_metal:
+    for i in branching_to_binding | branching_to_node_metal:
         dummy_structure.replace(i, _BRANCHING_DUMMY)
 
-    to_delete = [
-        i
-        for i in range(len(dummy_structure))
-        if i
-        not in list(node_indices)
+    mol_without_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph(
+        mof,  # todo: fix with dummy mof structure
+        list(node_indices)
         + list(binding_to_node_metal)
         + list(branching_to_binding)
-        + list(branching_to_node_metal)
-    ]
-    graph_w_dummy = mof.structure_graph.__copy__()
-    graph_w_dummy.structure = dummy_structure
-    graph_w_dummy.remove_nodes_from(to_delete)
-    (
-        _mols_w_dummy,
-        graphs_w_dummy,
-        idxs_w_dummy,
-        centers_w_dummy,
-        coordinates_with_dummy,
-    ) = get_subgraphs_as_molecules(
-        graph_w_dummy,
-        return_unique=True,
-        filter_in_cell=True,
-        disable_boundary_crossing_check=False,
-        prune_long_edges=True,
+        + list(branching_to_node_metal),
+        ignore_hidden_indices=True,
     )
-    assert len(_mols_w_dummy) == 1
-    mol_w_dummy, mapping_w_dummy = wrap_molecule(idxs_w_dummy[0], mof)
+    return mol, graph, mapping, mol_without_dummy, graph_w_dummy, mapping_w_dummy
 
 
-def _find_node_hidden_indices(node_indices, all_binding_indices, all_branching_indices, mof):
-    # find the binding indices bound to node
-    node_neighbors = sum([mof.get_neighbor_indices(i) for i in node_indices], [])
-    relevant_binding_indices = [i for i in node_neighbors if i in all_binding_indices]
-    relevant_binding_indices_neighbors = sum(
-        [mof.get_neighbor_indices(i) for i in relevant_binding_indices], []
-    )
-    relevant_branching_indices = [
-        i for i in relevant_binding_indices_neighbors if i in all_branching_indices
-    ] + [i for i in all_branching_indices if i in node_neighbors]
-    hidden_indices = relevant_branching_indices + relevant_binding_indices
-    return set(hidden_indices)
-
-
-def node_from_mof_and_indices(  # pylint:disable=too-many-locals, too-many-arguments
-    cls, mof, node_indices, all_branching_indices, all_binding_indices, all_connecting_paths
-):
+def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices):
     """Create a node from a MOF and a list of indices of different types."""
+    node_indices = node_indices
     branching_indices = node_indices & all_branching_indices
     binding_indices = node_indices & all_binding_indices
-    connecting_paths = all_connecting_paths & node_indices
-
-    hidden_indices = _find_node_hidden_indices(
-        node_indices, all_binding_indices, all_branching_indices, mof
-    )
-
-    graph_ = mof.structure_graph.__copy__()
-    graph_.structure = Structure.from_sites(graph_.structure.sites)
-    to_delete = _not_relevant_structure_indices(mof.structure, node_indices)
-    graph_.remove_nodes(to_delete)
-
-    sites_and_indices = _sites_and_classified_indices_from_indices(mof, node_indices)
-    relevant_indices = node_indices - set(sites_and_indices.hidden_vertices)
-
-    selected_positions, selected_species, selected_edge_indices = [], [], []
-
-    # give us a quick and easy way to filter the edges we want to keep
-    # for this reason we will use both edge "partners" as keys
-    # and have a list of indices in the original edges list as the value
-    edge_dict = defaultdict(list)
-    for i, egde in enumerate(sites_and_indices.edges):
-        edge_partner_1, edge_partner_2 = egde
-        edge_dict[edge_partner_1].append(i)
-        edge_dict[edge_partner_2].append(i)
-
-    for i, index in enumerate(node_indices):
-        if index in relevant_indices:
-            selected_positions.append(sites_and_indices.cartesian_coordinates[i])
-            selected_species.append(sites_and_indices.species[i])
-            edge_idxs = edge_dict[index]
-            for edge_idx in edge_idxs:
-                edge = sites_and_indices.edges[edge_idx]
-                if (edge[0] in relevant_indices) and (edge[1] in relevant_indices):
-                    selected_edge_indices.append(edge_idx)
-
-    index_map = dict(zip(relevant_indices, range(len(relevant_indices))))
-    molecule = Molecule(selected_species, selected_positions)
-    selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices]
-    selected_edges = _reindex_list_of_tuple(selected_edges, index_map)
-    edge_dict = dict(zip(selected_edges, [None] * len(selected_edges)))
-    molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict)
-    center = np.mean(molecule.cart_coords, axis=0)
     graph_branching_indices = branching_indices & node_indices
 
-    idx = [i for i in relevant_indices if i in node_indices]
-
-    # Add the branching indices here?
-    mol_w_hidden, mapping = wrap_molecule(
-        idx + sites_and_indices.hidden_vertices + list(hidden_indices), mof
+    mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy = _extract_and_wrap(
+        node_indices=node_indices,
+        all_branching_indices=all_branching_indices,
+        all_binding_indices=all_binding_indices,
+        mof=mof,
     )
 
-    # perhaps than delete here the hidden ones again to to get the mol without the hidden ones
-
     node = cls(
-        molecule=mol_w_hidden,
-        molecule_graph=molecule_graph,
-        center=center,
+        molecule=mol,
+        molecule_graph=graph,
         graph_branching_indices=graph_branching_indices,
         binding_indices=binding_indices,
-        original_indices=idx + sites_and_indices.hidden_vertices,
-        persistent_non_metal_bridged=sites_and_indices.persistent_non_metal_bridged_components,
-        terminal_in_mol_not_terminal_in_struct=sites_and_indices.hidden_vertices,
-        connecting_paths=connecting_paths,
         molecule_original_indices_mapping=mapping,
+        dummy_molecule=mol_w_dummy,
+        dummy_molecule_graph=graph_w_dummy,
+        dummy_molecule_indices_mapping=mapping_w_dummy,
     )
 
     return node
@@ -195,7 +140,6 @@ def from_mof_and_indices(  # pylint:disable=too-many-arguments
         node_indices: Set[int],
         branching_indices: Set[int],
         binding_indices: Set[int],
-        connecting_paths: Set[int],
     ):
         """Build a node object from a MOF and some intermediate outputs of the fragmentation.
 
@@ -206,8 +150,6 @@ def from_mof_and_indices(  # pylint:disable=too-many-arguments
                 that belong to this node.
             binding_indices (Set[int]): The indices of the binding points in the MOF
                 that belong to this node.
-            connecting_paths (Set[int]): The indices of the connecting paths in the MOF
-                that belong to this node.
 
         Returns:
             A node object.
@@ -218,7 +160,6 @@ def from_mof_and_indices(  # pylint:disable=too-many-arguments
             node_indices,
             all_branching_indices=branching_indices,
             all_binding_indices=binding_indices,
-            all_connecting_paths=connecting_paths,
         )
 
     def __repr__(self) -> str:
diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py
index 652f434..5e82a81 100644
--- a/src/moffragmentor/sbu/sbu.py
+++ b/src/moffragmentor/sbu/sbu.py
@@ -9,15 +9,14 @@
 import pubchempy as pcp
 from backports.cached_property import cached_property
 from loguru import logger
-from openbabel import pybel as pb
 from pymatgen.analysis.graphs import MoleculeGraph
 from pymatgen.core import Molecule, Structure
 from pymatgen.io.babel import BabelMolAdaptor
 from rdkit import Chem
 from scipy.spatial.distance import pdist
 
-from ..utils import pickle_dump
-from ..utils.mol_compare import mcs_rank
+from moffragmentor.utils import pickle_dump
+from moffragmentor.utils.mol_compare import mcs_rank
 
 
 def ob_mol_without_metals(obmol):
@@ -59,16 +58,6 @@ class SBU:
             the branching index and metal
         * original_indices: complete original set of indices that has been selected
             for this building blocks
-        * persistent_non_metal_bridged: components that are connected
-            via a bridge both in the MOF structure
-            and building block molecule. No metal is part of the edge,
-            i.e., bound solvents are not included in this set
-        * terminal_in_mol_not_terminal_in_struct: indices that are terminal
-            in the molecule but not terminal in the structure.
-            This is for instance, the case for carboxy groups that are only
-            coordinated with one O. In this case, a chemically more faithful
-            representation might be to not include the C of the carboxy
-            in the node. This collection allows us to do so.
 
     .. note::
 
@@ -89,15 +78,12 @@ def __init__(
         self,
         molecule: Molecule,
         molecule_graph: MoleculeGraph,
-        center: np.ndarray,
         graph_branching_indices: Collection[int],
         binding_indices: Collection[int],
-        original_indices: Collection[int],
-        persistent_non_metal_bridged: Optional[Collection[int]] = None,
-        terminal_in_mol_not_terminal_in_struct: Optional[Collection[int]] = None,
-        connecting_paths: Optional[Collection[int]] = None,
-        coordinates: Optional[np.ndarray] = None,
         molecule_original_indices_mapping: Optional[Dict[int, List[int]]] = None,
+        dummy_molecule: Optional[Molecule] = None,
+        dummy_molecule_graph: Optional[MoleculeGraph] = None,
+        dummy_molecule_indices_mapping: Optional[Dict[int, List[int]]] = None,
     ):
         """Initialize a secondary building block.
 
@@ -106,26 +92,27 @@ def __init__(
         Args:
             molecule (Molecule): Pymatgen molecule object.
             molecule_graph (MoleculeGraph): Pymatgen molecule graph object.
-            center (np.ndarray): Center of the SBU.
             graph_branching_indices (Collection[int]): Branching indices
                 in the original structure.
             binding_indices (Collection[int]): Binding indices in the original structure.
-            original_indices (Collection[int]): List of all indicies in the original
-                structure this SBU corresponds to.
             molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional):
                 Mapping from molecule indices to original indices. Defaults to None.
         """
         self.molecule = molecule
-        self._center = center
-        self._original_indices = original_indices
+        self._mapping = molecule_original_indices_mapping
+        self._indices = sum(list(molecule_original_indices_mapping.values()), [])
+        self._original_indices = self._indices
         self.molecule_graph = molecule_graph
         self._original_graph_branching_indices = graph_branching_indices
-
         self._original_binding_indices = binding_indices
 
+        self._dummy_molecule = dummy_molecule
+        self._dummy_molecule_graph = dummy_molecule_graph
+        self._dummy_molecule_indices_mapping = dummy_molecule_indices_mapping
+
         self.mapping_from_original_indices = defaultdict(list)
         if molecule_original_indices_mapping is None:
-            for ori_index, index in zip(self._original_indices, range(len(molecule))):
+            for ori_index, index in zip(self._indices, range(len(molecule))):
                 self.mapping_from_original_indices[ori_index].append(index)
         else:
             for k, v in molecule_original_indices_mapping.items():
@@ -135,7 +122,6 @@ def __init__(
         for key, value in self.mapping_from_original_indices.items():
             for v in value:
                 self.mapping_to_original_indices[v] = key
-        self._indices = original_indices
 
     @property
     def center(self):
@@ -310,6 +296,8 @@ def openbabel_mol(self):
         return self.get_openbabel_mol()
 
     def get_openbabel_mol(self):
+        from openbabel import pybel as pb
+
         a = BabelMolAdaptor(self.molecule)
         pm = pb.Molecule(a.openbabel_mol)
         return pm
@@ -363,13 +351,6 @@ def _get_boxed_structure(self):
         )
         return structure
 
-    def _get_connected_sites_structure(self):
-        sites = []
-        s = self._get_boxed_structure()
-        for i in self.connecting_indices:
-            sites.append(s[i])
-        return Structure.from_sites(sites)
-
     def _get_binding_sites_structure(self):
         sites = []
         s = self._get_boxed_structure()
diff --git a/tests/conftest.py b/tests/conftest.py
index 861403f..d35541d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1670,31 +1670,28 @@ def get_linker_object():
     linker = Linker(
         mol,
         mg,
-        center=np.array([-502.5498308, -406.76009494, -502.5498308]),
         graph_branching_indices={8, 13, 17},
-        closest_branching_index_in_molecule={1, 37, 46},
         binding_indices=[129, 137, 108, 141, 120, 152],
-        original_indices=[
-            96,
-            129,
-            23,
-            152,
-            60,
-            137,
-            105,
-            0,
-            1,
-            2,
-            108,
-            141,
-            69,
-            37,
-            14,
-            84,
-            120,
-            4,
-        ],
-        connecting_paths=[],
+        molecule_original_indices_mapping={
+            0: [96],
+            1: [129],
+            2: [23],
+            3: [152],
+            4: [60],
+            5: [137],
+            6: [105],
+            7: [0],
+            8: [1],
+            9: [2],
+            10: [108],
+            11: [141],
+            12: [69],
+            13: [37],
+            14: [14],
+            15: [84],
+            16: [120],
+            17: [46],
+        },
     )
     return linker
 
@@ -1707,7 +1704,6 @@ def get_node_object():
         node_indices={32, 1, 132, 134, 72, 137, 74, 139, 108, 110, 113, 115, 27, 30},
         branching_indices={1, 27, 30, 32},
         binding_indices={108, 110, 113, 115, 132, 134, 137, 139},
-        connecting_paths={1, 27, 30, 32, 108, 110, 113, 115, 132, 134, 137, 139},
     )
 
     return node
diff --git a/tests/fragmentor/test_locator.py b/tests/fragmentor/test_locator.py
index 97efd28..3a507fc 100644
--- a/tests/fragmentor/test_locator.py
+++ b/tests/fragmentor/test_locator.py
@@ -86,13 +86,15 @@ def test_find_rod_node_floating_mof_cluster(get_1d_node_with_floating):
     assert node_lengths[0] == 20
 
 
-def test_formatte_mof(get_formate_structure_and_graph):
+def test_formate_mof(get_formate_structure_and_graph):
     s, sg = get_formate_structure_and_graph
     mof = MOF(s, sg)
     bbs = mof.fragment()
-    assert len(bbs.nodes) == 1
-    assert len(bbs.capping_molecules) == 6
-    assert bbs.capping_molecules.composition == {"C1 H1 O2": 6}
+    assert len(bbs.nodes) == 2
+    assert (
+        len(bbs.capping_molecules) == 0
+    )  # since there is no linker, we assign the formate as capping molecule
+    assert bbs.linkers.composition == {"C1 H1 O2": 6}
 
 
 def test__get_solvent_molecules_bound_to_node(get_li_mof_with_floating):
diff --git a/tests/test_mof.py b/tests/test_mof.py
index f9f90fc..207899b 100644
--- a/tests/test_mof.py
+++ b/tests/test_mof.py
@@ -76,15 +76,6 @@ def test_hkust(get_hkust_mof):
     mof = get_hkust_mof
     fragments = mof.fragment()
     assert fragments.net_embedding.rcsr_code == "tbo"
-    for linker in fragments.linkers:
-        cs = linker._get_connected_sites_structure()
-        distances = _get_distances(cs)
-        assert max(distances) < 8
-
-    for node in fragments.nodes:
-        cs = node._get_connected_sites_structure()
-        distances = _get_distances(cs)
-        assert max(distances) < 8
 
 
 @pytest.mark.slow

From 7ff0cc34e607322526005565a75cd6c6501b2ad8 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Sun, 6 Nov 2022 16:19:52 +0100
Subject: [PATCH 6/9] dummy seems to work, still need to use it in the net
 construction and for the construction of the inputs for pormake

---
 .flake8                                      | 40 -------------------
 setup.cfg                                    | 42 ++++++++++++++++++++
 src/moffragmentor/fragmentor/molfromgraph.py |  8 +++-
 src/moffragmentor/mof.py                     |  3 ++
 src/moffragmentor/sbu/node.py                | 39 +++++++++++++-----
 src/moffragmentor/sbu/sbu.py                 |  2 +
 6 files changed, 83 insertions(+), 51 deletions(-)
 delete mode 100644 .flake8

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 26860e5..0000000
--- a/.flake8
+++ /dev/null
@@ -1,40 +0,0 @@
-#########################
-# Flake8 Configuration  #
-# (.flake8)             #
-#########################
-[flake8]
-ignore =
-    S301 # pickle
-    S403 # pickle
-    S404
-    S603
-    W503 # Line break before binary operator (flake8 is wrong)
-    E203  # whitespace before ':'
-    S101 # Complaining about assert statements
-    D101 # Docstring missing
-    D102 # Docstring missing
-    D103 # Docstring missing
-    D104 # Docstring missing
-    D400
-exclude =
-    .tox,
-    .git,
-    __pycache__,
-    docs/source/conf.py,
-    build,
-    dist,
-    tests/fixtures/*,
-    *.pyc,
-    *.egg-info,
-    .cache,
-    .eggs,
-    data
-max-line-length = 120
-max-complexity = 20
-import-order-style = pycharm
-application-import-names =
-    moffragmentor
-    tests
-format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s
-per-file-ignores =
-    tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 9187a96..00a1a5b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -142,3 +142,45 @@ exclude_lines =
 [darglint]
 docstring_style = google
 strictness = short
+
+
+#########################
+# Flake8 Configuration  #
+# (.flake8)             #
+#########################
+[flake8]
+ignore =
+    S301 # pickle
+    S403 # pickle
+    S404
+    S603
+    W503 # Line break before binary operator (flake8 is wrong)
+    E203  # whitespace before ':'
+    S101 # Complaining about assert statements
+    D101 # Docstring missing
+    D102 # Docstring missing
+    D103 # Docstring missing
+    D104 # Docstring missing
+    D400
+exclude =
+    .tox,
+    .git,
+    __pycache__,
+    docs/source/conf.py,
+    build,
+    dist,
+    tests/fixtures/*,
+    *.pyc,
+    *.egg-info,
+    .cache,
+    .eggs,
+    data
+max-line-length = 120
+max-complexity = 20
+import-order-style = pycharm
+application-import-names =
+    moffragmentor
+    tests
+format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s
+per-file-ignores =
+    tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209
\ No newline at end of file
diff --git a/src/moffragmentor/fragmentor/molfromgraph.py b/src/moffragmentor/fragmentor/molfromgraph.py
index b354ee8..978bd1a 100644
--- a/src/moffragmentor/fragmentor/molfromgraph.py
+++ b/src/moffragmentor/fragmentor/molfromgraph.py
@@ -12,7 +12,10 @@
 
 
 def wrap_molecule(
-    mol_idxs: Iterable[int], mof: "mof.MOF", starting_index: Optional[int] = None  # noqa: F821
+    mol_idxs: Iterable[int],
+    mof: "mof.MOF",
+    starting_index: Optional[int] = None,
+    add_additional_site: bool = True,  # noqa: F821
 ) -> Molecule:
     """Wrap a molecule in the cell of the MOF by walking along the structure graph.
 
@@ -30,6 +33,7 @@ def wrap_molecule(
         mof (MOF): MOF object that contains the mol_idxs.
         starting_index (int, optional): Starting index for the walk.
             Defaults to 0.
+        add_additional_site (bool, optional): Whether to add an additional site
 
     Returns:
         Molecule: wrapped molecule
@@ -76,7 +80,7 @@ def wrap_molecule(
                                 - new_positions_cart[current_index]
                             )
                             > VestaCutoffDictNN._lookup_dict[species_a][species_b]
-                        ):
+                        ) & add_additional_site:
                             logger.warning(
                                 "Warning: neighbor_index {} is already in new_positions_cart, "
                                 "but the distance is too large. "
diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py
index b334538..d2bff13 100644
--- a/src/moffragmentor/mof.py
+++ b/src/moffragmentor/mof.py
@@ -71,6 +71,9 @@ def _reset(self):
         self._bridges = None
         self._nx_graph = None
 
+    def __copy__(self):
+        return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__())
+
     def dump(self, path) -> None:
         """Dump this object as pickle file"""
         pickle_dump(self, path)
diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py
index a55d856..ed80893 100644
--- a/src/moffragmentor/sbu/node.py
+++ b/src/moffragmentor/sbu/node.py
@@ -6,9 +6,8 @@
 from collections import defaultdict
 from typing import Set
 
-import numpy as np
 from pymatgen.analysis.graphs import MoleculeGraph
-from pymatgen.core import Molecule, Structure
+from pymatgen.core import Molecule, Site, Structure
 
 from moffragmentor.fragmentor.molfromgraph import wrap_molecule
 from moffragmentor.fragmentor.splitter import _sites_and_classified_indices_from_indices
@@ -21,10 +20,10 @@
 _BRANCHING_DUMMY = "Kr"
 
 
-def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False):
+def _build_mol_and_graph(mof, indices, ignore_hidden_indices=True, add_additional_site=True):
     indices = set(indices)
     sites_and_indices = _sites_and_classified_indices_from_indices(mof, indices)
-    if not ignore_hidden_indices:
+    if ignore_hidden_indices:
         relevant_indices = indices - set(sites_and_indices.hidden_vertices)
     else:
         relevant_indices = indices
@@ -51,13 +50,16 @@ def _build_mol_and_graph(mof, indices, ignore_hidden_indices=False):
                     selected_edge_indices.append(edge_idx)
 
     index_map = dict(zip(relevant_indices, range(len(relevant_indices))))
+
     molecule = Molecule(selected_species, selected_positions)
     selected_edges = [sites_and_indices.edges[i] for i in selected_edge_indices]
     selected_edges = _reindex_list_of_tuple(selected_edges, index_map)
     edge_dict = dict(zip(selected_edges, [None] * len(selected_edges)))
     molecule_graph = MoleculeGraph.with_edges(molecule, edge_dict)
 
-    mol, mapping = wrap_molecule(list(relevant_indices), mof)
+    mol, mapping = wrap_molecule(
+        list(relevant_indices), mof, add_additional_site=add_additional_site
+    )
 
     return mol, molecule_graph, mapping
 
@@ -88,15 +90,34 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
     for i in branching_to_binding | branching_to_node_metal:
         dummy_structure.replace(i, _BRANCHING_DUMMY)
 
-    mol_without_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph(
-        mof,  # todo: fix with dummy mof structure
+    mol_w_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph(
+        mof,
         list(node_indices)
         + list(binding_to_node_metal)
         + list(branching_to_binding)
         + list(branching_to_node_metal),
-        ignore_hidden_indices=True,
+        ignore_hidden_indices=False,
+        add_additional_site=False,
     )
-    return mol, graph, mapping, mol_without_dummy, graph_w_dummy, mapping_w_dummy
+
+    inverse_mapping = {v[0]: k for k, v in mapping_w_dummy.items()}
+    # let's replace here now the atoms with the dummys as doing it beforehand might cause issues
+    # (e.g. we do not have the distances for a cutoffdict)
+
+    for i in branching_to_binding | branching_to_node_metal:
+        if i not in node_indices & set(mof.metal_indices):
+            mol_w_dummy._sites[inverse_mapping[i]] = Site(
+                _BRANCHING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords
+            )
+
+    for i in binding_to_node_metal:
+        mol_w_dummy._sites[inverse_mapping[i]] = Site(
+            _BINDING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords
+        )
+
+    graph_w_dummy.molecule = mol_w_dummy
+
+    return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy
 
 
 def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices):
diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py
index 5e82a81..53624c8 100644
--- a/src/moffragmentor/sbu/sbu.py
+++ b/src/moffragmentor/sbu/sbu.py
@@ -100,7 +100,9 @@ def __init__(
         """
         self.molecule = molecule
         self._mapping = molecule_original_indices_mapping
+
         self._indices = sum(list(molecule_original_indices_mapping.values()), [])
+
         self._original_indices = self._indices
         self.molecule_graph = molecule_graph
         self._original_graph_branching_indices = graph_branching_indices

From c6cf0f730b431634019b62ce5264fb697edd2818 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Mon, 7 Nov 2022 09:53:02 +0100
Subject: [PATCH 7/9] remove the format

---
 setup.cfg                     | 1 -
 src/moffragmentor/sbu/node.py | 8 ++++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 00a1a5b..dca614c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -181,6 +181,5 @@ import-order-style = pycharm
 application-import-names =
     moffragmentor
     tests
-format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s
 per-file-ignores =
     tests/*/*.py:DAR101, D205, D100, DAR101, DAR201, D209
\ No newline at end of file
diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py
index ed80893..da3193c 100644
--- a/src/moffragmentor/sbu/node.py
+++ b/src/moffragmentor/sbu/node.py
@@ -107,12 +107,16 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
     for i in branching_to_binding | branching_to_node_metal:
         if i not in node_indices & set(mof.metal_indices):
             mol_w_dummy._sites[inverse_mapping[i]] = Site(
-                _BRANCHING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords
+                _BRANCHING_DUMMY,
+                mol_w_dummy._sites[inverse_mapping[i]].coords,
+                properties={"original_species": str(mol_w_dummy._sites[inverse_mapping[i]].specie)},
             )
 
     for i in binding_to_node_metal:
         mol_w_dummy._sites[inverse_mapping[i]] = Site(
-            _BINDING_DUMMY, mol_w_dummy._sites[inverse_mapping[i]].coords
+            _BINDING_DUMMY,
+            mol_w_dummy._sites[inverse_mapping[i]].coords,
+            properties={"original_species": str(mol_w_dummy._sites[inverse_mapping[i]].specie)},
         )
 
     graph_w_dummy.molecule = mol_w_dummy

From a20e15b3004049fc4fc8e0d2afbc95d741d8af33 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Mon, 7 Nov 2022 18:13:47 +0100
Subject: [PATCH 8/9] chore: leave the small unitcell fix for later

---
 src/moffragmentor/fragmentor/linkerlocator.py | 13 +++----
 src/moffragmentor/fragmentor/molfromgraph.py  |  2 +-
 src/moffragmentor/mof.py                      |  1 +
 src/moffragmentor/net.py                      | 38 +++++++++++++++++-
 src/moffragmentor/sbu/node.py                 | 23 +++++++++--
 src/moffragmentor/sbu/sbu.py                  | 39 +++++++++++++++++--
 6 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/src/moffragmentor/fragmentor/linkerlocator.py b/src/moffragmentor/fragmentor/linkerlocator.py
index 71cdec1..d6d45cd 100644
--- a/src/moffragmentor/fragmentor/linkerlocator.py
+++ b/src/moffragmentor/fragmentor/linkerlocator.py
@@ -6,9 +6,8 @@
 from pymatgen.core import Structure
 from structuregraph_helpers.subgraph import get_subgraphs_as_molecules
 
-from .molfromgraph import wrap_molecule
-from ..sbu import Linker, LinkerCollection
-from ..utils import _flatten_list_of_sets
+from moffragmentor.fragmentor.molfromgraph import wrap_molecule
+from moffragmentor.sbu import Linker, LinkerCollection
 
 __all__ = ["create_linker_collection", "identify_linker_binding_indices"]
 
@@ -87,7 +86,7 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
 
     # Second step: extract the connected components
     # return all as molecules
-    _mols, graphs, idxs, centers, coordinates = get_subgraphs_as_molecules(
+    _mols, graphs, idxs, _centers, coordinates = get_subgraphs_as_molecules(
         graph_,
         return_unique=False,
         filter_in_cell=False,
@@ -97,13 +96,11 @@ def _create_linkers_from_node_location_result(  # pylint:disable=too-many-locals
 
     # Third: pick those molecules that are closest to the UC
     # ToDo: we should be able to skip this
-    linker_indices, coords = _pick_central_linker_indices(mof, coordinates)
+    linker_indices, _coords = _pick_central_linker_indices(mof, coordinates)
     found_frac_centers = set()
     found_hashes = set()
-    for i, linker_index in enumerate(linker_indices):
+    for linker_index in linker_indices:
         idx = idxs[linker_index]
-        center = centers[linker_index]
-        coords_ = coords[i]
         branching_indices = node_location_result.branching_indices & set(idx)
         mol, mapping = wrap_molecule(idx, mof)
         linker = Linker(
diff --git a/src/moffragmentor/fragmentor/molfromgraph.py b/src/moffragmentor/fragmentor/molfromgraph.py
index 978bd1a..b29a7d1 100644
--- a/src/moffragmentor/fragmentor/molfromgraph.py
+++ b/src/moffragmentor/fragmentor/molfromgraph.py
@@ -33,7 +33,7 @@ def wrap_molecule(
         mof (MOF): MOF object that contains the mol_idxs.
         starting_index (int, optional): Starting index for the walk.
             Defaults to 0.
-        add_additional_site (bool, optional): Whether to add an additional site
+        add_additional_site (bool): Whether to add an additional site
 
     Returns:
         Molecule: wrapped molecule
diff --git a/src/moffragmentor/mof.py b/src/moffragmentor/mof.py
index d2bff13..b249ffd 100644
--- a/src/moffragmentor/mof.py
+++ b/src/moffragmentor/mof.py
@@ -72,6 +72,7 @@ def _reset(self):
         self._nx_graph = None
 
     def __copy__(self):
+        """Make a a new MOF object with copies of the same structure and structure graph."""
         return MOF(IStructure.from_sites(self._structure.sites), self.structure_graph.__copy__())
 
     def dump(self, path) -> None:
diff --git a/src/moffragmentor/net.py b/src/moffragmentor/net.py
index f756d53..4a0482e 100644
--- a/src/moffragmentor/net.py
+++ b/src/moffragmentor/net.py
@@ -54,6 +54,35 @@ def is_3d_parallel(v1: np.array, v2: np.array, eps: float = 1e-5) -> bool:
     return np.allclose(cp, np.zeros(3), atol=eps)
 
 
+def get_image_if_on_edge(frac_coords):
+    # exactly one --> return the -1 image
+    images = np.zeros(3, dtype=int)
+    is_exactly_one = np.argwhere(np.abs(frac_coords - 1) < 1e-4)
+    if is_exactly_one.size > 0:
+        images[is_exactly_one] = -1
+    is_exactly_zero = np.argwhere(np.abs(frac_coords) < 1e-4)
+    if is_exactly_zero.size > 0:
+        images[is_exactly_zero] = 1
+    return images
+
+
+def sanitize_graph(structure_graph):
+    # if there is a node with only one edge, it is probably because the binding partner is at an edge or corner of the structure
+    # that is, at least one fractional coordinate is 0 or 1
+    # in this case, we add a bond to the image of this site
+    new_edges = []
+    for i, _ in enumerate(structure_graph.structure):
+        connected_sites = structure_graph.get_connected_sites(i)
+        if len(connected_sites) == 1:
+            frac_coords = connected_sites[0].site.frac_coords
+            images = get_image_if_on_edge(frac_coords)
+            if np.any(images != 0):
+                new_edges.append((i, connected_sites[0].index, images))
+
+    for edge in new_edges:
+        structure_graph.add_edge(edge[0], edge[1], to_jimage=edge[2])
+
+
 class VoltageEdge:
     """Representation of an edge in a labeled quotient graph (LQG).
 
@@ -268,12 +297,17 @@ def get_dummy_structure(self) -> Structure:
             lattice=self.lattice, species=symbols, coords=coords, coords_are_cartesian=True
         )
 
-    def get_pmg_structure_graph(self, simplify: bool = True) -> StructureGraph:
+    def get_pmg_structure_graph(
+        self, simplify: bool = True, sanitize: bool = False
+    ) -> StructureGraph:
         """Return a StructureGraph object from the Net object
 
         Args:
             simplify (bool): Whether to simplify the graph. Defaults to True.
                 If True, 2c vertices are removed.
+            sanitize (bool): Whether to sanitize the graph. Defaults to False.
+                If True, add edge to periodic image for sites with only one neighbor
+                if this neighbor is at the egde of the unit cell.
 
         Returns:
             StructureGraph: The StructureGraph object.
@@ -292,6 +326,8 @@ def get_pmg_structure_graph(self, simplify: bool = True) -> StructureGraph:
             ] = None
 
         structure_graph = StructureGraph.with_edges(s, edge_dict)
+        if sanitize:
+            sanitize_graph(structure_graph)
         if simplify:
             structure_graph = _simplify_structure_graph(structure_graph)
         return structure_graph
diff --git a/src/moffragmentor/sbu/node.py b/src/moffragmentor/sbu/node.py
index da3193c..ee12f95 100644
--- a/src/moffragmentor/sbu/node.py
+++ b/src/moffragmentor/sbu/node.py
@@ -79,7 +79,13 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
     node_metal_atoms = [i for i in node_indices if i in mof.metal_indices]
     binding_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], []))
     binding_to_node_metal = binding_to_node_metal.intersection(all_binding_indices)
-    branching_to_node_metal = set(sum([mof.get_neighbor_indices(i) for i in node_metal_atoms], []))
+
+    branching_to_node_metal = set(
+        sum(
+            [mof.get_neighbor_indices(i) for i in node_metal_atoms if i in all_branching_indices],
+            [],
+        )
+    )
     binding_neighbors = sum([mof.get_neighbor_indices(i) for i in binding_to_node_metal], [])
     branching_to_binding = set(binding_neighbors).intersection(all_branching_indices)
 
@@ -89,7 +95,7 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
         dummy_structure.replace(i, _BINDING_DUMMY)
     for i in branching_to_binding | branching_to_node_metal:
         dummy_structure.replace(i, _BRANCHING_DUMMY)
-
+    dummy_branching_sites = branching_to_binding | branching_to_node_metal
     mol_w_dummy, graph_w_dummy, mapping_w_dummy = _build_mol_and_graph(
         mof,
         list(node_indices)
@@ -121,7 +127,7 @@ def _extract_and_wrap(node_indices, all_branching_indices, all_binding_indices,
 
     graph_w_dummy.molecule = mol_w_dummy
 
-    return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy
+    return mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy, dummy_branching_sites
 
 
 def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all_binding_indices):
@@ -131,7 +137,15 @@ def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all
     binding_indices = node_indices & all_binding_indices
     graph_branching_indices = branching_indices & node_indices
 
-    mol, graph, mapping, mol_w_dummy, graph_w_dummy, mapping_w_dummy = _extract_and_wrap(
+    (
+        mol,
+        graph,
+        mapping,
+        mol_w_dummy,
+        graph_w_dummy,
+        mapping_w_dummy,
+        dummy_branching_sites,
+    ) = _extract_and_wrap(
         node_indices=node_indices,
         all_branching_indices=all_branching_indices,
         all_binding_indices=all_binding_indices,
@@ -147,6 +161,7 @@ def node_from_mof_and_indices(cls, mof, node_indices, all_branching_indices, all
         dummy_molecule=mol_w_dummy,
         dummy_molecule_graph=graph_w_dummy,
         dummy_molecule_indices_mapping=mapping_w_dummy,
+        dummy_branching_indices=dummy_branching_sites,
     )
 
     return node
diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py
index 53624c8..6cd9895 100644
--- a/src/moffragmentor/sbu/sbu.py
+++ b/src/moffragmentor/sbu/sbu.py
@@ -67,6 +67,12 @@ class SBU:
 
         To obtain the "original" coordinates, use the `_coordinates` attribute.
 
+    .. note:: Dummy molecules
+
+        In dummy molecules the binding and branching sites are replaces by
+        dummy atoms (noble gas). They also have special properties that indicate
+        the original species.
+
     Examples:
         >>> # visualize the molecule
         >>> sbu_object.show_molecule()
@@ -84,6 +90,7 @@ def __init__(
         dummy_molecule: Optional[Molecule] = None,
         dummy_molecule_graph: Optional[MoleculeGraph] = None,
         dummy_molecule_indices_mapping: Optional[Dict[int, List[int]]] = None,
+        dummy_branching_indices: Optional[Collection[int]] = None,
     ):
         """Initialize a secondary building block.
 
@@ -97,6 +104,13 @@ def __init__(
             binding_indices (Collection[int]): Binding indices in the original structure.
             molecule_original_indices_mapping (Optional[Dict[int, List[int]]], optional):
                 Mapping from molecule indices to original indices. Defaults to None.
+            dummy_molecule (Optional[Molecule], optional): Dummy molecule. Defaults to None.
+            dummy_molecule_graph (Optional[MoleculeGraph], optional): Dummy molecule graph.
+                Defaults to None.
+            dummy_molecule_indices_mapping (Optional[Dict[int, List[int]]], optional):
+                Dummy molecule indices mapping. Defaults to None.
+            dummy_branching_indices (Optional[Collection[int]], optional):
+                Dummy branching indices. Defaults to None.
         """
         self.molecule = molecule
         self._mapping = molecule_original_indices_mapping
@@ -111,6 +125,7 @@ def __init__(
         self._dummy_molecule = dummy_molecule
         self._dummy_molecule_graph = dummy_molecule_graph
         self._dummy_molecule_indices_mapping = dummy_molecule_indices_mapping
+        self._dummy_branching_indices = dummy_branching_indices
 
         self.mapping_from_original_indices = defaultdict(list)
         if molecule_original_indices_mapping is None:
@@ -120,6 +135,17 @@ def __init__(
             for k, v in molecule_original_indices_mapping.items():
                 for i in v:
                     self.mapping_from_original_indices[i].append(k)
+
+        if dummy_molecule:
+            self.dummy_mapping_from_original_indices = defaultdict(list)
+            if dummy_molecule_indices_mapping is None:
+                for ori_index, index in zip(self._indices, range(len(dummy_molecule))):
+                    self.dummy_mapping_from_original_indices[ori_index].append(index)
+            else:
+                for k, v in dummy_molecule_indices_mapping.items():
+                    for i in v:
+                        self.dummy_mapping_from_original_indices[i].append(k)
+
         self.mapping_to_original_indices = {}
         for key, value in self.mapping_from_original_indices.items():
             for v in value:
@@ -232,9 +258,14 @@ def original_graph_branching_indices(self):
     @property
     def graph_branching_indices(self):
         indices = []
-        for i in self.original_graph_branching_indices:
-            for index in self.mapping_from_original_indices[i]:
-                indices.append(index)
+        if self._dummy_branching_indices is None:
+            for i in self.original_graph_branching_indices:
+                for index in self.mapping_from_original_indices[i]:
+                    indices.append(index)
+        else:
+            for i in self._dummy_branching_indices:
+                for index in self.dummy_mapping_from_original_indices[i]:
+                    indices.append(index)
         return indices
 
     @cached_property
@@ -275,7 +306,7 @@ def __eq__(self, other: "SBU") -> bool:
 
     @cached_property
     def branching_coords(self):
-        return self.cart_coords[self.graph_branching_indices]
+        return self.cart_coords[self.graph_branching_indices] if self._dummy_branching_indices is None else self._dummy_molecule.cart_coords[self.graph_branching_indices]
 
     @property
     def original_binding_indices(self):

From 95374a4834232b4e22e2156eb8a96076c0a41427 Mon Sep 17 00:00:00 2001
From: Kevin Maik Jablonka <kevin.jablonka@epfl.ch>
Date: Mon, 7 Nov 2022 18:15:11 +0100
Subject: [PATCH 9/9] chore: remove lint

---
 src/moffragmentor/sbu/sbu.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/moffragmentor/sbu/sbu.py b/src/moffragmentor/sbu/sbu.py
index 6cd9895..1214a9b 100644
--- a/src/moffragmentor/sbu/sbu.py
+++ b/src/moffragmentor/sbu/sbu.py
@@ -306,7 +306,11 @@ def __eq__(self, other: "SBU") -> bool:
 
     @cached_property
     def branching_coords(self):
-        return self.cart_coords[self.graph_branching_indices] if self._dummy_branching_indices is None else self._dummy_molecule.cart_coords[self.graph_branching_indices]
+        return (
+            self.cart_coords[self.graph_branching_indices]
+            if self._dummy_branching_indices is None
+            else self._dummy_molecule.cart_coords[self.graph_branching_indices]
+        )
 
     @property
     def original_binding_indices(self):