Skip to content

Commit

Permalink
fix: in rare cases, not all fragments were connected
Browse files Browse the repository at this point in the history
when more > fragments are present and the first fragment is not
connected in the first while loop cycle then it remained unconnected.

no we also try to avoid HH-interfragment bonds by default
  • Loading branch information
Johannes Steinmetzer committed Aug 9, 2023
1 parent a5f295a commit e5b37c2
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 27 deletions.
26 changes: 26 additions & 0 deletions pysisyphus/geom_library/azobenzene_fragmentation.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
24
-564.58764035 ,
C -2.06618165 -2.74160982 -1.96941073
C -2.28186249 -3.64676791 -0.92075636
C -0.94776144 -2.30373215 0.66394015
H -2.48998513 -2.89960782 -2.96752643
H -1.92695459 -4.16280963 1.17613845
C 2.30970598 2.43402365 2.08685122
C -0.87543028 -1.59465211 -0.48604750
C -1.75104361 -3.45163322 0.36118106
C -1.28036998 -1.56028325 -1.77776585
H -2.88880011 -4.53559748 -1.10938862
H -0.50888543 -2.09126806 1.64646231
H -1.08730013 -0.80860797 -2.55286599
N -2.74283034 0.98932764 0.05937817
C 2.88528390 2.58951054 -0.29600751
C 1.52181731 2.41931865 -0.69648637
H 1.17407471 2.41791003 -1.73746321
N -2.68085569 1.76558417 -0.71914772
H 0.13640927 2.13072836 2.56039380
H 2.60116497 2.44588559 3.14293335
C 0.81256974 2.28347692 0.44960128
H 4.29761755 2.74235557 1.32141211
H 3.62466495 2.72423025 -1.09318111
C 0.91792617 2.25657695 1.79957857
C 3.24702432 2.59764212 1.05817590
26 changes: 26 additions & 0 deletions pysisyphus/geom_library/azobenzene_fragmentation_2.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
24
FINAL HEAT OF FORMATION = -564.540856
C -1.846765 -2.167683 -1.871511
C -2.189943 -3.071483 -0.858771
C -0.744746 -1.887502 0.749874
H -2.256765 -2.278973 -2.891462
H -1.963335 -3.633337 1.235395
C 1.872802 2.148807 1.882957
C -0.564383 -1.101935 -0.335393
C -1.698321 -2.921258 0.435219
C -0.907419 -1.102875 -1.646641
H -2.868661 -3.904594 -1.079026
H -0.368620 -1.797854 1.763459
H -0.688935 -0.397496 -2.446781
N -2.413735 1.388891 0.476522
C 2.427143 2.331681 -0.475061
C 1.516126 1.269280 -0.818776
H 1.448220 0.938602 -1.851578
N -1.810728 1.999910 -0.206109
H 0.406566 0.621232 2.459812
H 1.971371 2.501356 2.923278
C 0.975693 0.774447 0.318072
H 3.225511 3.616451 1.060285
H 2.978572 2.821821 -1.298520
C 0.940253 1.079297 1.631813
C 2.560097 2.773216 0.842942
29 changes: 29 additions & 0 deletions pysisyphus/geom_library/na_cube_54_bonds.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
27

Na -4.39217085 -4.39217085 -4.39217085
Na -4.39217085 -4.39217085 0.00000000
Na -4.39217085 -4.39217085 4.39217085
Na -4.39217085 0.00000000 -4.39217085
Na -4.39217085 0.00000000 0.00000000
Na -4.39217085 0.00000000 4.39217085
Na -4.39217085 4.39217085 -4.39217085
Na -4.39217085 4.39217085 0.00000000
Na -4.39217085 4.39217085 4.39217085
Na 0.00000000 -4.39217085 -4.39217085
Na 0.00000000 -4.39217085 0.00000000
Na 0.00000000 -4.39217085 4.39217085
Na 0.00000000 0.00000000 -4.39217085
Na 0.00000000 0.00000000 0.00000000
Na 0.00000000 0.00000000 4.39217085
Na 0.00000000 4.39217085 -4.39217085
Na 0.00000000 4.39217085 0.00000000
Na 0.00000000 4.39217085 4.39217085
Na 4.39217085 -4.39217085 -4.39217085
Na 4.39217085 -4.39217085 0.00000000
Na 4.39217085 -4.39217085 4.39217085
Na 4.39217085 0.00000000 -4.39217085
Na 4.39217085 0.00000000 0.00000000
Na 4.39217085 0.00000000 4.39217085
Na 4.39217085 4.39217085 -4.39217085
Na 4.39217085 4.39217085 0.00000000
Na 4.39217085 4.39217085 4.39217085
93 changes: 69 additions & 24 deletions pysisyphus/intcoords/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
# The efficient optimization of molecular geometries using redundant internal
# coordinates
# Bakken, Helgaker, 2002
# [2] https://doi.org/10.1063/1.479510
# Geometry optimization in generalized natural internal coordinates
# von Arnim, Ahlrichs, 1999
# [3] https://doi.org/10.1002/jcc.21494
# The choice of internal coordinates in complex chemical systems
# Nemeth, Challacombe, Van Veenendaal, 2010

from collections import namedtuple
import itertools as it
from typing import Optional
from typing import FrozenSet, Optional, Set
import math

import numpy as np
from scipy.spatial.distance import pdist, squareform
Expand Down Expand Up @@ -277,8 +284,13 @@ def connect_fragments_ahlrichs(
min_dist_scale=1.1,
scale=1.2,
avoid_h=False,
avoid_hh=True,
logger=None,
max_dist=3.0 / BOHR2ANG,
):
"""Ahlrich/von Arnim connection scheme.
See II. A in [2] and p. 2082 in [3]."""
atoms = [atom.lower() for atom in atoms]
if len(fragments) > 1:
log(
Expand All @@ -287,41 +299,54 @@ def connect_fragments_ahlrichs(
)
dist_mat = squareform(cdm)

frag_pairs = list()
# Tracks already connected fragments
connected_fragments: Set[FrozenSet] = set()
# Will contain interfragment atom pairs.
interfrag_inds = list()
max_dist = 3.0 / BOHR2ANG
h_sums = list()
all_fragment_inds = [i for i, _ in enumerate(fragments)]
# Leave out the first fragment, so we can also handle only 1 fragment here.
unconnected_fragment_inds = all_fragment_inds.copy()[1:]
h_inds = set([i for i, atom in enumerate(atoms) if atom.lower() == "h"])
while True:
for i, j in it.product(all_fragment_inds, unconnected_fragment_inds):
# Don't try to connect a fragment to itself and don't try to connect
# fragments two times, e.g., (0 and 2) as well as (2 and 0).
if i >= j:
continue

# In the beginning, all fragments are unconnected
unconnected_fragment_inds = all_fragment_inds.copy()
h_inds = set([i for i, atom in enumerate(atoms) if atom == "h"])
h_mask = np.array([1 if atom == "h" else 0 for atom in atoms])

def get_pairs(unconnected, all_fragments):
candidates = set(
[
frozenset((i, j))
for i, j in it.product(unconnected, all_fragments)
if i != j
]
)
return candidates - connected_fragments

cycle = 0
more_than_one_frag = len(fragments) > 1
# Skip loop when only one fragment is present.
# Loop until all fragments are somehow connected.
while more_than_one_frag and len(unconnected_fragment_inds) > 0:
candidate_pairs = get_pairs(unconnected_fragment_inds, all_fragment_inds)
for i, j in candidate_pairs:
frag1 = fragments[i]
frag2 = fragments[j]
log(
logger,
f"\tConnecting {len(frag1)}-atom and {len(frag2)}-atom fragments.",
)
# Pairs of possible interfragment bonds
inds = np.array(
[(i1, i2) for i1, i2 in it.product(frag1, frag2)], dtype=int
)
distances = np.array([dist_mat[k, l] for k, l in inds])

frag_pairs.append((i, j))
# Determine minimum distance bond
min_ind = distances.argmin()
min_dist = distances[min_ind]

# If we want to avoid interfragment bonds involving hydrogen we check
# if 'min_ind' and the associated 'min_dist' must be updated to involve
# a bond without hydrogen.
# Alternatively, 'no_hh' can also be set to True; this only avoids
# interfragment bonds between two hydrogens.
if avoid_h:
sort_inds = np.argsort(distances, kind="stable")
# Try to avoid interfragment bonds involving hydrogen.
for (k, dist) in zip(sort_inds, distances[sort_inds]):
for k, dist in zip(sort_inds, distances[sort_inds]):
if set(inds[k]) & h_inds:
continue
if dist >= 1.5 * min_dist:
Expand All @@ -330,25 +355,43 @@ def connect_fragments_ahlrichs(
min_dist = distances[k]
break

# If the minimum distances is below the current allowed maximum distance
# we connect the fragments and try to define additional interfragment bonds
# below or equal to 'min_dist + offset'.
if min_dist <= max_dist:
# Scaling of bigger 'min_dist' values by a fixed factor can lead
# to definition of many additional bonds. We restrict the offset
# to at most 1 Å.
offset = min((min_dist_scale - 1.0) * min_dist, 1.0 / BOHR2ANG)
mask = distances <= (min_dist + offset)
interfrag_inds.extend(inds[mask])
cur_interfrag_inds = inds[mask]
cur_h_sums = h_mask[inds[mask]].sum(axis=1)
no_hh = cur_h_sums < 2
# If fragment pairs is connected by any no-HH bond we drop all
# HH-interfragment bonds.
if avoid_hh and any(no_hh):
cur_interfrag_inds = cur_interfrag_inds[no_hh]
cur_h_sums = cur_h_sums[no_hh]
interfrag_inds.extend(cur_interfrag_inds)
h_sums.extend(cur_h_sums)
# Indicate that the just connected fragments don't have to be
# connected anymore. In the current cycle of the while loop additional
# bonds to the just connected fragments can still be defined.
unconnected_fragment_inds = [
k for k in unconnected_fragment_inds if k not in (i, j)
]
# Leave the outer while loop when all fragments are connected
if len(unconnected_fragment_inds) == 0:
break
connected_fragments.add(frozenset((i, j)))
log(
logger,
f"\tMacro cycle {cycle}, connected fragments {i} ({len(frag1)} atom) "
f"and fragment {j} ({len(frag2)} atoms). Min. dist is "
f"{min_dist:.4f} au; current allowed max. dist. is {max_dist:.4f} au.",
)
# If there are still unconnected fragments present we allow longer
# interfragment bonds.
max_dist *= scale
cycle += 1
assert max_dist != math.inf, "Max. dist grew to infinity. Something went wrong!"

interfrag_inds = np.array(interfrag_inds, dtype=int)
return interfrag_inds, list()
Expand Down Expand Up @@ -712,12 +755,14 @@ def keep_coords(prims, prim_cls):
]
# Check for unbonded single atoms and create fragments for them.
bonded_set = set(tuple(np.ravel(bonds)))
# Set of unbonded, single atoms
unbonded_set = set(range(len(atoms))) - bonded_set - freeze_atom_set
log(
logger,
f"Merging bonded atoms yielded {len(fragments)} fragment(s) and "
f"{len(unbonded_set)} atoms.",
)
# Create an additional single atom set for all unbonded single atoms
fragments.extend([frozenset((atom,)) for atom in unbonded_set])

interfrag_bonds = list()
Expand Down
31 changes: 28 additions & 3 deletions tests/test_intcoords/test_intcoords.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
#!/usr/bin/env python3

import numpy as np
import pytest
from pytest import approx
from scipy.spatial.distance import pdist

from pysisyphus.calculators.PySCF import PySCF
from pysisyphus.calculators import XTB
from pysisyphus.helpers import geom_loader
from pysisyphus.intcoords.PrimTypes import PrimTypes
from pysisyphus.intcoords.setup import get_fragments, setup_redundant
from pysisyphus.intcoords.setup import (
connect_fragments_ahlrichs,
get_fragments,
setup_redundant,
)
from pysisyphus.intcoords.valid import check_typed_prims
from pysisyphus.io.zmat import geom_from_zmat_str
from pysisyphus.optimizers.RFOptimizer import RFOptimizer
Expand Down Expand Up @@ -305,3 +308,25 @@ def test_hydrogen_bonds():
def test_hybrid_internals(coord_type, tp_num):
geom = geom_loader("lib:h2o.xyz", coord_type=coord_type)
assert len(geom.internal.typed_prims) == tp_num


@pytest.mark.parametrize(
"fn, ref_nbonds",
(
("lib:SulfateInSolution.xyz", 82),
("lib:azobenzene_fragmentation.xyz", 8),
("lib:azobenzene_fragmentation_2.xyz", 5),
# Artificial 3x3x3 sodium cube resulting in only interfrag bonds
("lib:na_cube_54_bonds.xyz", 54),
),
)
def test_ahlrich_interfragment(fn, ref_nbonds):
geom = geom_loader(fn, coord_type="redund")
cdm = pdist(geom.coords3d)
fragments = geom.internal.fragments

interfrag_bonds, _ = connect_fragments_ahlrichs(
cdm, fragments, geom.atoms, avoid_hh=True
)
nbonds = len(interfrag_bonds)
assert nbonds == ref_nbonds

0 comments on commit e5b37c2

Please sign in to comment.