Merge pull request #116 from marcelmbn/dev/gp3_ipea

Update `dev/gp3_ipea` to latest state
grimme-lab · Jan 29, 2025 · 8945ab2 · 8945ab2
2 parents 8b82a86 + bd6198f
commit 8945ab2
Show file tree

Hide file tree

Showing 26 changed files with 1,096 additions and 425 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,13 +9,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - to set the elemental composition it is now possible to use dicts with not only int but also the element symbols (str)
 - dict keys for elemental compositions will now always be checked for validity
 - Renamed GP3-xTB to g-xTB
+- Moved constants and (empirical) parameters to the `data` module
+
+### Deprecated
 - Nothing will be printed while multiple molecules are generated in parallel, tqdm-based progress bar instead
 - Some debugging statements from generate had to be removed (esp. w.r.t. early stopping)
 
 ### Added
 - `GXTBConfig` class for the g-xTB method, supporting SCF cycles check
-- support for TURBOMOLE as QM engine.
+- support for TURBOMOLE as QM engine
 - updated the parallelization to work over the number of molecules
+- possibility to generate symmetrical molecules (choice from rotation, inversion, mirroring)
 
 ### Fixed
 - version string is now correctly formatted and printed

diff --git a/environment.yml b/environment.yml
@@ -3,12 +3,22 @@ name: mindlessgen
 channels:
   - conda-forge
 dependencies:
-  - ruff
+  # runtime dependencies
+  - python>=3.12
+  - tqdm
+  - numpy>=2.0.0
+  - toml
+  - networkx
+  # development dependencies
+  - ruff==0.5.7
   - coverage
-  - numpy
   - pre-commit
   - pytest
   - tox
-  - tqdm
+  - mypy
+  - covdefaults
+  - types-toml
+  - types-tqdm
+  - setuptools_scm>=8.0.0
   - pip:
-      - covdefaults
+      - build
diff --git a/mindlessgen.toml b/mindlessgen.toml
@@ -8,7 +8,7 @@
 # > Verbosity level defining the printout: Options: -1 = super-silent, 0 = silent, 1 = default, 2 = verbose, 3 = debug
 verbosity = 1
 # > Number of parallel processes to use. Corresponds to the number of physical CPU cores used. Options: <int>
-parallel = 1
+parallel = 4
 # > Maximum number of generation & optimization try-and-error cycles per molecule. Options: <int>
 max_cycles = 200
 # > Number of molecules to generate. Options: <int>
@@ -17,6 +17,9 @@ num_molecules = 1
 postprocess = false
 # > Switch molecule structure XYZ writing on and off. Default: true. Options: <bool>
 write_xyz = true
+# > Switch generation of symmetrical molecules consisting of substructures on and off. Default: false. Options: <bool>
+# > Postprocessing is recommended. Choosing "TURBOMOLE" as postprocessing engine is recommended due to symmetry handling.
+symmetrization = false
 
 [generate]
 # > Minimum number of atoms in the generated molecule. Options: <int>
@@ -59,7 +62,7 @@ hlgap = 0.5
 # > If `debug` is true, the process is terminated after the first (successful or not) refinement step.
 debug = false
 # > Number of cores to be used for geometry optimizations. Single-points will continue to use one core each.
-ncores = 4
+ncores = 2
 
 [postprocess]
 # > Engine for the post-processing part. Options: 'xtb', 'orca', 'turbomole'
@@ -73,7 +76,7 @@ opt_cycles = 5
 # > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'.
 debug = false
 # > Number of cores to be used for both single-point calculations and geometry optimizations.
-ncores = 4
+ncores = 2
 
 [xtb]
 # > Path to the xtb executable. The names `xtb` and `xtb_dev` are automatically searched for. Options: <str | Path>
@@ -104,3 +107,9 @@ functional = "PBE"
 basis = "def2-SVP"
 # > Maximum number of SCF cycles: Options: <int>
 scf_cycles = 100
+
+[symmetrization]
+# > Distance of the symmetric sub-structures on the translation axis (x). Options: <float>
+distance = 3.0
+# > Symmetry operation to apply; only one operation can be chosen at a time. Options: <mirror>, <inversion>, and <c_<n>_rotation>, where n is an <int>.
+operation = "mirror"
diff --git a/src/mindlessgen/cli/cli_parser.py b/src/mindlessgen/cli/cli_parser.py
@@ -87,16 +87,11 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         help="Do not write the molecules to xyz files.",
     )
     parser.add_argument(
-        "--scale-fragment-detection",
-        type=float,
-        required=False,
-        help="Scaling factor for the fragment detection based on the van der Waals radii.",
-    )
-    parser.add_argument(
-        "--scale-minimal-distance",
-        type=float,
+        "--symmetrization",
+        action="store_true",
+        default=None,
         required=False,
-        help="Minimum atom distance scaling factor.",
+        help="Generate symmetric MLM complexes.",
     )
 
     ### Molecule generation arguments ###
@@ -125,6 +120,18 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         help="Factor with which the coordinate scaling factor is increased "
         + "after a failed attempt.",
     )
+    parser.add_argument(
+        "--scale-fragment-detection",
+        type=float,
+        required=False,
+        help="Scaling factor for the fragment detection based on the van der Waals radii.",
+    )
+    parser.add_argument(
+        "--scale-minimal-distance",
+        type=float,
+        required=False,
+        help="Minimum atom distance scaling factor.",
+    )
     parser.add_argument(
         "--element-composition",
         type=str,
@@ -271,6 +278,22 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         required=False,
         help="Maximum number of SCF cycles in g-xTB.",
     )
+
+    ### Symmetrization specific arguments ###
+    parser.add_argument(
+        "--symmetrization-distance",
+        type=float,
+        required=False,
+        help="Define the distance of the symmetric fragments.",
+    )
+    parser.add_argument(
+        "--symmetry-operation",
+        type=str,
+        required=False,
+        help="Define the symmetry operation to use.",
+    )
+
+    ### Parse arguments ###
     args = parser.parse_args(argv)
     args_dict = vars(args)
 
@@ -286,6 +309,7 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         "num_molecules": args_dict["num_molecules"],
         "postprocess": args_dict["postprocess"],
         "write_xyz": args_dict["write_xyz"],
+        "symmetrization": args_dict["symmetrization"],
     }
     # Refinement arguments
     rev_args_dict["refine"] = {
@@ -309,8 +333,10 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         "fixed_composition": args_dict["fixed_composition"],
     }
     # XTB specific arguments
-    rev_args_dict["xtb"] = {"xtb_path": args_dict["xtb_path"]}
-    rev_args_dict["xtb"]["level"] = args_dict["xtb_level"]
+    rev_args_dict["xtb"] = {
+        "xtb_path": args_dict["xtb_path"],
+        "level": args_dict["xtb_level"],
+    }
     # ORCA specific arguments
     rev_args_dict["orca"] = {
         "orca_path": args_dict["orca_path"],
@@ -331,5 +357,10 @@ def cli_parser(argv: Sequence[str] | None = None) -> dict:
         "opt_cycles": args_dict["postprocess_opt_cycles"],
         "debug": args_dict["postprocess_debug"],
     }
+    # Symmetrization specific arguments
+    rev_args_dict["symmetrization"] = {
+        "distance": args_dict["symmetrization_distance"],
+        "operation": args_dict["symmetry_operation"],
+    }
 
     return rev_args_dict
diff --git a/src/mindlessgen/data/__init__.py b/src/mindlessgen/data/__init__.py
@@ -0,0 +1,24 @@
+"""
+This file is used to import all the constants from the constants.py file and the parameters.py file.
+"""
+
+from mindlessgen.data.constants import (
+    PSE,
+    PSE_NUMBERS,
+    PSE_SYMBOLS,
+    BOHR2AA,
+    AA2BOHR,
+)
+
+from mindlessgen.data.parameters import (
+    MAX_ELEM,
+)
+
+__all__ = [
+    "PSE",
+    "PSE_NUMBERS",
+    "PSE_SYMBOLS",
+    "BOHR2AA",
+    "AA2BOHR",
+    "MAX_ELEM",
+]
diff --git a/src/mindlessgen/data/constants.py b/src/mindlessgen/data/constants.py
@@ -0,0 +1,132 @@
+"""
+This module contains the constants which are used in the project.
+"""
+
+PSE: dict[int, str] = {
+    0: "X",
+    1: "H",
+    2: "He",
+    3: "Li",
+    4: "Be",
+    5: "B",
+    6: "C",
+    7: "N",
+    8: "O",
+    9: "F",
+    10: "Ne",
+    11: "Na",
+    12: "Mg",
+    13: "Al",
+    14: "Si",
+    15: "P",
+    16: "S",
+    17: "Cl",
+    18: "Ar",
+    19: "K",
+    20: "Ca",
+    21: "Sc",
+    22: "Ti",
+    23: "V",
+    24: "Cr",
+    25: "Mn",
+    26: "Fe",
+    27: "Co",
+    28: "Ni",
+    29: "Cu",
+    30: "Zn",
+    31: "Ga",
+    32: "Ge",
+    33: "As",
+    34: "Se",
+    35: "Br",
+    36: "Kr",
+    37: "Rb",
+    38: "Sr",
+    39: "Y",
+    40: "Zr",
+    41: "Nb",
+    42: "Mo",
+    43: "Tc",
+    44: "Ru",
+    45: "Rh",
+    46: "Pd",
+    47: "Ag",
+    48: "Cd",
+    49: "In",
+    50: "Sn",
+    51: "Sb",
+    52: "Te",
+    53: "I",
+    54: "Xe",
+    55: "Cs",
+    56: "Ba",
+    57: "La",
+    58: "Ce",
+    59: "Pr",
+    60: "Nd",
+    61: "Pm",
+    62: "Sm",
+    63: "Eu",
+    64: "Gd",
+    65: "Tb",
+    66: "Dy",
+    67: "Ho",
+    68: "Er",
+    69: "Tm",
+    70: "Yb",
+    71: "Lu",
+    72: "Hf",
+    73: "Ta",
+    74: "W",
+    75: "Re",
+    76: "Os",
+    77: "Ir",
+    78: "Pt",
+    79: "Au",
+    80: "Hg",
+    81: "Tl",
+    82: "Pb",
+    83: "Bi",
+    84: "Po",
+    85: "At",
+    86: "Rn",
+    87: "Fr",
+    88: "Ra",
+    89: "Ac",
+    90: "Th",
+    91: "Pa",
+    92: "U",
+    93: "Np",
+    94: "Pu",
+    95: "Am",
+    96: "Cm",
+    97: "Bk",
+    98: "Cf",
+    99: "Es",
+    100: "Fm",
+    101: "Md",
+    102: "No",
+    103: "Lr",
+    104: "Rf",
+    105: "Db",
+    106: "Sg",
+    107: "Bh",
+    108: "Hs",
+    109: "Mt",
+    110: "Ds",
+    111: "Rg",
+    112: "Cn",
+    113: "Nh",
+    114: "Fl",
+    115: "Mc",
+    116: "Lv",
+    117: "Ts",
+    118: "Og",
+}
+PSE_NUMBERS: dict[str, int] = {k.lower(): v for v, k in PSE.items()}
+PSE_SYMBOLS: dict[int, str] = {v: k.lower() for v, k in PSE.items()}
+
+BOHR2AA = (
+    0.529177210544  # taken from https://physics.nist.gov/cgi-bin/cuu/Value?bohrrada0
+)
+AA2BOHR = 1 / BOHR2AA