grimme-lab · lmseidler · Jan 20, 2025 · Jan 14, 2025 · Jan 15, 2025 · Jan 15, 2025
@@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - to set the elemental composition it is now possible to use dicts with not only int but also the element symbols (str)
 - dict keys for elemental compositions will now always be checked for validity
 - Renamed GP3-xTB to g-xTB
+- Nothing will be printed while multiple molecules are generated in parallel, tqdm-based progress bar instead
+- Some debugging statements from generate had to be removed (esp. w.r.t. early stopping)
 
 ### Added
 - `GXTBConfig` class for the g-xTB method, supporting SCF cycles check
+- updated the parallelization to work over the number of molecules
 
 ### Fixed
 - version string is now correctly formatted and printed

@@ -9,5 +9,6 @@ dependencies:
   - pre-commit
   - pytest
   - tox
+  - tqdm
   - pip:
       - covdefaults
@@ -58,6 +58,8 @@ hlgap = 0.5
 # > Debug this step. Leads to more verbose output as soon as the refinement part is reached. Options: <bool>
 # > If `debug` is true, the process is terminated after the first (successful or not) refinement step.
 debug = false
+# > Number of cores to be used for geometry optimizations. Single-points will continue to use one core each.
+ncores = 4
 
 [postprocess]
 # > Engine for the post-processing part. Options: 'xtb', 'orca', 'turbomole'
@@ -70,6 +72,8 @@ opt_cycles = 5
 # > If `debug` is true, the process is terminated after the first (successful or not) post-processing step.
 # > Note: This option is only relevant if the 'postprocess' option in the 'general' section is set to 'true'.
 debug = false
+# > Number of cores to be used for both single-point calculations and geometry optimizations.
+ncores = 4
 
 [xtb]
 # > Path to the xtb executable. The names `xtb` and `xtb_dev` are automatically searched for. Options: <str | Path>

@@ -22,7 +22,7 @@ classifiers = [
     "Topic :: Scientific/Engineering",
     "Typing :: Typed",
 ]
-dependencies = ["numpy", "networkx", "toml"]
+dependencies = ["numpy", "networkx", "toml", "tqdm"]
 dynamic = ["version"]
 
 [project.urls]
@@ -41,6 +41,7 @@ dev = [
     "tox",
     "setuptools_scm>=8",
     "types-toml",
+    "types-tqdm",
 ]
 
 [project.scripts]

@@ -4,11 +4,20 @@
 
 from __future__ import annotations
 
+# Python standard library
 from collections.abc import Callable
+from concurrent.futures import Future, as_completed
 from pathlib import Path
 import multiprocessing as mp
+from threading import Event
 import warnings
+from time import perf_counter
+from datetime import timedelta
 
+# External packages
+from tqdm import tqdm
+
+# Internal modules
 from ..molecules import generate_random_molecule, Molecule
 from ..qm import (
     XTB,
@@ -23,14 +32,13 @@
     get_gxtb_path,
 )
 from ..molecules import iterative_optimization, postprocess_mol
-from ..prog import ConfigManager
-
+from ..prog import ConfigManager, setup_managers, ResourceMonitor, setup_blocks
 from ..__version__ import __version__
 
 MINDLESS_MOLECULES_FILE = "mindless.molecules"
 
 
-def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]:
+def generator(config: ConfigManager) -> tuple[list[Molecule], int]:
     """
     Generate a molecule.
     """
@@ -44,12 +52,14 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]:
     #                __/ |
     #               |___/
 
+    start = perf_counter()
+
     if config.general.verbosity > 0:
         print(header(str(__version__)))
 
     if config.general.print_config:
         print(config)
-        return None, 0
+        return [], 0
 
     # Import and set up required engines
     refine_engine: QMMethod = setup_engines(
@@ -83,95 +93,174 @@ def generator(config: ConfigManager) -> tuple[list[Molecule] | None, int]:
     if config.general.verbosity > 0:
         print(f"Running with {num_cores} cores.")
 
-    # Check if the file "mindless.molecules" exists. If yes, append to it.
+    # Check if the file {MINDLESS_MOLECULES_FILE} exists. If yes, append to it.
     if Path(MINDLESS_MOLECULES_FILE).is_file():
         if config.general.verbosity > 0:
             print(f"\n--- Appending to existing file '{MINDLESS_MOLECULES_FILE}'. ---")
+
     exitcode = 0
     optimized_molecules: list[Molecule] = []
-    for molcount in range(config.general.num_molecules):
-        # print a decent header for each molecule iteration
-        if config.general.verbosity > 0:
-            print(f"\n{'=' * 80}")
-            print(
-                f"{'=' * 22} Generating molecule {molcount + 1:<4} of "
-                + f"{config.general.num_molecules:<4} {'=' * 24}"
-            )
-            print(f"{'=' * 80}")
-        manager = mp.Manager()
-        stop_event = manager.Event()
-        cycles = range(config.general.max_cycles)
-        backup_verbosity: int | None = None
-        if num_cores > 1 and config.general.verbosity > 0:
-            backup_verbosity = (
-                config.general.verbosity
-            )  # Save verbosity level for later
-            config.general.verbosity = 0  # Disable verbosity if parallel
-
-        if config.general.verbosity == 0:
-            print("Cycle... ", end="", flush=True)
-        with mp.Pool(processes=num_cores) as pool:
-            results = pool.starmap(
-                single_molecule_generator,
-                [
-                    (config, refine_engine, postprocess_engine, cycle, stop_event)
-                    for cycle in cycles
-                ],
-            )
-        if config.general.verbosity == 0:
-            print("")
-
-        # Restore verbosity level if it was changed
-        if backup_verbosity is not None:
-            config.general.verbosity = backup_verbosity
 
-        # Filter out None values and return the first successful molecule
-        optimized_molecule: Molecule | None = None
-        for i, result in enumerate(results):
+    # Initialize parallel blocks here
+    blocks = setup_blocks(
+        num_cores,
+        config.general.num_molecules,
+        min(config.refine.ncores, config.postprocess.ncores),
+    )
+    blocks.sort(key=lambda x: x.ncores)
+
+    backup_verbosity: int | None = None
+    if len(blocks) > 1 and config.general.verbosity > 0:
+        backup_verbosity = config.general.verbosity  # Save verbosity level for later
+        config.general.verbosity = 0  # Disable verbosity if parallel
+        # NOTE: basically no messages will be printed if generation is run in parallel
+
+    # Set up parallel blocks environment
+    with setup_managers(num_cores // blocks[0].ncores, num_cores) as (
+        executor,
+        _,
+        resources,
+    ):
+        # The following creates a queue of futures which occupy a certain number of cores each
+        # as defined by each block
+        # Each future represents the generation of one molecule
+        # NOTE: proceeding this way assures that each molecule gets a static number of cores
+        # a dynamic setting would also be thinkable and straightforward to implement
+        tasks: list[Future[Molecule | None]] = []
+        for block in blocks:
+            for _ in range(block.num_molecules):
+                tasks.append(
+                    executor.submit(
+                        single_molecule_generator,
+                        len(tasks),
+                        config,
+                        resources,
+                        refine_engine,
+                        postprocess_engine,
+                        block.ncores,
+                    )
+                )
+
+        # Collect results of all tries to create a molecule
+        for future in tqdm(
+            as_completed(tasks),
+            total=len(tasks),
+            desc="Generating Molecules ...",
+        ):
+            result: Molecule | None = future.result()
             if result is not None:
-                cycles_needed = i + 1
-                optimized_molecule = result
-                break
-
-        if optimized_molecule is None:
-            warnings.warn(
-                "Molecule generation including optimization (and postprocessing) "
-                + f"failed for all cycles for molecule {molcount + 1}."
-            )
-            exitcode = 1
-            continue
-        if config.general.verbosity > 0:
-            print(f"Optimized mindless molecule found in {cycles_needed} cycles.")
-            print(optimized_molecule)
-        if config.general.write_xyz:
-            optimized_molecule.write_xyz_to_file()
-            if config.general.verbosity > 0:
-                print(f"Written molecule file 'mlm_{optimized_molecule.name}.xyz'.\n")
-            with open("mindless.molecules", "a", encoding="utf8") as f:
-                f.write(f"mlm_{optimized_molecule.name}\n")
-        optimized_molecules.append(optimized_molecule)
+                optimized_molecules.append(result)
+            else:
+                exitcode = 1
+
+    # Restore verbosity level if it was changed
+    if backup_verbosity is not None:
+        config.general.verbosity = backup_verbosity
+
+    end = perf_counter()
+    runtime = end - start
+
+    print(f"\nSuccessfully generated {len(optimized_molecules)} molecules:")
+    for optimized_molecule in optimized_molecules:
+        print(optimized_molecule.name)
+
+    time = timedelta(seconds=int(runtime))
+    hours, r = divmod(time.seconds, 3600)
+    minutes, seconds = divmod(r, 60)
+    if time.days:
+        hours += time.days * 24
+
+    print(f"\nRan MindlessGen in {hours:02d}:{minutes:02d}:{seconds:02d} (HH:MM:SS)")
 
     return optimized_molecules, exitcode
 
 
 def single_molecule_generator(
+    molcount: int,
     config: ConfigManager,
+    resources: ResourceMonitor,
     refine_engine: QMMethod,
     postprocess_engine: QMMethod | None,
-    cycle: int,
-    stop_event,
+    ncores: int,
 ) -> Molecule | None:
     """
-    Generate a single molecule.
+    Generate a single molecule (from start to finish).
     """
+
+    # Wait for enough cores (cores freed automatically upon leaving managed context)
+    with resources.occupy_cores(ncores):
+        # print a decent header for each molecule iteration
+        if config.general.verbosity > 0:
+            print(f"\n{'='*80}")
+            print(
+                f"{'='*22} Generating molecule {molcount + 1:<4} of "
+                + f"{config.general.num_molecules:<4} {'='*24}"
+            )
+            print(f"{'='*80}")
+
+        with setup_managers(ncores, ncores) as (executor, manager, resources_local):
+            stop_event = manager.Event()
+            # Launch worker processes to find molecule
+            cycles = range(config.general.max_cycles)
+            tasks: list[Future[Molecule | None]] = []
+            for cycle in cycles:
+                tasks.append(
+                    executor.submit(
+                        single_molecule_step,
+                        config,
+                        resources_local,
+                        refine_engine,
+                        postprocess_engine,
+                        cycle,
+                        stop_event,
+                    )
+                )
+
+            results = [task.result() for task in as_completed(tasks)]
+
+    optimized_molecule: Molecule | None = None
+    for i, result in enumerate(results):
+        if result is not None:
+            cycles_needed = i + 1
+            optimized_molecule = result
+            if config.general.verbosity > 0:
+                print(f"Optimized mindless molecule found in {cycles_needed} cycles.")
+                print(optimized_molecule)
+            break
+
+    # Write out molecule if requested
+    if optimized_molecule is not None and config.general.write_xyz:
+        optimized_molecule.write_xyz_to_file()
+        with open(MINDLESS_MOLECULES_FILE, "a", encoding="utf8") as f:
+            f.write(f"mlm_{optimized_molecule.name}\n")
+        if config.general.verbosity > 0:
+            print(f"Written molecule file 'mlm_{optimized_molecule.name}.xyz'.\n")
+    elif optimized_molecule is None:
+        # TODO: will this conflict with progress bar?
+        warnings.warn(
+            "Molecule generation including optimization (and postprocessing) "
+            + f"failed for all cycles for molecule {molcount + 1}."
+        )
+
+    return optimized_molecule
+
+
+def single_molecule_step(
+    config: ConfigManager,
+    resources_local: ResourceMonitor,
+    refine_engine: QMMethod,
+    postprocess_engine: QMMethod | None,
+    cycle: int,
+    stop_event: Event,
+) -> Molecule | None:
+    """Execute one step in a single molecule generation"""
+
     if stop_event.is_set():
         return None  # Exit early if a molecule has already been found
 
-    if config.general.verbosity == 0:
-        # print the cycle in one line, not starting a new line
-        print("✔", end="", flush=True)
-    elif config.general.verbosity > 0:
-        print(f"Cycle {cycle + 1}:")
+    if config.general.verbosity > 0:
+        print(f"Starting cycle {cycle + 1:<3}...")
+
     #   _____                           _
     #  / ____|                         | |
     # | |  __  ___ _ __   ___ _ __ __ _| |_ ___  _ __
@@ -209,10 +298,11 @@ def single_molecule_generator(
         #         | |
         #         |_|
         optimized_molecule = iterative_optimization(
-            mol=mol,
-            engine=refine_engine,
-            config_generate=config.generate,
-            config_refine=config.refine,
+            mol,
+            refine_engine,
+            config.generate,
+            config.refine,
+            resources_local,
             verbosity=config.general.verbosity,
         )
     except RuntimeError as e:
@@ -231,7 +321,8 @@ def single_molecule_generator(
                 optimized_molecule,
                 postprocess_engine,  # type: ignore
                 config.postprocess,
-                config.general.verbosity,
+                resources_local,
+                verbosity=config.general.verbosity,
             )
         except RuntimeError as e:
             if config.general.verbosity > 0:
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,5 +9,6 @@ dependencies: @@
       - pre-commit
       - pytest
       - tox
+      - tqdm
       - pip:
           - covdefaults