From 075371b1790e6e8299c9298fc19693c9d9917bcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcel=20M=C3=BCller?= Date: Sun, 18 Aug 2024 22:01:08 +0200 Subject: [PATCH] refactor generation of atom list to make it more agnostic w.r.t. to number of atoms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marcel Müller --- src/mindlessgen/generator/main.py | 2 +- .../molecules/generate_molecule.py | 250 ++++++++++++------ 2 files changed, 173 insertions(+), 79 deletions(-) diff --git a/src/mindlessgen/generator/main.py b/src/mindlessgen/generator/main.py index c19dd16..3a7daf5 100644 --- a/src/mindlessgen/generator/main.py +++ b/src/mindlessgen/generator/main.py @@ -181,7 +181,7 @@ def header(version: str) -> str: "║ ██║ ╚═╝ ██║██║██║ ╚████║██████╔╝███████╗███████╗███████║███████║╚██████╔╝███████╗██║ ╚████║ ║\n" "║ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝╚═════╝ ╚══════╝╚══════╝╚══════╝╚══════╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ║\n" "║ ║\n" - f"║ MindlessGen v{version[:5]} ║\n" + f"║ MindlessGen v{version[:5]} ║\n" "║ Semi-Automated Molecule Generator ║\n" "║ ║\n" "║ Licensed under the Apache License, Version 2.0 ║\n" diff --git a/src/mindlessgen/molecules/generate_molecule.py b/src/mindlessgen/molecules/generate_molecule.py index 15c423b..9b28670 100644 --- a/src/mindlessgen/molecules/generate_molecule.py +++ b/src/mindlessgen/molecules/generate_molecule.py @@ -32,7 +32,6 @@ def generate_random_molecule( mol.charge = set_random_charge(mol.ati, verbosity) mol.set_name_from_formula() - # if verbosity > 1, print the molecule if verbosity > 1: print(mol) @@ -56,86 +55,181 @@ def generate_atom_list(cfg: GenerateConfig, verbosity: int = 1) -> np.ndarray: 102, dtype=int ) # 102 is the number of accessible elements in the periodic table - numatoms_all = np.random.randint(1, 7) - for _ in range(numatoms_all): - # Define the atom type to be added via a random choice from the set of valid elements - ati = np.random.choice(list(valid_elems)) - if verbosity > 
1: - print(f"Adding atom type {ati}...") - # Add a random number of atoms of the defined type - natoms[ati] = natoms[ati] + np.random.randint(0, 3) - - # > If too many alkaline and alkine earth metals are included, restart generation - group_one_two = get_alkali_metals() + get_alkaline_earth_metals() - nmetals = 0 - for i in group_one_two: - nmetals += natoms[i] - # reduce number of metals starting from 2, going to 55 - while nmetals > 3: + # Reasoning for the parameters in the following sections: + # - The number of atoms added by default (DefaultRandom + AddOrganicAtoms) + # should match the minimum number of atoms in the molecule (if defined). + # - The number of atoms added by default (DefaultRandom + AddOrganicAtoms) + # should not exceed the maximum number of atoms in the molecule (if defined). + # If the maximum number of atoms is not defined, the number of atoms added by default + # should not exceed the minimum number of atoms + 20. + # The current default values are the overall minimum if no other values are defined. + # In general, the ratio of "default_random" atoms vs. "add_organic_atoms" atoms is 2:1. + + # With both 'add_random' and 'add_organic', we want to have ca. 60 % of the min_num_atoms + # Reasoning: For an example of 6 atoms after 'add_random' and 'add_organic', + # the mean number of atoms added by 'add_hydrogen' is ca. 
4 + # Thus: Round to the nearest integer of 0.6 * 2/3 = 0.4 of cfg.min_num_atoms + if cfg.min_num_atoms: + low_lim_default_random = round(cfg.min_num_atoms * (0.4)) + lim_organic = round(low_lim_default_random * 0.5) + else: + low_lim_default_random = 1 # default value if nothing is defined + lim_organic = 5 + if cfg.max_num_atoms: + max_lim_default_random = round((cfg.max_num_atoms + 10) * (0.4)) + else: + if cfg.min_num_atoms: + max_lim_default_random = round((cfg.min_num_atoms + 20) * (0.4)) + else: + max_lim_default_random = 7 # default value if nothing is defined + + def add_random(min_adds: int, max_adds: int, min_nat: int, max_nat: int): + """ + Default random atom generation. + """ + numatoms_all = np.random.randint( + min_adds, max_adds + ) # with range(1, 7) -> mean value: 3.5 + for _ in range(numatoms_all): + # Define the atom type to be added via a random choice from the set of valid elements + ati = np.random.choice(list(valid_elems)) + if verbosity > 1: + print(f"Adding atom type {ati}...") + # Add a random number of atoms of the defined type + natoms[ati] = natoms[ati] + np.random.randint( + min_nat, max_nat + ) # with range(0, 3) -> mean value: 1 + # max value of this section with commented settings: 12 + + def add_organic(num_adds: int, min_nat: int, max_nat: int): + """ + Add organic elements. 
+ """ + # Add Elements between B and F (5-9) + for _ in range(num_adds): # with range(5) -> mean value 1.5 + ati = np.random.randint(4, 10) + if verbosity > 1: + print(f"Adding atom type {ati}...") + natoms[ati] = natoms[ati] + np.random.randint( + min_nat, max_nat + ) # with range(0, 3) -> mean value: 1 + # max value of this section with commented settings: 8 + + def remove_group_onetwo(): + # > If too many alkali and alkaline earth metals are included, reduce their number + group_one_two = get_alkali_metals() + get_alkaline_earth_metals() + nmetals = 0 for i in group_one_two: - if natoms[i] > 0: - natoms[i] = natoms[i] - 1 - nmetals -= 1 - if nmetals <= 3: - break - - # If the sum of all other metals is larger than three, reduce the number of metals - other_metals = ( - get_three_d_metals() - + get_four_d_metals() - + get_five_d_metals() - + get_lanthanides() - ) - n_othermetals = 0 - for i in other_metals: - n_othermetals += natoms[i] - while n_othermetals > 3: + nmetals += natoms[i] + # reduce number of metals starting from 2, going to 55 + while nmetals > 3: + for i in group_one_two: + if natoms[i] > 0: + natoms[i] = natoms[i] - 1 + if verbosity > 1: + print(f"Removing group 1/2 metal of type: {i}...") + nmetals -= 1 + if nmetals <= 3: + break + + def remove_metals(): + # If the sum of all other metals is larger than three, reduce the number of metals + other_metals = ( + get_three_d_metals() + + get_four_d_metals() + + get_five_d_metals() + + get_lanthanides() + ) + n_othermetals = 0 for i in other_metals: + n_othermetals += natoms[i] + while n_othermetals > 3: + for i in other_metals: + if natoms[i] > 0: + natoms[i] = natoms[i] - 1 + if verbosity > 1: + print(f"Removing transition metal/LN of type: {i}...") + n_othermetals -= 1 + if n_othermetals <= 3: + break + + def add_hydrogen(): + # If no H is included, add H atoms + if natoms[0] == 0: + nat = np.sum(natoms) + randint = np.random.rand() + j = 1 + round(randint * nat * 1.2) + natoms[0] = natoms[0] + j + # 
Example: For 5 atoms at this point, + # the mean number of added H atoms is (mean(1, 2, 3, 4, 5, 6))=3.5 + + def check_min_max_atoms(): + # If the number of atoms is smaller than the minimum number of atoms, add atoms + while np.sum(natoms) < cfg.min_num_atoms: + if verbosity > 1: + print( + f"Minimal number of atoms: {cfg.min_num_atoms}; Actual number of atoms: {np.sum(natoms)}.\nAdding atoms..." + ) + ati = np.random.choice(list(valid_elems)) + max_limit = cfg.element_composition.get(ati, (None, None))[1] + if max_limit is not None and natoms[ati] >= max_limit: + continue + natoms[ati] = natoms[ati] + 1 + # If the number of atoms is larger than the maximum number of atoms, remove atoms randomly + tmp_count = 0 + while np.sum(natoms) > cfg.max_num_atoms: + tmp_count += 1 + if tmp_count > 100: + raise RuntimeError( + "Could not generate a molecule with the given constraints." + ) + if verbosity > 1: + print( + f"Max number of atoms: {cfg.max_num_atoms}; Actual number of atoms: {np.sum(natoms)}.\nRemoving atoms..." 
+ ) + # generate a list of all atom types that are included in the molecule with at least one atom + # if the occurrence is > 1, add it multiple times to the list + atom_list = [] + for i, count in enumerate(natoms): + if count > 0: + atom_list.extend([i] * count) + # randomly select an atom type from the list, thereby weighting the selection for reduction by the current occurrence + # generate a random number between 0 and the number of atoms in the list + random_index = np.random.randint(len(atom_list)) + i = atom_list[int(random_index)] if natoms[i] > 0: - natoms[i] = natoms[i] - 1 - n_othermetals -= 1 - if n_othermetals <= 3: - break - - # Add Elements between B and F (5-9) - for _ in range(5): - i = np.random.randint(4, 10) - natoms[i] = natoms[i] + np.random.randint(0, 3) - - # If no H is included, add H atoms - if natoms[0] == 0: - nat = np.sum(natoms) - minnat = min(nat, 10) - randint = np.random.rand() - j = 1 + int(randint * minnat * 1.2) - natoms[0] = natoms[0] + j - - # Align with the given element_composition: - # CAUTION: The setting to min/max count may violate the metal count restrictions - for elem, count_range in cfg.element_composition.items(): - min_count, max_count = count_range - if min_count is not None and natoms[elem] < min_count: - natoms[elem] = min_count - elif max_count is not None and natoms[elem] > max_count: - natoms[elem] = max_count - - # If the number of atoms is smaller than the minimum number of atoms, add atoms - while np.sum(natoms) < cfg.min_num_atoms: - ati = np.random.choice(list(valid_elems)) - max_limit = cfg.element_composition.get(ati, (None, None))[1] - if max_limit is not None and natoms[ati] >= max_limit: - continue - natoms[ati] = natoms[ati] + 1 - # If the number of atoms is larger than the maximum number of atoms, remove atoms randomly - while np.sum(natoms) > cfg.max_num_atoms: - print(f"Number of atoms: {np.sum(natoms)}") - print(f"Max number of atoms: {cfg.max_num_atoms}") - i = np.random.randint(0, MAX_ELEM) - 
if natoms[i] > 0: - min_limit = cfg.element_composition.get(i, (None, None))[0] - if min_limit is not None and natoms[i] > min_limit: - print(f"Removing atom type {i}...") - natoms[i] = natoms[i] - 1 + min_limit = cfg.element_composition.get(i, (None, None))[0] + if verbosity > 1: + print(f"Trying to remove atom type {i}...") + if min_limit is None or natoms[i] > min_limit: + natoms[i] = natoms[i] - 1 + + def check_composition(): + # Align with the given element_composition: + # CAUTION: The setting to min/max count may violate the metal count restrictions + for elem, count_range in cfg.element_composition.items(): + min_count, max_count = count_range + if min_count is not None and natoms[elem] < min_count: + natoms[elem] = min_count + elif max_count is not None and natoms[elem] > max_count: + natoms[elem] = max_count + + ### ACTUAL WORKFLOW START ### + # Add a random number of atoms of random types + add_random(low_lim_default_random, max_lim_default_random, 0, 3) + # Check for too many group 1 and 2 metals + remove_group_onetwo() + # Check for too many transition and lanthanide metals + remove_metals() + # Add organic elements (B, C, N, O, F) + add_organic(lim_organic, 0, 3) + # Add hydrogen if not included + add_hydrogen() + # Check if pre-defined atom type counts are within the defined limits + check_composition() + # Check if the number of atoms is within the defined limits + check_min_max_atoms() + ### ACTUAL WORKFLOW END ### return natoms