Skip to content

Commit

Permalink
refactor generation of atom list to make it more agnostic w.r.t. to n…
Browse files Browse the repository at this point in the history
…umber of atoms

Signed-off-by: Marcel Müller <[email protected]>
  • Loading branch information
marcelmbn committed Aug 18, 2024
1 parent 050c931 commit 075371b
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 79 deletions.
2 changes: 1 addition & 1 deletion src/mindlessgen/generator/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def header(version: str) -> str:
"║ ██║ ╚═╝ ██║██║██║ ╚████║██████╔╝███████╗███████╗███████║███████║╚██████╔╝███████╗██║ ╚████║ ║\n"
"║ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝╚═════╝ ╚══════╝╚══════╝╚══════╝╚══════╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ║\n"
"║ ║\n"
f"║ MindlessGen v{version[:5]}\n"
f"║ MindlessGen v{version[:5]} \n"
"║ Semi-Automated Molecule Generator ║\n"
"║ ║\n"
"║ Licensed under the Apache License, Version 2.0 ║\n"
Expand Down
250 changes: 172 additions & 78 deletions src/mindlessgen/molecules/generate_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def generate_random_molecule(
mol.charge = set_random_charge(mol.ati, verbosity)
mol.set_name_from_formula()

# if verbosity > 1, print the molecule
if verbosity > 1:
print(mol)

Expand All @@ -56,86 +55,181 @@ def generate_atom_list(cfg: GenerateConfig, verbosity: int = 1) -> np.ndarray:
102, dtype=int
) # 102 is the number of accessible elements in the periodic table

numatoms_all = np.random.randint(1, 7)
for _ in range(numatoms_all):
# Define the atom type to be added via a random choice from the set of valid elements
ati = np.random.choice(list(valid_elems))
if verbosity > 1:
print(f"Adding atom type {ati}...")
# Add a random number of atoms of the defined type
natoms[ati] = natoms[ati] + np.random.randint(0, 3)

# > If too many alkali and alkaline earth metals are included, reduce their number
group_one_two = get_alkali_metals() + get_alkaline_earth_metals()
nmetals = 0
for i in group_one_two:
nmetals += natoms[i]
# reduce number of metals starting from 2, going to 55
while nmetals > 3:
# Reasoning for the parameters in the following sections:
# - The number of the atoms added by default (DefaultRandom + AddOrganicAtoms)
# should match the minimum number of atoms in the molecule (if defined).
# - The number of the atoms added by default (DefaultRandom + AddOrganicAtoms)
# should not exceed the maximum number of atoms in the molecule (if defined).
# if the maximum number of atoms is not defined, the number of atoms added by default
# should not exceed the minimum number of atoms + 20.
# The current default values are the overall minimum if no other values are defined.
# In general, the ratio of "default_random" atoms vs. "add_organic_atoms" atoms is 2:1.

# With both 'add_random' and 'add_organic', we want to have ca. 60 % of the min_num_atoms
# Reasoning: For an example of 6 atoms after 'add_random' and 'add_organic',
# the mean number of atoms added by 'add_hydrogen' is ca. 4
# Thus: Round to the nearest integer of 0.6 * 2/3 = 0.4 of cfg.min_num_atoms
if cfg.min_num_atoms:
low_lim_default_random = round(cfg.min_num_atoms * (0.4))
lim_organic = round(low_lim_default_random * 0.5)
else:
low_lim_default_random = 1 # default value if nothing is defined
lim_organic = 5
if cfg.max_num_atoms:
max_lim_default_random = round((cfg.max_num_atoms + 10) * (0.4))
else:
if cfg.min_num_atoms:
max_lim_default_random = round((cfg.min_num_atoms + 20) * (0.4))
else:
max_lim_default_random = 7 # default value if nothing is defined

def add_random(min_adds: int, max_adds: int, min_nat: int, max_nat: int) -> None:
    """
    Default random atom generation.

    Draws the number of additions from ``[min_adds, max_adds)``. For each
    addition, a random element is chosen from ``valid_elems`` and its entry
    in ``natoms`` is increased by a random amount from ``[min_nat, max_nat)``.
    The same element may be chosen more than once.

    Args:
        min_adds: Lower bound (inclusive) for the number of additions.
        max_adds: Upper bound (exclusive) for the number of additions.
        min_nat: Lower bound (inclusive) for the atoms added per addition.
        max_nat: Upper bound (exclusive) for the atoms added per addition.

    Raises:
        ValueError: Propagated from ``np.random.randint`` if a lower bound
            is not smaller than the corresponding upper bound.
    """
    numatoms_all = np.random.randint(
        min_adds, max_adds
    )  # with range(1, 7) -> mean value: 3.5
    # The pool of valid elements does not change inside the loop: build it once.
    candidates = list(valid_elems)
    for _ in range(numatoms_all):
        # Define the atom type to be added via a random choice from the set of valid elements
        ati = np.random.choice(candidates)
        if verbosity > 1:
            print(f"Adding atom type {ati}...")
        # Add a random number of atoms of the defined type
        natoms[ati] = natoms[ati] + np.random.randint(
            min_nat, max_nat
        )  # with range(0, 3) -> mean value: 1
    # max value of this section with commented settings: 12

def add_organic(num_adds: int, min_nat: int, max_nat: int) -> None:
    """
    Add organic elements (B, C, N, O, F).

    Performs ``num_adds`` additions. Each addition picks one element between
    boron and fluorine at random and increases its entry in ``natoms`` by a
    random amount from ``[min_nat, max_nat)``.

    Args:
        num_adds: Number of additions to perform.
        min_nat: Lower bound (inclusive) for the atoms added per addition.
        max_nat: Upper bound (exclusive) for the atoms added per addition.
    """
    # natoms is indexed by (atomic number - 1): index 0 is hydrogen, so
    # B..F (Z = 5..9) are the indices 4..8. np.random.randint excludes its
    # upper bound, hence 9 (an upper bound of 10 would also admit neon).
    # NOTE(review): the pick is not filtered through valid_elems - confirm
    # that this is intended.
    for _ in range(num_adds):
        ati = np.random.randint(4, 9)
        if verbosity > 1:
            print(f"Adding atom type {ati}...")
        natoms[ati] = natoms[ati] + np.random.randint(
            min_nat, max_nat
        )  # with range(0, 3) -> mean value: 1

def remove_group_onetwo():
# > If too many alkali and alkaline earth metals are included, reduce their number
group_one_two = get_alkali_metals() + get_alkaline_earth_metals()
nmetals = 0
for i in group_one_two:
if natoms[i] > 0:
natoms[i] = natoms[i] - 1
nmetals -= 1
if nmetals <= 3:
break

# If the sum of all other metals is larger than three, reduce the number of metals
other_metals = (
get_three_d_metals()
+ get_four_d_metals()
+ get_five_d_metals()
+ get_lanthanides()
)
n_othermetals = 0
for i in other_metals:
n_othermetals += natoms[i]
while n_othermetals > 3:
nmetals += natoms[i]
# reduce number of metals starting from 2, going to 55
while nmetals > 3:
for i in group_one_two:
if natoms[i] > 0:
natoms[i] = natoms[i] - 1
if verbosity > 1:
print(f"Removing group 1/2 metal of type: {i}...")
nmetals -= 1
if nmetals <= 3:
break

def remove_metals():
    """
    Limit the number of transition metal and lanthanide atoms to three.

    The 3d, 4d, 5d and lanthanide elements are visited in order and one atom
    is removed from each element that is still present. The passes repeat
    until no more than three such atoms remain in ``natoms``.
    """
    metal_types = (
        get_three_d_metals()
        + get_four_d_metals()
        + get_five_d_metals()
        + get_lanthanides()
    )
    # Number of atoms that still have to be removed to reach the limit of three.
    surplus = sum(natoms[elem] for elem in metal_types) - 3
    while surplus > 0:
        for elem in metal_types:
            if natoms[elem] <= 0:
                continue
            natoms[elem] -= 1
            if verbosity > 1:
                print(f"Removing transition metal/LN of type: {elem}...")
            surplus -= 1
            if surplus <= 0:
                break

def add_hydrogen():
    """
    Make sure that the composition contains hydrogen.

    Does nothing if ``natoms[0]`` (hydrogen) is already non-zero. Otherwise
    ``1 + round(u * nat * 1.2)`` hydrogen atoms are added, where ``nat`` is
    the current total number of atoms and ``u`` is a uniform random number
    from ``[0, 1)``.
    """
    if natoms[0] != 0:
        return
    total_atoms = np.sum(natoms)
    fraction = np.random.rand()
    # Example: for 5 atoms at this point, between 1 and 7 H atoms are added.
    natoms[0] += 1 + round(fraction * total_atoms * 1.2)

def check_min_max_atoms():
# If the number of atoms is smaller than the minimum number of atoms, add atoms
while np.sum(natoms) < cfg.min_num_atoms:
if verbosity > 1:
print(
f"Minimal number of atoms: {cfg.min_num_atoms}; Actual number of atoms: {np.sum(natoms)}.\nAdding atoms..."
)
ati = np.random.choice(list(valid_elems))
max_limit = cfg.element_composition.get(ati, (None, None))[1]
if max_limit is not None and natoms[ati] >= max_limit:
continue
natoms[ati] = natoms[ati] + 1
# If the number of atoms is larger than the maximum number of atoms, remove atoms randomly
tmp_count = 0
while np.sum(natoms) > cfg.max_num_atoms:
tmp_count += 1
if tmp_count > 100:
raise RuntimeError(
"Could not generate a molecule with the given constraints."
)
if verbosity > 1:
print(
f"Max number of atoms: {cfg.max_num_atoms}; Actual number of atoms: {np.sum(natoms)}.\nRemoving atoms..."
)
# generate a list of all atom types that are included in the molecule with at least one atom
# if the occurrence is > 1, add it multiple times to the list
atom_list = []
for i, count in enumerate(natoms):
if count > 0:
atom_list.extend([i] * count)
# randomly select an atom type from the list, thereby weighting the selection for reduction by the current occurrence
# generate a random number between 0 and the number of atoms in the list
random_index = np.random.randint(len(atom_list))
i = atom_list[int(random_index)]
if natoms[i] > 0:
natoms[i] = natoms[i] - 1
n_othermetals -= 1
if n_othermetals <= 3:
break

# Add Elements between B and F (5-9)
for _ in range(5):
i = np.random.randint(4, 10)
natoms[i] = natoms[i] + np.random.randint(0, 3)

# If no H is included, add H atoms
if natoms[0] == 0:
nat = np.sum(natoms)
minnat = min(nat, 10)
randint = np.random.rand()
j = 1 + int(randint * minnat * 1.2)
natoms[0] = natoms[0] + j

# Align with the given element_composition:
# CAUTION: The setting to min/max count may violate the metal count restrictions
for elem, count_range in cfg.element_composition.items():
min_count, max_count = count_range
if min_count is not None and natoms[elem] < min_count:
natoms[elem] = min_count
elif max_count is not None and natoms[elem] > max_count:
natoms[elem] = max_count

# If the number of atoms is smaller than the minimum number of atoms, add atoms
while np.sum(natoms) < cfg.min_num_atoms:
ati = np.random.choice(list(valid_elems))
max_limit = cfg.element_composition.get(ati, (None, None))[1]
if max_limit is not None and natoms[ati] >= max_limit:
continue
natoms[ati] = natoms[ati] + 1
# If the number of atoms is larger than the maximum number of atoms, remove atoms randomly
while np.sum(natoms) > cfg.max_num_atoms:
print(f"Number of atoms: {np.sum(natoms)}")
print(f"Max number of atoms: {cfg.max_num_atoms}")
i = np.random.randint(0, MAX_ELEM)
if natoms[i] > 0:
min_limit = cfg.element_composition.get(i, (None, None))[0]
if min_limit is not None and natoms[i] > min_limit:
print(f"Removing atom type {i}...")
natoms[i] = natoms[i] - 1
min_limit = cfg.element_composition.get(i, (None, None))[0]
if verbosity > 1:
print(f"Trying to remove atom type {i}...")
if min_limit is None or natoms[i] > min_limit:
natoms[i] = natoms[i] - 1

def check_composition():
    """
    Clamp the atom counts to the limits in ``cfg.element_composition``.

    Each entry maps an element index to a ``(min_count, max_count)`` pair in
    which either limit may be ``None`` (no limit). A count below the minimum
    is raised to the minimum; otherwise a count above the maximum is lowered
    to the maximum.

    CAUTION: Setting a count to its min/max limit may violate the metal count
    restrictions.
    """
    for elem, (lower, upper) in cfg.element_composition.items():
        current = natoms[elem]
        if lower is not None and current < lower:
            natoms[elem] = lower
        elif upper is not None and current > upper:
            natoms[elem] = upper

### ACTUAL WORKFLOW START ###
# Add a random number of atoms of random types
add_random(low_lim_default_random, max_lim_default_random, 0, 3)
# Check for too many group 1 and 2 metals
remove_group_onetwo()
# Check for too many transition and lanthanide metals
remove_metals()
# Add organic elements (B, C, N, O, F)
add_organic(lim_organic, 0, 3)
# Add hydrogen if not included
add_hydrogen()
# Check if pre-defined atom type counts are within the defined limits
check_composition()
# Check if the number of atoms is within the defined limits
check_min_max_atoms()
### ACTUAL WORKFLOW END ###

return natoms

Expand Down

0 comments on commit 075371b

Please sign in to comment.