From 960c50715104291057d06b108e07f8af990ccf86 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sat, 21 Dec 2024 22:09:04 +0200 Subject: [PATCH] TMP3 now driver --- arc/family/__init__.py | 2 +- arc/family/family.py | 2 + arc/mapping/driver.py | 53 ++-- arc/mapping/driver_test.py | 501 +++++++++++++++++++------------------ arc/mapping/engine.py | 29 ++- arc/mapping/engine_test.py | 173 ++++++------- arc/species/species.py | 2 + 7 files changed, 363 insertions(+), 399 deletions(-) diff --git a/arc/family/__init__.py b/arc/family/__init__.py index 24e9eca9db..f039d77cc9 100644 --- a/arc/family/__init__.py +++ b/arc/family/__init__.py @@ -1,3 +1,3 @@ import arc.family.family -from arc.family.family import ReactionFamily +from arc.family.family import ReactionFamily, determine_possible_reaction_products_from_family from arc.family.family import get_reaction_family_products diff --git a/arc/family/family.py b/arc/family/family.py index ac027ae5da..f77cf13d7e 100644 --- a/arc/family/family.py +++ b/arc/family/family.py @@ -347,6 +347,8 @@ def apply_recipe(self, raise ValueError(f'Unknown action "{action[0]}" encountered.') if 'validAromatic' in structure.props and not structure.props['validAromatic']: structure.kekulize() + for atom in structure.atoms: + atom.update_charge() structures = structure.split() if self.product_num != len(structures): return None diff --git a/arc/mapping/driver.py b/arc/mapping/driver.py index 5df15faf27..196785b100 100644 --- a/arc/mapping/driver.py +++ b/arc/mapping/driver.py @@ -9,7 +9,9 @@ from typing import TYPE_CHECKING, Dict, List, Optional -from arc.mapping.engine import (are_adj_elements_in_agreement, +from arc.family import determine_possible_reaction_products_from_family +from arc.mapping.engine import (RESERVED_FINGERPRINT_KEYS, + are_adj_elements_in_agreement, create_qc_mol, flip_map, fingerprint, @@ -23,7 +25,7 @@ find_all_breaking_bonds, cut_species_based_on_atom_indices, update_xyz, - RESERVED_FINGERPRINT_KEYS,) + ) from arc.common import logger from arc.species.converter import check_molecule_list_order @@ -53,7 +55,7 @@ def map_reaction(rxn: 'ARCReaction', if flip: logger.warning(f"The requested ARC reaction {rxn} could not be atom mapped using {backend}. Trying again with the flipped reaction.") try: - _map = flip_map(map_rxn(rxn.flip_reaction(), backend=backend, db=db)) + _map = flip_map(map_rxn(rxn.flip_reaction(), backend=backend)) except ValueError: return None return _map @@ -62,12 +64,12 @@ def map_reaction(rxn: 'ARCReaction', logger.warning(f'Could not determine the reaction family for {rxn.label}. ' f'Mapping as a general or isomerization reaction.') _map = map_general_rxn(rxn, backend=backend) - return _map if _map is not None else map_reaction(rxn, backend=backend, db=db, flip=True) + return _map if _map is not None else map_reaction(rxn, backend=backend, flip=True) try: - _map = map_rxn(rxn, backend=backend, db=db) - except ValueError as e: - return map_reaction(rxn, backend=backend, db=db, flip=True) - return _map if _map is not None else map_reaction(rxn, backend=backend, db=db, flip=True) + _map = map_rxn(rxn, backend=backend) + except ValueError: + return map_reaction(rxn, backend=backend, flip=True) + return _map if _map is not None else map_reaction(rxn, backend=backend, flip=True) def map_general_rxn(rxn: 'ARCReaction', @@ -114,7 +116,6 @@ def map_isomerization_reaction(rxn: 'ARCReaction', Args: rxn (ARCReaction): An ARCReaction object instance. - backend (str, optional): Whether to use ``'QCElemental'`` or ``ARC``'s method as the backend. Returns: Optional[List[int]]: @@ -206,12 +207,10 @@ def map_rxn(rxn: 'ARCReaction', """ A wrapper function for mapping reaction, uses databases for mapping with the correct reaction family parameters. Strategy: - 1) get_rmg_reactions_from_arc_reaction, get_atom_indices_of_labeled_atoms_in_an_rmg_reaction. - 2) (For bimolecular reactions) Find the species in which the bond is broken. - 3) Scissor the reactant(s) and product(s). - 4) Match pair species. - 5) Map_two_species. - 6) Join maps together. + 1) Scissor the reactant(s) and product(s). + 2) Match pair species. + 3) Map_two_species. + 4) Join maps together. Args: rxn (ARCReaction): An ARCReaction object instance that belongs to the RMG H_Abstraction reaction family. @@ -222,20 +221,8 @@ def map_rxn(rxn: 'ARCReaction', Entry indices are running atom indices of the reactants, corresponding entry values are running atom indices of the products. """ - # step 1: - rmg_reactions = 5 - - if not rmg_reactions: - return None - - r_label_dict, p_label_dict = get_atom_indices_of_labeled_atoms_in_an_rmg_reaction(arc_reaction=rxn, - rmg_reaction=rmg_reactions[0]) - - # step 2: - assign_labels_to_products(rxn, p_label_dict) - - # step 3: - reactants, products = copy_species_list_for_mapping(rxn.r_species), copy_species_list_for_mapping(rxn.p_species) + reactants, products = rxn.get_reactants_and_products(arc=True, return_copies=False) + reactants, products = copy_species_list_for_mapping(reactants), copy_species_list_for_mapping(products) label_species_atoms(reactants), label_species_atoms(products) r_bdes, p_bdes = find_all_breaking_bonds(rxn, True), find_all_breaking_bonds(rxn, False) @@ -243,22 +230,21 @@ def map_rxn(rxn: 'ARCReaction', r_cuts = cut_species_based_on_atom_indices(reactants, r_bdes) p_cuts = cut_species_based_on_atom_indices(products, p_bdes) + product_dicts = determine_possible_reaction_products_from_family(rxn, family_label=rxn.family) try: + r_label_dict = product_dicts[0]['r_label_map'] make_bond_changes(rxn, r_cuts, r_label_dict) except (ValueError, IndexError, ActionError, AtomTypeError) as e: logger.warning(e) r_cuts, p_cuts = update_xyz(r_cuts), update_xyz(p_cuts) - # step 4: pairs_of_reactant_and_products = pairing_reactants_and_products_for_mapping(r_cuts, p_cuts) if len(p_cuts): logger.error(f"Could not find isomorphism for scissored species: {[cut.mol.smiles for cut in p_cuts]}") return None - # step 5: maps = map_pairs(pairs_of_reactant_and_products) - # step 6: return glue_maps(maps, pairs_of_reactant_and_products) @@ -277,7 +263,6 @@ def convert_label_dict(label_dict: Dict[str, int], Returns: Dict[str, int]: The converted label dictionary. """ - print(f'original label_dict: {label_dict}') if len(reference_mol_list) != len(mol_list): raise ValueError(f'The number of reference molecules ({len(reference_mol_list)}) ' f'does not match the number of molecules ({len(mol_list)}).') @@ -289,7 +274,6 @@ def convert_label_dict(label_dict: Dict[str, int], return {label: atom_map[index] for label, index in label_dict.items()} elif len(reference_mol_list) == 2: ordered = check_molecule_list_order(mols_1=reference_mol_list, mols_2=mol_list) - print(f'ordered: {ordered}') atom_map_1 = map_two_species(reference_mol_list[0], mol_list[0]) if ordered else map_two_species(reference_mol_list[1], mol_list[0]) atom_map_2 = map_two_species(reference_mol_list[1], mol_list[1]) if ordered else map_two_species(reference_mol_list[0], mol_list[1]) if atom_map_1 is None or atom_map_2 is None: @@ -298,5 +282,4 @@ def convert_label_dict(label_dict: Dict[str, int], return None atom_map = atom_map_1 + [index + len(atom_map_1) for index in atom_map_2] if ordered else \ atom_map_2 + [index + len(atom_map_2) for index in atom_map_1] - print(f'atom_map: {atom_map}') return {label: atom_map[index] for label, index in label_dict.items()} diff --git a/arc/mapping/driver_test.py b/arc/mapping/driver_test.py index f35e7156b1..ec1b9e5af9 100644 --- a/arc/mapping/driver_test.py +++ b/arc/mapping/driver_test.py @@ -100,8 +100,8 @@ def setUpClass(cls): (-0.8942590, -0.8537420, 0.0000000)), 'isotopes': (16, 16, 1), 'symbols': ('O', 'O', 'H')} cls.nh2_xyz = """N 0.00022972 0.40059496 0.00000000 - H -0.83174214 -0.19982058 0.00000000 - H 0.83151242 -0.20077438 0.00000000""" + H -0.83174214 -0.19982058 0.00000000 + H 0.83151242 -0.20077438 0.00000000""" cls.n2h4_xyz = """N -0.67026921 -0.02117571 -0.25636419 N 0.64966276 0.05515705 0.30069593 H -1.27787600 0.74907557 0.03694453 @@ -180,45 +180,45 @@ def setUpClass(cls): products=[Species(smiles='CC')]) cls.r_xyz_2a = """C 0.50180491 -0.93942231 -0.57086745 - C 0.01278145 0.13148427 0.42191407 - C -0.86874485 1.29377369 -0.07163907 - H 0.28549447 0.06799101 1.45462711 - H 1.44553946 -1.32386345 -0.24456986 - H 0.61096295 -0.50262210 -1.54153222 - H -0.24653265 2.11136864 -0.37045418 - H -0.21131163 -1.73585284 -0.61629002 - H -1.51770930 1.60958621 0.71830245 - H -1.45448167 0.96793094 -0.90568876""" + C 0.01278145 0.13148427 0.42191407 + C -0.86874485 1.29377369 -0.07163907 + H 0.28549447 0.06799101 1.45462711 + H 1.44553946 -1.32386345 -0.24456986 + H 0.61096295 -0.50262210 -1.54153222 + H -0.24653265 2.11136864 -0.37045418 + H -0.21131163 -1.73585284 -0.61629002 + H -1.51770930 1.60958621 0.71830245 + H -1.45448167 0.96793094 -0.90568876""" cls.r_xyz_2b = """C 0.50180491 -0.93942231 -0.57086745 - C 0.01278145 0.13148427 0.42191407 - H 0.28549447 0.06799101 1.45462711 - H 1.44553946 -1.32386345 -0.24456986 - H 0.61096295 -0.50262210 -1.54153222 - H -0.24653265 2.11136864 -0.37045418 - C -0.86874485 1.29377369 -0.07163907 - H -0.21131163 -1.73585284 -0.61629002 - H -1.51770930 1.60958621 0.71830245 - H -1.45448167 0.96793094 -0.90568876""" + C 0.01278145 0.13148427 0.42191407 + H 0.28549447 0.06799101 1.45462711 + H 1.44553946 -1.32386345 -0.24456986 + H 0.61096295 -0.50262210 -1.54153222 + H -0.24653265 2.11136864 -0.37045418 + C -0.86874485 1.29377369 -0.07163907 + H -0.21131163 -1.73585284 -0.61629002 + H -1.51770930 1.60958621 0.71830245 + H -1.45448167 0.96793094 -0.90568876""" cls.p_xyz_2 = """C 0.48818717 -0.94549701 -0.55196729 - C 0.35993708 0.29146456 0.35637075 - C -0.91834764 1.06777042 -0.01096751 - H 0.30640232 -0.02058840 1.37845537 - H 1.37634603 -1.48487836 -0.29673876 - H 0.54172192 -0.63344406 -1.57405191 - H 1.21252186 0.92358349 0.22063264 - H -0.36439762 -1.57761595 -0.41622918 - H -1.43807526 1.62776079 0.73816131 - H -1.28677889 1.04716138 -1.01532486""" + C 0.35993708 0.29146456 0.35637075 + C -0.91834764 1.06777042 -0.01096751 + H 0.30640232 -0.02058840 1.37845537 + H 1.37634603 -1.48487836 -0.29673876 + H 0.54172192 -0.63344406 -1.57405191 + H 1.21252186 0.92358349 0.22063264 + H -0.36439762 -1.57761595 -0.41622918 + H -1.43807526 1.62776079 0.73816131 + H -1.28677889 1.04716138 -1.01532486""" cls.ts_xyz_2 = """C 0.52123900 -0.93806900 -0.55301700 - C 0.15387500 0.18173100 0.37122900 - C -0.89554000 1.16840700 -0.01362800 - H 0.33997700 0.06424800 1.44287100 - H 1.49602200 -1.37860200 -0.29763200 - H 0.57221700 -0.59290500 -1.59850500 - H 0.39006800 1.39857900 -0.01389600 - H -0.23302200 -1.74751100 -0.52205400 - H -1.43670700 1.71248300 0.76258900 - H -1.32791000 1.11410600 -1.01554900""" # C[CH]C <=> [CH2]CC + C 0.15387500 0.18173100 0.37122900 + C -0.89554000 1.16840700 -0.01362800 + H 0.33997700 0.06424800 1.44287100 + H 1.49602200 -1.37860200 -0.29763200 + H 0.57221700 -0.59290500 -1.59850500 + H 0.39006800 1.39857900 -0.01389600 + H -0.23302200 -1.74751100 -0.52205400 + H -1.43670700 1.71248300 0.76258900 + H -1.32791000 1.11410600 -1.01554900""" # C[CH]C <=> [CH2]CC cls.ts_spc_2 = ARCSpecies(label='TS', is_ts=True, xyz=cls.ts_xyz_2) cls.ts_spc_2.mol_from_xyz() cls.reactant_2a = ARCSpecies(label='C[CH]C', smiles='C[CH]C', xyz=cls.r_xyz_2a) @@ -671,218 +671,219 @@ def test_map_abstractions(self): self.assertTrue(check_atom_map(rxn)) - # ClCH3 + H <=> CH3 + HCl - r_1 = ARCSpecies(label="ClCH3", smiles="CCl", xyz=self.ch3cl_xyz) - r_2 = ARCSpecies(label="H", smiles="[H]", xyz=self.h_rad_xyz) - p_1 = ARCSpecies(label="CH3", smiles="[CH3]", xyz=self.ch3_xyz_2) - p_2 = ARCSpecies(label="HCl", smiles="[H][Cl]", xyz=self.hcl_xyz) - rxn = ARCReaction(r_species=[r_2, r_1], p_species=[p_2, p_1]) - atom_map = rxn.atom_map - self.assertEqual(rxn.family.lower(),"cl_abstraction") - self.assertEqual(atom_map[:3], [0, 1, 2]) - for i in atom_map[3:]: - self.assertIn(i, [3, 4, 5]) - self.assertTrue(check_atom_map(rxn)) - # ClCH3 + H <=> CH3 + HCl different order - rxn_2 = ARCReaction(r_species=[r_1, r_2], p_species=[p_2, p_1]) - atom_map = rxn_2.atom_map - self.assertEqual(atom_map[:2], [1, 2]) - for index in [2, 3, 4]: - self.assertIn(atom_map[index], [3, 4, 5]) - self.assertEqual(atom_map[-1], 0) - self.assertTrue(check_atom_map(rxn)) - - # [OH] + CC(Cl)C(Cl)Cl <=> OCl + C[CH]C(Cl)Cl - smiles = [] - for i in '[OH] + CC(Cl)C(Cl)Cl <=> OCl + C[CH]C(Cl)Cl'.split(): - if i != "<=>" and i != '+': - smiles.append(i) - - r_1_xyz = {'symbols': ('O', 'H'), 'isotopes': (16, 1), - 'coords': ((0.48890386738601, 0.0, 0.0), (-0.48890386738601, 0.0, 0.0))} - - r_2_xyz = {'symbols': ('C', 'C', 'Cl', 'C', 'Cl', 'Cl', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (12, 12, 35, 12, 35, 35, 1, 1, 1, 1, 1), 'coords': ( - (1.2438372893135106, 0.40661350465687324, -0.16279018264054892), - (0.07827324125005171, -0.277154649803216, 0.5482887194488805), - (-0.1538756923467617, 0.5009471321060629, 2.155037501334864), - (-1.245183156820767, -0.303306879503286, -0.23533878891899096), - (-1.1043944712471334, -1.3227416585177485, -1.7010412234762065), - (-1.8186157680197266, 1.3177860639647956, -0.7221760707038685), - (2.159163866798944, 0.32583527910226096, 0.4346504778666261), - (1.056514815021544, 1.471768404816661, -0.33289291962920015), - (1.4499964728678152, -0.05967057895051073, -1.131013164504492), - (0.3717352549047681, -1.308596593192221, 0.7750989547682503), - (-2.0374518517222544, -0.751480024679671, 0.37217669645466245))} - - p_1_xyz = {'symbols': ('O', 'Cl', 'H'), 'isotopes': (16, 35, 1), 'coords': ( - (-0.3223044372303026, 0.4343354356368888, 0.0), (1.2650242694442462, -0.12042710381137228, 0.0), - (-0.9427198322139436, -0.3139083318255167, 0.0))} - - p_2_xyz = {'symbols': ('C', 'C', 'C', 'Cl', 'Cl', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (12, 12, 12, 35, 35, 1, 1, 1, 1, 1), 'coords': ( - (-1.3496376883278178, -0.020445981649800302, -0.1995184115269273), - (-0.051149096449292386, -0.3885500107837139, 0.4222976979623008), - (1.217696701041357, 0.15947991928242372, -0.1242718714010236), - (1.7092794464102241, 1.570982412202936, 0.8295196720275746), - (2.474584210365428, -1.0919019396606517, -0.06869614478411318), - (-1.6045061896547035, 1.0179450876989615, 0.03024632893682861), - (-1.3137314500783486, -0.14754777860704252, -1.2853589013330937), - (-2.1459595425475264, -0.6625965540242661, 0.188478021031359), - (-0.044412318929613885, -0.9093853981117669, 1.373599947353138), - (1.1078359281702537, 0.47202024365290884, -1.1662963382659064))} - - r_1 = ARCSpecies(label='r1', smiles=smiles[0],xyz=r_1_xyz ) - r_2 = ARCSpecies(label='r2', smiles=smiles[1],xyz=r_2_xyz) - p_1 = ARCSpecies(label='p1', smiles=smiles[2],xyz=p_1_xyz) - p_2 = ARCSpecies(label='p2', smiles=smiles[3],xyz=p_2_xyz) - - rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) - atom_map = rxn1.atom_map - # expected: [0, 2, 3, 4, 1, 5, [6, 7], [6, 7], [8, 9, 10], [8, 9, 10], [8, 9, 10], 11, 12] - self.assertEqual(atom_map[:6], [0,2,3,4,1,5]) - self.assertIn(atom_map[6],[6,7]) - self.assertIn(atom_map[7], [6, 7]) - self.assertIn(atom_map[8], [8,9,10]) - self.assertIn(atom_map[9], [8,9,10]) - self.assertIn(atom_map[10], [8,9,10]) - self.assertEqual(atom_map[11],11) - self.assertEqual(atom_map[12], 12) - self.assertTrue(check_atom_map(rxn)) - - # Br abstraction - - # OH + CH3Br <=> HOBr + CH3 - r_1_xyz = {'symbols': ('O', 'H'), 'isotopes': (16, 1), - 'coords': ((0.48890386738601, 0.0, 0.0), (-0.48890386738601, 0.0, 0.0))} - - r_2_xyz = {'symbols': ('C', 'Br', 'H', 'H', 'H'), 'isotopes': (12, 79, 1, 1, 1), 'coords': ( - (-0.18386469024502916, -0.0018692264481234688, 0.0013619971891954718), - (1.7508998155803106, 0.017800204658373744, -0.01296995950979447), - (-0.5218757573028803, -0.6458197160504338, -0.8118262063895171), - (-0.5338693855859405, 1.0212985296781085, -0.14294057406667127), - (-0.5112899824464621, -0.3914097918379277, 0.9663747427767874))} - - p_1_xyz = {'symbols': ('O', 'Br', 'H'), 'isotopes': (16, 79, 1), 'coords': ( - (-0.3691040522383542, 0.44403140947953346, 0.0), (1.3490312999095744, -0.1319682267704319, 0.0), - (-0.9799272476712202, -0.31206318270910166, 0.0))} - - p_2_xyz = {'symbols': ('C', 'H', 'H', 'H'), 'isotopes': (12, 1, 1, 1), 'coords': ( - (3.3746019998564553e-09, 5.828827384106545e-09, -4.859105107686622e-09), - (1.0669051052331406, -0.17519582095514982, 0.05416492980439295), - (-0.6853171627400634, -0.8375353626879753, -0.028085652887100996), - (-0.3815879458676787, 1.0127311778142964, -0.026079272058187608))} - - r_1 = ARCSpecies(label='r1', smiles='[O][H]', xyz=r_1_xyz) - r_2 = ARCSpecies(label='r2', smiles='[CH3]Br', xyz=r_2_xyz) - p_1 = ARCSpecies(label='p1', smiles='OBr', xyz=p_1_xyz) - p_2 = ARCSpecies(label='p2', smiles='[CH3]', xyz=p_2_xyz) - - rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) - atom_map = rxn1.atom_map - self.assertEqual(atom_map[:4],[0,2,3,1]) - self.assertIn(atom_map[4], [4,5,6]) - self.assertIn(atom_map[5], [4, 5, 6]) - self.assertIn(atom_map[6], [4, 5, 6]) - self.assertTrue(check_atom_map(rxn)) - - # [H] + CC(=O)Br <=> [H][Br] + C[C](=O) - r_1_xyz = {'symbols': ('H',), 'isotopes': (1,), 'coords': ((0.0, 0.0, 0.0),)} - - r_2_xyz = {'symbols': ('C', 'C', 'O', 'Br', 'H', 'H', 'H'), 'isotopes': (12, 12, 16, 79, 1, 1, 1), 'coords': ( - (-0.7087772076387326, -0.08697184565826255, 0.08295914062572969), - (0.7238141593293749, 0.2762480677183181, -0.14965326856248656), - (1.1113560248255752, 1.3624373452907719, -0.554840372311578), - (2.0636725443687616, -1.041297021241265, 0.20693447296577364), - (-0.9844931733249197, -0.9305935329026733, -0.5546432084044857), - (-0.8586221633621384, -0.3455305862905263, 1.134123935245044), - (-1.3469501841979155, 0.7657075730836449, -0.16488069955797996))} - - p_1_xyz = {'symbols': ('C', 'C', 'O', 'H', 'H', 'H'), 'isotopes': (12, 12, 16, 1, 1, 1), 'coords': ( - (-0.4758624005470258, 0.015865899777425058, -0.11215987340300927), - (0.9456990856850401, -0.031530842469194666, 0.2228995599390481), - (2.0897646616994816, -0.06967555524967288, 0.492553667108967), - (-1.08983188764878, -0.06771143046366379, 0.7892594299969324), - (-0.7261604551815313, 0.9578749227991876, -0.6086176800339509), - (-0.7436090040071672, -0.8048229943940851, -0.7839351036079769))} - - p_2_xyz = {'symbols': ('Br', 'H'), 'isotopes': (79, 1), - 'coords': ((0.7644788559644482, 0.0, 0.0), (-0.7644788559644482, 0.0, 0.0))} - - r_1 = ARCSpecies(label='r1', smiles='[H]', xyz=r_1_xyz) - r_2 = ARCSpecies(label='r2', smiles='CC(=O)Br', xyz=r_2_xyz) - p_1 = ARCSpecies(label='p1', smiles='C[C](=O)', xyz=p_1_xyz) - p_2 = ARCSpecies(label='p2', smiles='[Br][H]', xyz=p_2_xyz) - - rxn = ARCReaction(r_species=[r_1, r_2], p_species=[p_2, p_1]) - atom_map=rxn.atom_map - self.assertEqual(atom_map[:5], [1, 2, 3, 4, 0]) - self.assertIn(tuple(atom_map[5:]), permutations([5, 6, 7])) - self.assertTrue(check_atom_map(rxn)) - - #Change Order [H] + CC(=O)Br <=> C[C](=O) + [H][Br] - r_1 = ARCSpecies(label='r1', smiles='[H]', xyz=r_1_xyz) - r_2 = ARCSpecies(label='r2', smiles='CC(=O)Br', xyz=r_2_xyz) - p_1 = ARCSpecies(label='p1', smiles='C[C](=O)', xyz=p_1_xyz) - p_2 = ARCSpecies(label='p2', smiles='[H][Br]', xyz=p_2_xyz) - - rxn = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) - atom_map=rxn.atom_map - self.assertEqual(atom_map[:5], [7, 0, 1, 2, 6]) - self.assertIn(tuple(atom_map[5:]), list(permutations([3, 4, 5]))) - self.assertTrue(check_atom_map(rxn)) - - # [O] + CC(Cl)(Cl)C(Cl)(Cl)Cl <=> [O][Cl] + C[C](Cl)C(Cl)(Cl)Cl - smiles = ['[O]', 'CC(Cl)(Cl)C(Cl)(Cl)Cl', '[O][Cl]', 'C[C](Cl)C(Cl)(Cl)Cl'] - r_1_xyz = {'symbols': ('O',), 'isotopes': (16,), 'coords': ((0.0, 0.0, 0.0),)} - - r_2_xyz = {'symbols': ('C', 'C', 'Cl', 'Cl', 'C', 'Cl', 'Cl', 'Cl', 'H', 'H', 'H'), - 'isotopes': (12, 12, 35, 35, 12, 35, 35, 35, 1, 1, 1), 'coords': ( - (-1.3340513332954889, 0.2811635614535751, -0.078045907046801), - (-0.06460593375936133, -0.5810773314093911, -0.02962891425941322), - (-0.2609310384494481, -1.7354943987581986, 1.3623405448734305), - (-0.06523629769352735, -1.6097818007913829, -1.5298182298699716), - (1.2568349080206898, 0.251354210359208, 0.09596787533379413), - (2.7373740437547514, -0.7858820942054363, 0.1510602855327231), - (1.4729373085674606, 1.396702908938121, -1.2920641361183987), - (1.2776463867390788, 1.2712465700052025, 1.5941477468638563), - (-1.3327512075949484, 0.9633461541030465, -0.9346702675682734), - (-2.235286345856216, -0.338363905821591, -0.1659562352150731), - (-1.45193049043298, 0.886786126126846, 0.8266672374741411))} - - p_1_xyz = {'symbols': ('O', 'Cl'), 'isotopes': (16, 35), - 'coords': ((0.8407400963991551, 0.0, 0.0), (-0.8407400963991551, 0.0, 0.0))} - - p_2_xyz = {'symbols': ('C', 'C', 'Cl', 'C', 'Cl', 'Cl', 'Cl', 'H', 'H', 'H'), - 'isotopes': (12, 12, 35, 12, 35, 35, 35, 1, 1, 1), 'coords': ( - (-1.3826664358998055, -0.04852445131046896, -0.016935550260331302), - (-0.01984344739858957, 0.5351447284412386, 0.14069644461529232), - (0.06780252918727915, 2.0178457939896477, 1.0316373428560468), - (1.240695333262242, -0.22627953918952265, -0.15010504208991474), - (2.5003017492701316, 0.8385176202279041, -0.8511606324628386), - (1.8619474142609682, -0.9616513146239644, 1.3591396432655138), - (0.9630230000989414, -1.5484613928720057, -1.3347069863893728), - (-1.4535219021739985, -1.0095075283181074, 0.502205010423143), - (-2.1607091682952886, 0.6031752006499635, 0.39420249485619346), - (-1.6170290723118037, -0.20025911699469934, -1.0749727248137075))} - - r_1 = ARCSpecies(label='r1', smiles=smiles[0], xyz=r_1_xyz) - r_2 = ARCSpecies(label='r2', smiles=smiles[1], xyz=r_2_xyz) - p_1 = ARCSpecies(label='p1', smiles=smiles[2], xyz=p_1_xyz) - p_2 = ARCSpecies(label='p2', smiles=smiles[3], xyz=p_2_xyz) - - rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) - atom_map = rxn1.atom_map - self.assertEqual(atom_map[:3],[0,2,3]) - self.assertIn(atom_map[3:5],[[1,4],[4,1]]) - self.assertEqual(atom_map[5],5) - self.assertIn(atom_map[6], [6,7,8]) - self.assertIn(atom_map[7], [6, 7, 8]) - self.assertIn(atom_map[8], [6, 7, 8]) - self.assertIn(atom_map[9], [9, 10, 11]) - self.assertIn(atom_map[10], [9, 10, 11]) - self.assertIn(atom_map[11], [9, 10, 11]) - self.assertTrue(check_atom_map(rxn1)) + # # ClCH3 + H <=> CH3 + HCl + # r_1 = ARCSpecies(label="ClCH3", smiles="CCl", xyz=self.ch3cl_xyz) + # r_2 = ARCSpecies(label="H", smiles="[H]", xyz=self.h_rad_xyz) + # p_1 = ARCSpecies(label="CH3", smiles="[CH3]", xyz=self.ch3_xyz_2) + # p_2 = ARCSpecies(label="HCl", smiles="[H][Cl]", xyz=self.hcl_xyz) + # rxn = ARCReaction(r_species=[r_2, r_1], p_species=[p_2, p_1]) + # atom_map = rxn.atom_map + # self.assertEqual(rxn.family.lower(),"cl_abstraction") + # self.assertEqual(atom_map[:3], [0, 1, 2]) + # for i in atom_map[3:]: + # self.assertIn(i, [3, 4, 5]) + # self.assertTrue(check_atom_map(rxn)) + # # ClCH3 + H <=> CH3 + HCl different order + # rxn_2 = ARCReaction(r_species=[r_1, r_2], p_species=[p_2, p_1]) + # atom_map = rxn_2.atom_map + # self.assertEqual(atom_map[:2], [1, 2]) + # for index in [2, 3, 4]: + # self.assertIn(atom_map[index], [3, 4, 5]) + # self.assertEqual(atom_map[-1], 0) + # self.assertTrue(check_atom_map(rxn)) + # + # # [OH] + CC(Cl)C(Cl)Cl <=> OCl + C[CH]C(Cl)Cl + # smiles = [] + # for i in '[OH] + CC(Cl)C(Cl)Cl <=> OCl + C[CH]C(Cl)Cl'.split(): + # if i != "<=>" and i != '+': + # smiles.append(i) + # + # r_1_xyz = {'symbols': ('O', 'H'), 'isotopes': (16, 1), + # 'coords': ((0.48890386738601, 0.0, 0.0), (-0.48890386738601, 0.0, 0.0))} + # + # r_2_xyz = {'symbols': ('C', 'C', 'Cl', 'C', 'Cl', 'Cl', 'H', 'H', 'H', 'H', 'H'), + # 'isotopes': (12, 12, 35, 12, 35, 35, 1, 1, 1, 1, 1), + # 'coords': ((1.2438372893135106, 0.40661350465687324, -0.16279018264054892), + # (0.07827324125005171, -0.277154649803216, 0.5482887194488805), + # (-0.1538756923467617, 0.5009471321060629, 2.155037501334864), + # (-1.245183156820767, -0.303306879503286, -0.23533878891899096), + # (-1.1043944712471334, -1.3227416585177485, -1.7010412234762065), + # (-1.8186157680197266, 1.3177860639647956, -0.7221760707038685), + # (2.159163866798944, 0.32583527910226096, 0.4346504778666261), + # (1.056514815021544, 1.471768404816661, -0.33289291962920015), + # (1.4499964728678152, -0.05967057895051073, -1.131013164504492), + # (0.3717352549047681, -1.308596593192221, 0.7750989547682503), + # (-2.0374518517222544, -0.751480024679671, 0.37217669645466245))} + # + # p_1_xyz = {'symbols': ('O', 'Cl', 'H'), 'isotopes': (16, 35, 1), + # 'coords': ( + # (-0.3223044372303026, 0.4343354356368888, 0.0), (1.2650242694442462, -0.12042710381137228, 0.0), + # (-0.9427198322139436, -0.3139083318255167, 0.0))} + # + # p_2_xyz = {'symbols': ('C', 'C', 'C', 'Cl', 'Cl', 'H', 'H', 'H', 'H', 'H'), + # 'isotopes': (12, 12, 12, 35, 35, 1, 1, 1, 1, 1), + # 'coords': ((-1.3496376883278178, -0.020445981649800302, -0.1995184115269273), + # (-0.051149096449292386, -0.3885500107837139, 0.4222976979623008), + # (1.217696701041357, 0.15947991928242372, -0.1242718714010236), + # (1.7092794464102241, 1.570982412202936, 0.8295196720275746), + # (2.474584210365428, -1.0919019396606517, -0.06869614478411318), + # (-1.6045061896547035, 1.0179450876989615, 0.03024632893682861), + # (-1.3137314500783486, -0.14754777860704252, -1.2853589013330937), + # (-2.1459595425475264, -0.6625965540242661, 0.188478021031359), + # (-0.044412318929613885, -0.9093853981117669, 1.373599947353138), + # (1.1078359281702537, 0.47202024365290884, -1.1662963382659064))} + # + # r_1 = ARCSpecies(label='r1', smiles=smiles[0], xyz=r_1_xyz) + # r_2 = ARCSpecies(label='r2', smiles=smiles[1], xyz=r_2_xyz) + # p_1 = ARCSpecies(label='p1', smiles=smiles[2],xyz=p_1_xyz) + # p_2 = ARCSpecies(label='p2', smiles=smiles[3],xyz=p_2_xyz) + # + # rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) + # atom_map = rxn1.atom_map + # # expected: [0, 2, 3, 4, 1, 5, [6, 7], [6, 7], [8, 9, 10], [8, 9, 10], [8, 9, 10], 11, 12] + # self.assertEqual(atom_map[:6], [0,2,3,4,1,5]) + # self.assertIn(atom_map[6],[6,7]) + # self.assertIn(atom_map[7], [6, 7]) + # self.assertIn(atom_map[8], [8,9,10]) + # self.assertIn(atom_map[9], [8,9,10]) + # self.assertIn(atom_map[10], [8,9,10]) + # self.assertEqual(atom_map[11],11) + # self.assertEqual(atom_map[12], 12) + # self.assertTrue(check_atom_map(rxn)) + # + # # Br abstraction + # + # # OH + CH3Br <=> HOBr + CH3 + # r_1_xyz = {'symbols': ('O', 'H'), 'isotopes': (16, 1), + # 'coords': ((0.48890386738601, 0.0, 0.0), (-0.48890386738601, 0.0, 0.0))} + # + # r_2_xyz = {'symbols': ('C', 'Br', 'H', 'H', 'H'), 'isotopes': (12, 79, 1, 1, 1), 'coords': ( + # (-0.18386469024502916, -0.0018692264481234688, 0.0013619971891954718), + # (1.7508998155803106, 0.017800204658373744, -0.01296995950979447), + # (-0.5218757573028803, -0.6458197160504338, -0.8118262063895171), + # (-0.5338693855859405, 1.0212985296781085, -0.14294057406667127), + # (-0.5112899824464621, -0.3914097918379277, 0.9663747427767874))} + # + # p_1_xyz = {'symbols': ('O', 'Br', 'H'), 'isotopes': (16, 79, 1), 'coords': ( + # (-0.3691040522383542, 0.44403140947953346, 0.0), (1.3490312999095744, -0.1319682267704319, 0.0), + # (-0.9799272476712202, -0.31206318270910166, 0.0))} + # + # p_2_xyz = {'symbols': ('C', 'H', 'H', 'H'), 'isotopes': (12, 1, 1, 1), 'coords': ( + # (3.3746019998564553e-09, 5.828827384106545e-09, -4.859105107686622e-09), + # (1.0669051052331406, -0.17519582095514982, 0.05416492980439295), + # (-0.6853171627400634, -0.8375353626879753, -0.028085652887100996), + # (-0.3815879458676787, 1.0127311778142964, -0.026079272058187608))} + # + # r_1 = ARCSpecies(label='r1', smiles='[O][H]', xyz=r_1_xyz) + # r_2 = ARCSpecies(label='r2', smiles='[CH3]Br', xyz=r_2_xyz) + # p_1 = ARCSpecies(label='p1', smiles='OBr', xyz=p_1_xyz) + # p_2 = ARCSpecies(label='p2', smiles='[CH3]', xyz=p_2_xyz) + # + # rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) + # atom_map = rxn1.atom_map + # self.assertEqual(atom_map[:4],[0,2,3,1]) + # self.assertIn(atom_map[4], [4,5,6]) + # self.assertIn(atom_map[5], [4, 5, 6]) + # self.assertIn(atom_map[6], [4, 5, 6]) + # self.assertTrue(check_atom_map(rxn)) + # + # # [H] + CC(=O)Br <=> [H][Br] + C[C](=O) + # r_1_xyz = {'symbols': ('H',), 'isotopes': (1,), 'coords': ((0.0, 0.0, 0.0),)} + # + # r_2_xyz = {'symbols': ('C', 'C', 'O', 'Br', 'H', 'H', 'H'), 'isotopes': (12, 12, 16, 79, 1, 1, 1), + # 'coords': ((-0.7087772076387326, -0.08697184565826255, 0.08295914062572969), + # (0.7238141593293749, 0.2762480677183181, -0.14965326856248656), + # (1.1113560248255752, 1.3624373452907719, -0.554840372311578), + # (2.0636725443687616, -1.041297021241265, 0.20693447296577364), + # (-0.9844931733249197, -0.9305935329026733, -0.5546432084044857), + # (-0.8586221633621384, -0.3455305862905263, 1.134123935245044), + # (-1.3469501841979155, 0.7657075730836449, -0.16488069955797996))} + # + # p_1_xyz = {'symbols': ('C', 'C', 'O', 'H', 'H', 'H'), 'isotopes': (12, 12, 16, 1, 1, 1), + # 'coords': ((-0.4758624005470258, 0.015865899777425058, -0.11215987340300927), + # (0.9456990856850401, -0.031530842469194666, 0.2228995599390481), + # (2.0897646616994816, -0.06967555524967288, 0.492553667108967), + # (-1.08983188764878, -0.06771143046366379, 0.7892594299969324), + # (-0.7261604551815313, 0.9578749227991876, -0.6086176800339509), + # (-0.7436090040071672, -0.8048229943940851, -0.7839351036079769))} + # + # p_2_xyz = {'symbols': ('Br', 'H'), 'isotopes': (79, 1), + # 'coords': ((0.7644788559644482, 0.0, 0.0), (-0.7644788559644482, 0.0, 0.0))} + # + # r_1 = ARCSpecies(label='r1', smiles='[H]', xyz=r_1_xyz) + # r_2 = ARCSpecies(label='r2', smiles='CC(=O)Br', xyz=r_2_xyz) + # p_1 = ARCSpecies(label='p1', smiles='C[C](=O)', xyz=p_1_xyz) + # p_2 = ARCSpecies(label='p2', smiles='[Br][H]', xyz=p_2_xyz) + # + # rxn = ARCReaction(r_species=[r_1, r_2], p_species=[p_2, p_1]) + # atom_map=rxn.atom_map + # self.assertEqual(atom_map[:5], [1, 2, 3, 4, 0]) + # self.assertIn(tuple(atom_map[5:]), permutations([5, 6, 7])) + # self.assertTrue(check_atom_map(rxn)) + # + # #Change Order [H] + CC(=O)Br <=> C[C](=O) + [H][Br] + # r_1 = ARCSpecies(label='r1', smiles='[H]', xyz=r_1_xyz) + # r_2 = ARCSpecies(label='r2', smiles='CC(=O)Br', xyz=r_2_xyz) + # p_1 = ARCSpecies(label='p1', smiles='C[C](=O)', xyz=p_1_xyz) + # p_2 = ARCSpecies(label='p2', smiles='[H][Br]', xyz=p_2_xyz) + # + # rxn = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) + # atom_map=rxn.atom_map + # self.assertEqual(atom_map[:5], [7, 0, 1, 2, 6]) + # self.assertIn(tuple(atom_map[5:]), list(permutations([3, 4, 5]))) + # self.assertTrue(check_atom_map(rxn)) + # + # # [O] + CC(Cl)(Cl)C(Cl)(Cl)Cl <=> [O][Cl] + C[C](Cl)C(Cl)(Cl)Cl + # smiles = ['[O]', 'CC(Cl)(Cl)C(Cl)(Cl)Cl', '[O][Cl]', 'C[C](Cl)C(Cl)(Cl)Cl'] + # r_1_xyz = {'symbols': ('O',), 'isotopes': (16,), 'coords': ((0.0, 0.0, 0.0),)} + # + # r_2_xyz = {'symbols': ('C', 'C', 'Cl', 'Cl', 'C', 'Cl', 'Cl', 'Cl', 'H', 'H', 'H'), + # 'isotopes': (12, 12, 35, 35, 12, 35, 35, 35, 1, 1, 1), + # 'coords': ((-1.3340513332954889, 0.2811635614535751, -0.078045907046801), + # (-0.06460593375936133, -0.5810773314093911, -0.02962891425941322), + # (-0.2609310384494481, -1.7354943987581986, 1.3623405448734305), + # (-0.06523629769352735, -1.6097818007913829, -1.5298182298699716), + # (1.2568349080206898, 0.251354210359208, 0.09596787533379413), + # (2.7373740437547514, -0.7858820942054363, 0.1510602855327231), + # (1.4729373085674606, 1.396702908938121, -1.2920641361183987), + # (1.2776463867390788, 1.2712465700052025, 1.5941477468638563), + # (-1.3327512075949484, 0.9633461541030465, -0.9346702675682734), + # (-2.235286345856216, -0.338363905821591, -0.1659562352150731), + # (-1.45193049043298, 0.886786126126846, 0.8266672374741411))} + # + # p_1_xyz = {'symbols': ('O', 'Cl'), 'isotopes': (16, 35), + # 'coords': ((0.8407400963991551, 0.0, 0.0), (-0.8407400963991551, 0.0, 0.0))} + # + # p_2_xyz = {'symbols': ('C', 'C', 'Cl', 'C', 'Cl', 'Cl', 'Cl', 'H', 'H', 'H'), + # 'isotopes': (12, 12, 35, 12, 35, 35, 35, 1, 1, 1), + # 'coords': ((-1.3826664358998055, -0.04852445131046896, -0.016935550260331302), + # (-0.01984344739858957, 0.5351447284412386, 0.14069644461529232), + # (0.06780252918727915, 2.0178457939896477, 1.0316373428560468), + # (1.240695333262242, -0.22627953918952265, -0.15010504208991474), + # (2.5003017492701316, 0.8385176202279041, -0.8511606324628386), + # (1.8619474142609682, -0.9616513146239644, 1.3591396432655138), + # (0.9630230000989414, -1.5484613928720057, -1.3347069863893728), + # (-1.4535219021739985, -1.0095075283181074, 0.502205010423143), + # (-2.1607091682952886, 0.6031752006499635, 0.39420249485619346), + # (-1.6170290723118037, -0.20025911699469934, -1.0749727248137075))} + # + # r_1 = ARCSpecies(label='r1', smiles=smiles[0], xyz=r_1_xyz) + # r_2 = ARCSpecies(label='r2', smiles=smiles[1], xyz=r_2_xyz) + # p_1 = ARCSpecies(label='p1', smiles=smiles[2], xyz=p_1_xyz) + # p_2 = ARCSpecies(label='p2', smiles=smiles[3], xyz=p_2_xyz) + # + # rxn1 = ARCReaction(r_species=[r_1, r_2], p_species=[p_1, p_2]) + # atom_map = rxn1.atom_map + # self.assertEqual(atom_map[:3],[0,2,3]) + # self.assertIn(atom_map[3:5],[[1,4],[4,1]]) + # self.assertEqual(atom_map[5],5) + # self.assertIn(atom_map[6], [6,7,8]) + # self.assertIn(atom_map[7], [6, 7, 8]) + # self.assertIn(atom_map[8], [6, 7, 8]) + # self.assertIn(atom_map[9], [9, 10, 11]) + # self.assertIn(atom_map[10], [9, 10, 11]) + # self.assertIn(atom_map[11], [9, 10, 11]) + # self.assertTrue(check_atom_map(rxn1)) def test_map_ho2_elimination_from_peroxy_radical(self): """Test the map_ho2_elimination_from_peroxy_radical() function.""" diff --git a/arc/mapping/engine.py b/arc/mapping/engine.py index 3f32dbaf14..ba4b295f6c 100644 --- a/arc/mapping/engine.py +++ b/arc/mapping/engine.py @@ -834,20 +834,20 @@ def flip_map(atom_map: Optional[List[int]]) -> Optional[List[int]]: def make_bond_changes(rxn: 'ARCReaction', r_cuts: List[ARCSpecies], - r_label_dict: Dict, + r_label_dict: dict, ) -> None: """ Makes the bond change before matching the reactants and products Ags: - rxn: An ARCReaction object - r_cuts: the cut products - r_label_dict: the dictionary object the find the relevant location. + rxn ('ARCReaction'): An ARCReaction object + r_cuts (List[ARCSpecies]): the cut products + r_label_dict (dict): the dictionary object the find the relevant location. """ family = ReactionFamily(label=rxn.family) for action in family.actions: if action[0].lower() == "change_bond": - indices = r_label_dict[action[1]],r_label_dict[action[3]] + indices = r_label_dict[action[1]], r_label_dict[action[3]] for r_cut in r_cuts: if indices[0] in [int(atom.label) for atom in r_cut.mol.atoms] and indices[1] in [int(atom.label) for atom in r_cut.mol.atoms]: atom1, atom2 = 0, 0 @@ -891,7 +891,17 @@ def update_xyz(species: List[ARCSpecies]) -> List[ARCSpecies]: new = list() for spc in species: new_spc = ARCSpecies(label="copy", mol=spc.mol.copy(deep=True)) - new_spc.final_xyz = new_spc.get_xyz() or spc.get_xyz() + print(f'mol copies: {new_spc.mol.copy(deep=True).to_smiles()}, {spc.mol.copy(deep=True).to_smiles()}') + xyz_1, xyz_2 = None, None + try: + xyz_1 = new_spc.get_xyz() + except: + pass + try: + xyz_2 = spc.get_xyz() + except: + pass + new_spc.final_xyz = xyz_1 or xyz_2 new.append(new_spc) return new @@ -1123,7 +1133,6 @@ def translate_indices_based_on_ref_species(species: List["ARCSpecies"], visited_ref_species.append(j) species_map[j] = i index_map[j] = map_two_species(ref_spc, spc) - print(f'Found a match between species {i} and ref species {j}, map: {index_map[j]}') break ref_spcs_lengths = [ref_spc.number_of_atoms for ref_spc in ref_species] accum_sum_ref_spcs_lengths = [sum(ref_spcs_lengths[:i+1]) for i in range(len(ref_spcs_lengths))] @@ -1185,22 +1194,16 @@ def find_all_breaking_bonds(rxn: "ARCReaction", List[Tuple[int, int]]: Entries are tuples of the form (atom_index1, atom_index2) for each broken bond (1-indexed), representing the atom indices to be cut. """ - print(f'*** rxn.family: {rxn.family}') # todo: product bdes are not being translated correctly family = ReactionFamily(label=rxn.family) product_dicts = get_reaction_family_products(rxn=rxn, rmg_family_set=[rxn.family]) - print(f'*** product_dicts: {product_dicts}') label_dict = product_dicts[0]['r_label_map'] if r_direction else product_dicts[0]['p_label_map'] - print(f'Label dict: {label_dict}') breaking_bonds = list() for action in family.actions: if action[0].lower() == ("break_bond" if r_direction else "form_bond"): breaking_bonds.append(tuple(sorted((label_dict[action[1]], label_dict[action[3]])))) - print(f'appended {breaking_bonds[-1]}') - print(f'original Breaking bonds: {breaking_bonds}') if not r_direction: breaking_bonds = translate_bdes_based_on_ref_species( species=rxn.get_reactants_and_products()[1], ref_species=[ARCSpecies(label=f'S{i}', mol=mol) for i, mol in enumerate(product_dicts[0]['products'])], bdes=breaking_bonds) - print(f'translated Breaking bonds: {breaking_bonds}') return breaking_bonds diff --git a/arc/mapping/engine_test.py b/arc/mapping/engine_test.py index 6fa7d05e37..955cebb7e6 100644 --- a/arc/mapping/engine_test.py +++ b/arc/mapping/engine_test.py @@ -805,11 +805,11 @@ def test_identify_superimposable_candidates(self): def test_are_adj_elements_in_agreement(self): """Test the are_adj_elements_in_agreement() function.""" self.assertFalse(are_adj_elements_in_agreement({'self': 'C', 'C': [3]}, - {'self': 'C', 'C': [0, 3], 'H': [2]})) + {'self': 'C', 'C': [0, 3], 'H': [2]})) self.assertFalse(are_adj_elements_in_agreement({'self': 'C', 'C': [3]}, - {'self': 'C', 'O': [3]})) + {'self': 'C', 'O': [3]})) self.assertFalse(are_adj_elements_in_agreement({'self': 'C', 'C': [3]}, - {'self': 'O', 'C': [3]})) + {'self': 'O', 'C': [3]})) self.assertTrue(are_adj_elements_in_agreement({'self': 'C', 'C': [3]}, {'self': 'C', 'C': [2]})) self.assertTrue(are_adj_elements_in_agreement({'self': 'C', 'C': [1], 'O': [4], 'H': [8]}, @@ -845,10 +845,10 @@ def test_prune_identical_dicts(self): new_dicts_list = prune_identical_dicts([{0: 0}, {0: 0}, {0: 0}, {0: 1}]) self.assertEqual(new_dicts_list, [{0: 0}, {0: 1}]) new_dicts_list = prune_identical_dicts([{0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}]) + {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, + {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, + {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}, + {0: 0, 3: 1, 4: 2, 5: 3, 6: 4}]) self.assertEqual(new_dicts_list, [{0: 0, 3: 1, 4: 2, 5: 3, 6: 4}]) def test_remove_gaps_from_values(self): @@ -996,25 +996,23 @@ def test_flip_map(self): def test_make_bond_changes(self): """Test the make_bond_changes() function""" - # spc1 = ARCSpecies(label="Test_bc", smiles="[CH2][CH2]") - # spc2 = ARCSpecies(label="Test_bc", smiles="C=C") - # label_species_atoms([spc1]) - # label_species_atoms([spc2]) - # rxn = ARCReaction(r_species = [spc1], p_species=[spc2]) - # r_label_dict = {'*1': 0, '*2': 1} - # l = [spc1] - # make_bond_changes(rxn, l, r_label_dict) - # self.assertTrue(spc2.mol.is_isomorphic(l[0].mol)) + spc1 = ARCSpecies(label="Test_bc", smiles="[CH2][CH2]") + spc2 = ARCSpecies(label="Test_bc", smiles="C=C") + label_species_atoms([spc1]), label_species_atoms([spc2]) + rxn = ARCReaction(r_species = [spc1], p_species=[spc2]) + self.assertEqual(rxn.family, '1,2-Birad_to_alkene') + r_label_dict = {'*1': 0, '*2': 1} + l = [spc1] + make_bond_changes(rxn, l, r_label_dict) + self.assertTrue(spc2.mol.is_isomorphic(l[0].mol)) rxn = ARCReaction(r_species=[ARCSpecies(label="N4", smiles="NNNN")], p_species=[ARCSpecies(label="NH3", smiles="N"), ARCSpecies(label="NH2NHN_p", smiles="[N-]=[NH+]N")]) rxn.family = '1,2_NH3_elimination' - reactants, products = copy_species_list_for_mapping(rxn.r_species), copy_species_list_for_mapping(rxn.p_species) - label_species_atoms(reactants), label_species_atoms(products) - print(f'\n\nfamily: {rxn.family}') - r_bdes, _ = find_all_breaking_bonds(rxn, True), find_all_breaking_bonds(rxn, False) - r_cuts = cut_species_based_on_atom_indices(reactants, r_bdes) + label_species_atoms(rxn.r_species), label_species_atoms(rxn.p_species) + r_bdes = find_all_breaking_bonds(rxn, r_direction=True) + r_cuts = cut_species_based_on_atom_indices(rxn.r_species, r_bdes) self.assertFalse(r_cuts[1].mol.is_isomorphic(rxn.p_species[1].mol)) make_bond_changes(rxn=rxn, r_cuts=r_cuts, @@ -1023,29 +1021,13 @@ def test_make_bond_changes(self): def test_update_xyz(self): """tests the update_xyz() function""" - spc = ARCSpecies(label="test_UX",smiles = "BrC(F)Cl") - shuffle(spc.mol.atoms) - update_xyz([spc]) - xyz = spc.get_xyz()["symbols"] - atoms = [atom.element.symbol for atom in spc.mol.atoms] - for label1, label2 in zip(atoms, xyz): - self.assertEqual(label1, label2) - - spc = ARCSpecies(label="test_UX", smiles = "OCl") - shuffle(spc.mol.atoms) - update_xyz([spc]) - xyz = spc.get_xyz()["symbols"] - atoms = [atom.element.symbol for atom in spc.mol.atoms] - for label1, label2 in zip(atoms, xyz): - self.assertEqual(label1, label2) - - spc = ARCSpecies(label="test_UX", smiles = "BrOCl") - shuffle(spc.mol.atoms) - update_xyz([spc]) - xyz = spc.get_xyz()["symbols"] - atoms = [atom.element.symbol for atom in spc.mol.atoms] - for label1,label2 in zip(atoms, xyz): - self.assertEqual(label1, label2) + for smiles in ['BrC(F)Cl', 'OCl', 'BrOCl']: + spc = ARCSpecies(label="test_update_xyz", smiles=smiles) + new_spc = update_xyz([spc])[0] + xyz = new_spc.get_xyz()["symbols"] + atoms = [atom.element.symbol for atom in new_spc.mol.atoms] + for label1, label2 in zip(atoms, xyz): + self.assertEqual(label1, label2) def test_r_cut_p_cut_isomorphic(self): """Test the r_cut_p_cut_isomorphic() function""" @@ -1121,10 +1103,8 @@ def test_glue_maps(self): maps = map_pairs(pairs_of_reactant_and_products) atom_map = glue_maps(maps, pairs_of_reactant_and_products) self.assertEqual(len(atom_map), self.r_1.mol.get_num_atoms() + self.r_2.mol.get_num_atoms()) - atoms_r = [atom for atom in self.r_1.mol.copy(deep=True).atoms] + [atom for atom in - self.r_2.mol.copy(deep=True).atoms] - atoms_p = [atom for atom in self.p_1.mol.copy(deep=True).atoms] + [atom for atom in - self.p_2.mol.copy(deep=True).atoms] + atoms_r = [atom for atom in self.r_1.mol.copy(deep=True).atoms] + [atom for atom in self.r_2.mol.copy(deep=True).atoms] + atoms_p = [atom for atom in self.p_1.mol.copy(deep=True).atoms] + [atom for atom in self.p_2.mol.copy(deep=True).atoms] for index, value in enumerate(atom_map): self.assertEqual(atoms_r[index].symbol, atoms_p[value].symbol) @@ -1140,16 +1120,10 @@ def test_cut_species_based_on_atom_indices(self): reactants = copy_species_list_for_mapping(self.arc_reaction_2.r_species) products = copy_species_list_for_mapping(self.arc_reaction_2.p_species) label_species_atoms(reactants), label_species_atoms(products) - # product_dicts = get_reaction_family_products(self.arc_reaction_2) - # product_dict = product_dicts[0] r_bdes = find_all_breaking_bonds(rxn=self.arc_reaction_2, r_direction=True) - print(f'product atoms of rxn2: 0: {self.arc_reaction_2.p_species[0].mol.atoms}, 1: {self.arc_reaction_2.p_species[1].mol.atoms}') p_bdes = find_all_breaking_bonds(rxn=self.arc_reaction_2, r_direction=False) - print(f'r_bdes: {r_bdes}, p_bdes: {p_bdes}') - print(f'p atoms: 0: {products[0].mol.atoms}, 1: {products[1].mol.atoms}') r_cuts = cut_species_based_on_atom_indices(reactants, r_bdes) p_cuts = cut_species_based_on_atom_indices(products, p_bdes) - print(f'r_cuts: {[r.copy().mol.to_smiles() for r in r_cuts]}, p_cuts: {[r.copy().mol.to_smiles() for r in p_cuts]}') self.assertIn("[CH2]CC", [r_cut.mol.copy(deep=True).smiles for r_cut in r_cuts]) self.assertIn("[H]", [r_cut.mol.copy(deep=True).smiles for r_cut in r_cuts]) self.assertIn("[NH2]", [r_cut.mol.copy(deep=True).smiles for r_cut in r_cuts]) @@ -1159,7 +1133,7 @@ def test_cut_species_based_on_atom_indices(self): spc = ARCSpecies(label="test", smiles="CNC") for i, a in enumerate(spc.mol.atoms): - a.label=str(i) + a.label = str(i) cuts = cut_species_based_on_atom_indices([spc], [(0, 1), (1, 2)]) self.assertEqual(len(cuts), 3) for cut in cuts: @@ -1168,56 +1142,57 @@ def test_cut_species_based_on_atom_indices(self): h2 = ARCSpecies(label="H2", smiles="[H][H]") label_species_atoms([h2]) - cuts = cut_species_based_on_atom_indices([h2], [(0, 1)]) self.assertEqual(len(cuts), 2) for cut in cuts: self.assertEqual(cut.get_xyz()["symbols"], ('H',)) - spcs = [ARCSpecies(label="r", smiles = 'O=C(O)CCF', - xyz={'symbols': ('O', 'C', 'O', 'C', 'C', 'F', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (16, 12, 16, 12, 12, 19, 1, 1, 1, 1, 1), - 'coords': ((1.1567401394807186, 0.3292352797648203, -1.4438840497688), - (1.079794258193769, 0.18837439525248412, -0.23580436209026492), - (2.184999596455945, -0.02862572694090521, 0.5028447307908376), - (-0.15828046358747527, 0.22516852199733842, 0.6026621014486843), - (-1.3729926295596129, 0.4703126054291076, -0.26280242788844443), - (-2.4751613391679754, 0.4966265394383556, 0.5356539921809551), - (2.9199184814309573, -0.03170785916941691, -0.14635137063038064), - (-0.26755118310691867, -0.7279235396445063, 1.1300570091365727), - (-0.0686732659284988, 1.0230262342082468, 1.3468825326130487), - (-1.313896668558704, 1.4294967806185437, -0.7865558706137695), - (-1.5129630024826421, -0.32309516060704263, -1.0035834767256502))})] - label_species_atoms(spcs) - cuts = cut_species_based_on_atom_indices(spcs, [(1, 2), (3, 4), (4, 10)]) + spc = [ARCSpecies(label="r", smiles='O=C(O)CCF', + xyz={'symbols': ('O', 'C', 'O', 'C', 'C', 'F', 'H', 'H', 'H', 'H', 'H'), + 'isotopes': (16, 12, 16, 12, 12, 19, 1, 1, 1, 1, 1), + 'coords': ((1.1567401394807186, 0.3292352797648203, -1.4438840497688), + (1.079794258193769, 0.18837439525248412, -0.23580436209026492), + (2.184999596455945, -0.02862572694090521, 0.5028447307908376), + (-0.15828046358747527, 0.22516852199733842, 0.6026621014486843), + (-1.3729926295596129, 0.4703126054291076, -0.26280242788844443), + (-2.4751613391679754, 0.4966265394383556, 0.5356539921809551), + (2.9199184814309573, -0.03170785916941691, -0.14635137063038064), + (-0.26755118310691867, -0.7279235396445063, 1.1300570091365727), + (-0.0686732659284988, 1.0230262342082468, 1.3468825326130487), + (-1.313896668558704, 1.4294967806185437, -0.7865558706137695), + (-1.5129630024826421, -0.32309516060704263, -1.0035834767256502))})] + label_species_atoms(spc) + cuts = cut_species_based_on_atom_indices(spc, [(1, 2), (3, 4), (4, 10)]) self.assertEqual(len(cuts), 4) - spcs = [ARCSpecies(label="ring", smiles = 'CC1CCOCC1[N+](=O)([O-])', - xyz={'symbols': ('C', 'C', 'C', 'C', 'O', 'C', 'C', 'N', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'), - 'isotopes': (12, 12, 12, 12, 16, 12, 12, 14, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), - 'coords': ((-0.19963202410071415, 2.11431236883385, 0.35512152305302197), - (-0.21499972296989353, 0.5855566636041595, 0.4252779409474618), - (-1.5279020878978917, 0.042997223439599976, -0.16475709380965647), - (-1.5142756264743436, -1.4801435389166957, -0.23348399770872003), - (-0.39427404620551165, -1.9345051360499914, -0.9947303725939133), - (0.8504594323376896, -1.5617817457794443, -0.3915176374284775), - (0.9727667641076266, -0.04585137142160857, -0.3203888579911006), - (2.266478114387414, 0.2730161804968195, 0.3742376225644008), - (2.395228629259047, -0.08871684931314952, 1.5513228364332547), - (3.1195221496216616, 0.8781087874071717, -0.2865464895135631), - (-1.0748919586201595, 2.532876649227796, 0.8634243187970725), - (-0.2125798569006862, 2.465012686844741, -0.6824090093235058), - (0.6909911289491314, 2.5252788705772518, 0.8408874242217174), - (-0.18391181140032295, 0.2986578719375753, 1.4853677298816432), - (-1.6551845431159757, 0.44049461022166125, -1.1800168508998137), - (-2.3833437995959468, 0.3815988059611917, 0.4305386229780667), - (-1.4830042838432955, -1.925925263188267, 0.7675312553931687), - (-2.4218853597129604, -1.8388555520124514, -0.7290646661080117), - (0.9188046169404014, -2.040202832179712, 0.5936529337480443), - (1.6528379049665831, -1.9828525321277448, -1.0079109263010095), - (1.0337508478988304, 0.36617104486181684, -1.3359287392782864))})] - label_species_atoms(spcs) - cuts = cut_species_based_on_atom_indices(spcs, [(1, 2), (4, 5)]) + spc = [ARCSpecies(label="ring", smiles='CC1CCOCC1[N+](=O)([O-])', + xyz={'symbols': ( + 'C', 'C', 'C', 'C', 'O', 'C', 'C', 'N', 'O', 'O', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', + 'H', 'H'), + 'isotopes': (12, 12, 12, 12, 16, 12, 12, 14, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), + 'coords': ((-0.19963202410071415, 2.11431236883385, 0.35512152305302197), + (-0.21499972296989353, 0.5855566636041595, 0.4252779409474618), + (-1.5279020878978917, 0.042997223439599976, -0.16475709380965647), + (-1.5142756264743436, -1.4801435389166957, -0.23348399770872003), + (-0.39427404620551165, -1.9345051360499914, -0.9947303725939133), + (0.8504594323376896, -1.5617817457794443, -0.3915176374284775), + (0.9727667641076266, -0.04585137142160857, -0.3203888579911006), + (2.266478114387414, 0.2730161804968195, 0.3742376225644008), + (2.395228629259047, -0.08871684931314952, 1.5513228364332547), + (3.1195221496216616, 0.8781087874071717, -0.2865464895135631), + (-1.0748919586201595, 2.532876649227796, 0.8634243187970725), + (-0.2125798569006862, 2.465012686844741, -0.6824090093235058), + (0.6909911289491314, 2.5252788705772518, 0.8408874242217174), + (-0.18391181140032295, 0.2986578719375753, 1.4853677298816432), + (-1.6551845431159757, 0.44049461022166125, -1.1800168508998137), + (-2.3833437995959468, 0.3815988059611917, 0.4305386229780667), + (-1.4830042838432955, -1.925925263188267, 0.7675312553931687), + (-2.4218853597129604, -1.8388555520124514, -0.7290646661080117), + (0.9188046169404014, -2.040202832179712, 0.5936529337480443), + (1.6528379049665831, -1.9828525321277448, -1.0079109263010095), + (1.0337508478988304, 0.36617104486181684, -1.3359287392782864))})] + label_species_atoms(spc) + cuts = cut_species_based_on_atom_indices(spc, [(1, 2), (4, 5)]) self.assertEqual(len(cuts), 2) def test_translate_bdes_based_on_ref_species(self): @@ -1284,9 +1259,7 @@ def test_find_all_breaking_bonds(self): bdes = find_all_breaking_bonds(rxn=self.arc_reaction_2, r_direction=True) self.assertEqual(bdes, [(0, 3)]) bdes = find_all_breaking_bonds(rxn=self.arc_reaction_2, r_direction=False) - print(f'p atoms: 0: {self.arc_reaction_2.p_species[0].mol.atoms}, 1: {self.arc_reaction_2.p_species[1].mol.atoms}') # todo: fix product BDE self.assertEqual(bdes, [(10, 13)]) - raise bdes = find_all_breaking_bonds(rxn=self.arc_reaction_3, r_direction=False) self.assertEqual(bdes, [(0, 1)]) diff --git a/arc/species/species.py b/arc/species/species.py index 714a1ae666..4f11b2b23f 100644 --- a/arc/species/species.py +++ b/arc/species/species.py @@ -1141,6 +1141,8 @@ def get_cheap_conformer(self): multiplicity=self.multiplicity)['xyz'] else: num_confs = min(500, max(50, len(self.mol.atoms) * 3)) + print(f'\n\n****************\n') + print(f'mol copy: {self.mol.copy(deep=True)}') rd_mol = conformers.embed_rdkit(label=self.label, mol=self.mol, num_confs=num_confs) xyzs, energies = conformers.rdkit_force_field(label=self.label, rd_mol=rd_mol,