This repository was archived by the owner on Dec 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathrepresentations.py
722 lines (592 loc) · 28.4 KB
/
representations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
# MIT License
#
# Copyright (c) 2017-2018 Anders Steen Christensen, Lars Andersen Bratholm, Bing Huang
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import print_function
import numpy as np
import itertools as itl
from .frepresentations import fgenerate_coulomb_matrix
from .frepresentations import fgenerate_unsorted_coulomb_matrix
from .frepresentations import fgenerate_local_coulomb_matrix
from .frepresentations import fgenerate_atomic_coulomb_matrix
from .frepresentations import fgenerate_eigenvalue_coulomb_matrix
from .frepresentations import fgenerate_bob
from qml.utils import NUCLEAR_CHARGE
from .slatm import get_boa
from .slatm import get_sbop
from .slatm import get_sbot
from .facsf import fgenerate_acsf, fgenerate_acsf_and_gradients
from .facsf import fgenerate_fchl_acsf, fgenerate_fchl_acsf_and_gradients
def vector_to_matrix(v):
""" Converts a representation from 1D vector to 2D square matrix.
:param v: 1D input representation.
:type v: numpy array
:return: Square matrix representation.
:rtype: numpy array
"""
if not (np.sqrt(8*v.shape[0]+1) == int(np.sqrt(8*v.shape[0]+1))):
print("ERROR: Can not make a square matrix.")
exit(1)
n = v.shape[0]
l = (-1 + int(np.sqrt(8*n+1)))//2
M = np.empty((l,l))
index = 0
for i in range(l):
for j in range(l):
if j > i:
continue
M[i,j] = v[index]
M[j,i] = M[i,j]
index += 1
return M
def generate_coulomb_matrix(nuclear_charges, coordinates, size = 23, sorting = "row-norm"):
""" Creates a Coulomb Matrix representation of a molecule.
Sorting of the elements can either be done by ``sorting="row-norm"`` or ``sorting="unsorted"``.
A matrix :math:`M` is constructed with elements
.. math::
M_{ij} =
\\begin{cases}
\\tfrac{1}{2} Z_{i}^{2.4} & \\text{if } i = j \\\\
\\frac{Z_{i}Z_{j}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|} & \\text{if } i \\neq j
\\end{cases},
where :math:`i` and :math:`j` are atom indices, :math:`Z` is nuclear charge and
:math:`\\bf R` is the coordinate in euclidean space.
If ``sorting = 'row-norm'``, the atom indices are reordered such that
:math:`\\sum_j M_{1j}^2 \\geq \\sum_j M_{2j}^2 \\geq ... \\geq \\sum_j M_{nj}^2`
The upper triangular of M, including the diagonal, is concatenated to a 1D
vector representation.
If ``sorting = 'unsorted``, the elements are sorted in the same order as the input coordinates
and nuclear charges.
The representation is calculated using an OpenMP parallel Fortran routine.
:param nuclear_charges: Nuclear charges of the atoms in the molecule
:type nuclear_charges: numpy array
:param coordinates: 3D Coordinates of the atoms in the molecule
:type coordinates: numpy array
:param size: The size of the largest molecule supported by the representation
:type size: integer
:param sorting: How the atom indices are sorted ('row-norm', 'unsorted')
:type sorting: string
:return: 1D representation - shape (size(size+1)/2,)
:rtype: numpy array
"""
if (sorting == "row-norm"):
return fgenerate_coulomb_matrix(nuclear_charges, \
coordinates, size)
elif (sorting == "unsorted"):
return fgenerate_unsorted_coulomb_matrix(nuclear_charges, \
coordinates, size)
else:
print("ERROR: Unknown sorting scheme requested")
raise SystemExit
def generate_atomic_coulomb_matrix(nuclear_charges, coordinates, size = 23, sorting = "distance",
central_cutoff = 1e6, central_decay = -1, interaction_cutoff = 1e6, interaction_decay = -1,
indices = None):
""" Creates a Coulomb Matrix representation of the local environment of a central atom.
For each central atom :math:`k`, a matrix :math:`M` is constructed with elements
.. math::
M_{ij}(k) =
\\begin{cases}
\\tfrac{1}{2} Z_{i}^{2.4} \\cdot f_{ik}^2 & \\text{if } i = j \\\\
\\frac{Z_{i}Z_{j}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|} \\cdot f_{ik}f_{jk}f_{ij} & \\text{if } i \\neq j
\\end{cases},
where :math:`i`, :math:`j` and :math:`k` are atom indices, :math:`Z` is nuclear charge and
:math:`\\bf R` is the coordinate in euclidean space.
:math:`f_{ij}` is a function that masks long range effects:
.. math::
f_{ij} =
\\begin{cases}
1 & \\text{if } \\|{\\bf R}_{i} - {\\bf R}_{j} \\| \\leq r - \Delta r \\\\
\\tfrac{1}{2} \\big(1 + \\cos\\big(\\pi \\tfrac{\\|{\\bf R}_{i} - {\\bf R}_{j} \\|
- r + \Delta r}{\Delta r} \\big)\\big)
& \\text{if } r - \Delta r < \\|{\\bf R}_{i} - {\\bf R}_{j} \\| \\leq r - \Delta r \\\\
0 & \\text{if } \\|{\\bf R}_{i} - {\\bf R}_{j} \\| > r
\\end{cases},
where the parameters ``central_cutoff`` and ``central_decay`` corresponds to the variables
:math:`r` and :math:`\Delta r` respectively for interactions involving the central atom,
and ``interaction_cutoff`` and ``interaction_decay`` corresponds to the variables
:math:`r` and :math:`\Delta r` respectively for interactions not involving the central atom.
if ``sorting = 'row-norm'``, the atom indices are ordered such that
:math:`\\sum_j M_{1j}(k)^2 \\geq \\sum_j M_{2j}(k)^2 \\geq ... \\geq \\sum_j M_{nj}(k)^2`
if ``sorting = 'distance'``, the atom indices are ordered such that
.. math::
\\|{\\bf R}_{1} - {\\bf R}_{k}\\| \\leq \\|{\\bf R}_{2} - {\\bf R}_{k}\\|
\\leq ... \\leq \\|{\\bf R}_{n} - {\\bf R}_{k}\\|
The upper triangular of M, including the diagonal, is concatenated to a 1D
vector representation.
The representation can be calculated for a subset by either specifying
``indices = [0,1,...]``, where :math:`[0,1,...]` are the requested atom indices,
or by specifying ``indices = 'C'`` to only calculate central carbon atoms.
The representation is calculated using an OpenMP parallel Fortran routine.
:param nuclear_charges: Nuclear charges of the atoms in the molecule
:type nuclear_charges: numpy array
:param coordinates: 3D Coordinates of the atoms in the molecule
:type coordinates: numpy array
:param size: The size of the largest molecule supported by the representation
:type size: integer
:param sorting: How the atom indices are sorted ('row-norm', 'distance')
:type sorting: string
:param central_cutoff: The distance from the central atom, where the coulomb interaction
element will be zero
:type central_cutoff: float
:param central_decay: The distance over which the the coulomb interaction decays from full to none
:type central_decay: float
:param interaction_cutoff: The distance between two non-central atom, where the coulomb interaction
element will be zero
:type interaction_cutoff: float
:param interaction_decay: The distance over which the the coulomb interaction decays from full to none
:type interaction_decay: float
:param indices: Subset indices or atomtype
:type indices: Nonetype/array/string
:return: nD representation - shape (:math:`N_{atoms}`, size(size+1)/2)
:rtype: numpy array
"""
if indices == None:
nindices = len(nuclear_charges)
indices = np.arange(1,1+nindices, 1, dtype = int)
elif type("") == type(indices):
if indices in NUCLEAR_CHARGE:
indices = np.where(nuclear_charges == NUCLEAR_CHARGE[indices])[0] + 1
nindices = indices.size
if nindices == 0:
return np.zeros((0,0))
else:
print("ERROR: Unknown value %s given for 'indices' variable" % indices)
raise SystemExit
else:
indices = np.asarray(indices, dtype = int) + 1
nindices = indices.size
if (sorting == "row-norm"):
return fgenerate_local_coulomb_matrix(indices, nindices, nuclear_charges,
coordinates, nuclear_charges.size, size,
central_cutoff, central_decay, interaction_cutoff, interaction_decay)
elif (sorting == "distance"):
return fgenerate_atomic_coulomb_matrix(indices, nindices, nuclear_charges,
coordinates, nuclear_charges.size, size,
central_cutoff, central_decay, interaction_cutoff, interaction_decay)
else:
print("ERROR: Unknown sorting scheme requested")
raise SystemExit
def generate_eigenvalue_coulomb_matrix(nuclear_charges, coordinates, size = 23):
""" Creates an eigenvalue Coulomb Matrix representation of a molecule.
A matrix :math:`M` is constructed with elements
.. math::
M_{ij} =
\\begin{cases}
\\tfrac{1}{2} Z_{i}^{2.4} & \\text{if } i = j \\\\
\\frac{Z_{i}Z_{j}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|} & \\text{if } i \\neq j
\\end{cases},
where :math:`i` and :math:`j` are atom indices, :math:`Z` is nuclear charge and
:math:`\\bf R` is the coordinate in euclidean space.
The molecular representation of the molecule is then the sorted eigenvalues of M.
The representation is calculated using an OpenMP parallel Fortran routine.
:param nuclear_charges: Nuclear charges of the atoms in the molecule
:type nuclear_charges: numpy array
:param coordinates: 3D Coordinates of the atoms in the molecule
:type coordinates: numpy array
:param size: The size of the largest molecule supported by the representation
:type size: integer
:return: 1D representation - shape (size, )
:rtype: numpy array
"""
return fgenerate_eigenvalue_coulomb_matrix(nuclear_charges,
coordinates, size)
def generate_bob(nuclear_charges, coordinates, atomtypes, size=23, asize = {"O":3, "C":7, "N":3, "H":16, "S":1}):
""" Creates a Bag of Bonds (BOB) representation of a molecule.
The representation expands on the coulomb matrix representation.
For each element a bag (vector) is constructed for self interactions
(e.g. ('C', 'H', 'O')).
For each element pair a bag is constructed for interatomic interactions
(e.g. ('CC', 'CH', 'CO', 'HH', 'HO', 'OO')), sorted by value.
The self interaction of element :math:`I` is given by
:math:`\\tfrac{1}{2} Z_{I}^{2.4}`,
with :math:`Z_{i}` being the nuclear charge of element :math:`i`
The interaction between atom :math:`i` of element :math:`I` and
atom :math:`j` of element :math:`J` is given by
:math:`\\frac{Z_{I}Z_{J}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|}`
with :math:`R_{i}` being the euclidean coordinate of atom :math:`i`.
The sorted bags are concatenated to an 1D vector representation.
The representation is calculated using an OpenMP parallel Fortran routine.
:param nuclear_charges: Nuclear charges of the atoms in the molecule
:type nuclear_charges: numpy array
:param coordinates: 3D Coordinates of the atoms in the molecule
:type coordinates: numpy array
:param size: The maximum number of atoms in the representation
:type size: integer
:param asize: The maximum number of atoms of each element type supported by the representation
:type asize: dictionary
:return: 1D representation
:rtype: numpy array
"""
n = 0
atoms = sorted(asize, key=asize.get)
nmax = [asize[key] for key in atoms]
ids = np.zeros(len(nmax), dtype=int)
for i, (key, value) in enumerate(zip(atoms,nmax)):
n += value * (1+value)
ids[i] = NUCLEAR_CHARGE[key]
for j in range(i):
v = nmax[j]
n += 2 * value * v
n /= 2
return fgenerate_bob(nuclear_charges, coordinates, nuclear_charges, ids, nmax, n)
def get_slatm_mbtypes(nuclear_charges, pbc='000'):
"""
Get the list of minimal types of many-body terms in a dataset. This resulting list
is necessary as input in the ``generate_slatm_representation()`` function.
:param nuclear_charges: A list of the nuclear charges for each compound in the dataset.
:type nuclear_charges: list of numpy arrays
:param pbc: periodic boundary condition along x,y,z direction, defaulted to '000', i.e., molecule
:type pbc: string
:return: A list containing the types of many-body terms.
:rtype: list
"""
zs = nuclear_charges
nm = len(zs)
zsmax = set()
nas = []
zs_ravel = []
for zsi in zs:
na = len(zsi); nas.append(na)
zsil = list(zsi); zs_ravel += zsil
zsmax.update( zsil )
zsmax = np.array( list(zsmax) )
nass = []
for i in range(nm):
zsi = np.array(zs[i],int)
nass.append( [ (zi == zsi).sum() for zi in zsmax ] )
nzmax = np.max(np.array(nass), axis=0)
nzmax_u = []
if pbc != '000':
# the PBC will introduce new many-body terms, so set
# nzmax to 3 if it's less than 3
for nzi in nzmax:
if nzi <= 2:
nzi = 3
nzmax_u.append(nzi)
nzmax = nzmax_u
boas = [ [zi,] for zi in zsmax ]
bops = [ [zi,zi] for zi in zsmax ] + list( itl.combinations(zsmax,2) )
bots = []
for i in zsmax:
for bop in bops:
j,k = bop
tas = [ [i,j,k], [i,k,j], [j,i,k] ]
for tasi in tas:
if (tasi not in bots) and (tasi[::-1] not in bots):
nzsi = [ (zj == tasi).sum() for zj in zsmax ]
if np.all(nzsi <= nzmax):
bots.append( tasi )
mbtypes = boas + bops + bots
return mbtypes #, np.array(zs_ravel), np.array(nas)
def generate_slatm(coordinates, nuclear_charges, mbtypes,
unit_cell=None, local=False, sigmas=[0.05,0.05], dgrids=[0.03,0.03],
rcut=4.8, alchemy=False, pbc='000', rpower=6):
"""
Generate Spectrum of London and Axillrod-Teller-Muto potential (SLATM) representation.
Both global (``local=False``) and local (``local=True``) SLATM are available.
A version that works for periodic boundary conditions will be released soon.
NOTE: You will need to run the ``get_slatm_mbtypes()`` function to get the ``mbtypes`` input (or generate it manually).
:param coordinates: Input coordinates
:type coordinates: numpy array
:param nuclear_charges: List of nuclear charges.
:type nuclear_charges: numpy array
:param mbtypes: Many-body types for the whole dataset, including 1-, 2- and 3-body types. Could be obtained by calling ``get_slatm_mbtypes()``.
:type mbtypes: list
:param local: Generate a local representation. Defaulted to False (i.e., global representation); otherwise, atomic version.
:type local: bool
:param sigmas: Controlling the width of Gaussian smearing function for 2- and 3-body parts, defaulted to [0.05,0.05], usually these do not need to be adjusted.
:type sigmas: list
:param dgrids: The interval between two sampled internuclear distances and angles, defaulted to [0.03,0.03], no need for change, compromised for speed and accuracy.
:type dgrids: list
:param rcut: Cut-off radius, defaulted to 4.8 Angstrom.
:type rcut: float
:param alchemy: Swith to use the alchemy version of SLATM. (default=False)
:type alchemy: bool
:param pbc: defaulted to '000', meaning it's a molecule; the three digits in the string corresponds to x,y,z direction
:type pbc: string
:param rpower: The power of R in 2-body potential, defaulted to London potential (=6).
:type rpower: float
:return: 1D SLATM representation
:rtype: numpy array
"""
c = unit_cell
iprt=False
if c is None:
c = np.array([[1,0,0],[0,1,0],[0,0,1]])
if pbc != '000':
# print(' -- handling systems with periodic boundary condition')
assert c != None, 'ERROR: Please specify unit cell for SLATM'
# =======================================================================
# PBC may introduce new many-body terms, so at the stage of get statistics
# info from db, we've already considered this point by letting maximal number
# of nuclear charges being 3.
# =======================================================================
zs = nuclear_charges
na = len(zs)
coords = coordinates
obj = [ zs, coords, c ]
iloc = local
if iloc:
mbs = []
X2Ns = []
for ia in range(na):
# if iprt: print ' -- ia = ', ia + 1
n1 = 0; n2 = 0; n3 = 0
mbs_ia = np.zeros(0)
icount = 0
for mbtype in mbtypes:
if len(mbtype) == 1:
mbsi = get_boa(mbtype[0], np.array([zs[ia],]))
#print ' -- mbsi = ', mbsi
if alchemy:
n1 = 1
n1_0 = mbs_ia.shape[0]
if n1_0 == 0:
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
elif n1_0 == 1:
mbs_ia += mbsi
else:
raise '#ERROR'
else:
n1 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
elif len(mbtype) == 2:
#print ' 001, pbc = ', pbc
mbsi = get_sbop(mbtype, obj, iloc=iloc, ia=ia, \
sigma=sigmas[0], dgrid=dgrids[0], rcut=rcut, \
pbc=pbc, rpower=rpower)
mbsi *= 0.5 # only for the two-body parts, local rpst
#print ' 002'
if alchemy:
n2 = len(mbsi)
n2_0 = mbs_ia.shape[0]
if n2_0 == n1:
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
elif n2_0 == n1 + n2:
t = mbs_ia[n1:n1+n2] + mbsi
mbs_ia[n1:n1+n2] = t
else:
raise '#ERROR'
else:
n2 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
else: # len(mbtype) == 3:
mbsi = get_sbot(mbtype, obj, iloc=iloc, ia=ia, \
sigma=sigmas[1], dgrid=dgrids[1], rcut=rcut, pbc=pbc)
if alchemy:
n3 = len(mbsi)
n3_0 = mbs_ia.shape[0]
if n3_0 == n1 + n2:
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
elif n3_0 == n1 + n2 + n3:
t = mbs_ia[n1+n2:n1+n2+n3] + mbsi
mbs_ia[n1+n2:n1+n2+n3] = t
else:
raise '#ERROR'
else:
n3 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
mbs.append( mbs_ia )
X2N = [n1,n2,n3];
if X2N not in X2Ns:
X2Ns.append(X2N)
assert len(X2Ns) == 1, '#ERROR: multiple `X2N ???'
else:
n1 = 0; n2 = 0; n3 = 0
mbs = np.zeros(0)
for mbtype in mbtypes:
if len(mbtype) == 1:
mbsi = get_boa(mbtype[0], zs)
if alchemy:
n1 = 1
n1_0 = mbs.shape[0]
if n1_0 == 0:
mbs = np.concatenate( (mbs, [sum(mbsi)] ), axis=0 )
elif n1_0 == 1:
mbs += sum(mbsi )
else:
raise '#ERROR'
else:
n1 += len(mbsi)
mbs = np.concatenate( (mbs, mbsi), axis=0 )
elif len(mbtype) == 2:
mbsi = get_sbop(mbtype, obj, sigma=sigmas[0], \
dgrid=dgrids[0], rcut=rcut, rpower=rpower)
if alchemy:
n2 = len(mbsi)
n2_0 = mbs.shape[0]
if n2_0 == n1:
mbs = np.concatenate( (mbs, mbsi), axis=0 )
elif n2_0 == n1 + n2:
t = mbs[n1:n1+n2] + mbsi
mbs[n1:n1+n2] = t
else:
raise '#ERROR'
else:
n2 += len(mbsi)
mbs = np.concatenate( (mbs, mbsi), axis=0 )
else: # len(mbtype) == 3:
mbsi = get_sbot(mbtype, obj, sigma=sigmas[1], \
dgrid=dgrids[1], rcut=rcut)
if alchemy:
n3 = len(mbsi)
n3_0 = mbs.shape[0]
if n3_0 == n1 + n2:
mbs = np.concatenate( (mbs, mbsi), axis=0 )
elif n3_0 == n1 + n2 + n3:
t = mbs[n1+n2:n1+n2+n3] + mbsi
mbs[n1+n2:n1+n2+n3] = t
else:
raise '#ERROR'
else:
n3 += len(mbsi)
mbs = np.concatenate( (mbs, mbsi), axis=0 )
return mbs
def generate_acsf(nuclear_charges, coordinates, elements = [1,6,7,8,16], nRs2 = 3, nRs3 = 3, nTs = 3, eta2 = 1,
eta3 = 1, zeta = 1, rcut = 5, acut = 5, bin_min=0.8, gradients = False, pad=None):
"""
Generate the variant of atom-centered symmetry functions used in https://doi.org/10.1039/C7SC04934J
:param nuclear_charges: List of nuclear charges.
:type nuclear_charges: numpy array
:param coordinates: Input coordinates
:type coordinates: numpy array
:param elements: list of unique nuclear charges (atom types)
:type elements: numpy array
:param nRs2: Number of gaussian basis functions in the two-body terms
:type nRs2: integer
:param nRs3: Number of gaussian basis functions in the three-body radial part
:type nRs3: integer
:param nTs: Number of basis functions in the three-body angular part
:type nTs: integer
:param eta2: Precision in the gaussian basis functions in the two-body terms
:type eta2: float
:param eta3: Precision in the gaussian basis functions in the three-body radial part
:type eta3: float
:param zeta: Precision parameter of basis functions in the three-body angular part
:type zeta: float
:param rcut: Cut-off radius of the two-body terms
:type rcut: float
:param acut: Cut-off radius of the three-body terms
:type acut: float
:param bin_min: the value at which to start binning the distances
:type bin_min: positive float
:param gradients: To return gradients or not
:type gradients: boolean
:param pad: `None` if no padding is to be applied other, otherwise an integer corresponding to the desired size
:type gradients: NoneType or integer
:return: Atom-centered symmetry functions representation
:rtype: numpy array
"""
Rs2 = np.linspace(bin_min, rcut, nRs2)
Rs3 = np.linspace(bin_min, acut, nRs3)
Ts = np.linspace(0, np.pi, nTs)
n_elements = len(elements)
natoms = len(coordinates)
descr_size = n_elements * nRs2 + (n_elements * (n_elements + 1)) // 2 * nRs3*nTs
if gradients is False:
rep = fgenerate_acsf(coordinates, nuclear_charges, elements, Rs2, Rs3,
Ts, eta2, eta3, zeta, rcut, acut, natoms, descr_size)
if pad is not None:
rep_pad = np.zeros((pad, descr_size))
rep_pad[:natoms,:] += rep
return rep_pad
else:
return rep
else:
(rep, grad) = fgenerate_acsf_and_gradients(coordinates, nuclear_charges, elements, Rs2, Rs3,
Ts, eta2, eta3, zeta, rcut, acut, natoms, descr_size)
if pad is not None:
rep_pad = np.zeros((pad, descr_size))
grad_pad = np.zeros((pad, descr_size, pad, 3))
rep_pad[:natoms,:] += rep
grad_pad[:natoms,:,:natoms,:] += grad
return rep_pad, grad_pad
else:
return rep, grad
def generate_fchl_acsf(nuclear_charges, coordinates, elements = [1,6,7,8,16],
nRs2=24, nRs3=20, nFourier=1, eta2=0.32, eta3=2.7, zeta=np.pi, rcut=8.0, acut=8.0,
two_body_decay=1.8, three_body_decay=0.57, three_body_weight=13.4,
pad=False, gradients=False):
"""
FCHL-ACSF
Reasonable hyperparameters:
Sigma ~ 21.0
Lambda ~ 1e-8
Max singular value ~ 1e-12
:param nuclear_charges: List of nuclear charges.
:type nuclear_charges: numpy array
:param coordinates: Input coordinates
:type coordinates: numpy array
:param elements: list of unique nuclear charges (atom types)
:type elements: numpy array
:param nRs2: Number of gaussian basis functions in the two-body terms
:type nRs2: integer
:param nRs3: Number of gaussian basis functions in the three-body radial part
:type nRs3: integer
:param nFourier: Order of Fourier expansion
:type nFourier: integer
:param eta2: Precision in the gaussian basis functions in the two-body terms
:type eta2: float
:param eta3: Precision in the gaussian basis functions in the three-body radial part
:type eta3: float
:param zeta: Precision parameter of basis functions in the three-body angular part
:type zeta: float
:param rcut: Cut-off radius of the two-body terms
:type rcut: float
:param acut: Cut-off radius of the three-body terms
:type acut: float
:param gradients: To return gradients or not
:type gradients: boolean
:return: Atom-centered symmetry functions representation
:rtype: numpy array
"""
Rs2 = np.linspace(0, rcut, 1+nRs2)[1:]
Rs3 = np.linspace(0, acut, 1+nRs3)[1:]
Ts = np.linspace(0, np.pi, 2*nFourier)
n_elements = len(elements)
natoms = len(coordinates)
descr_size = n_elements * nRs2 + (n_elements * (n_elements + 1)) * nRs3* nFourier
# Normalization constant for three-body
three_body_weight = np.sqrt(eta3/np.pi) * three_body_weight
if gradients is False:
rep = fgenerate_fchl_acsf(coordinates, nuclear_charges, elements, Rs2, Rs3,
Ts, eta2, eta3, zeta, rcut, acut, natoms, descr_size,
two_body_decay, three_body_decay, three_body_weight)
if pad is not False:
rep_pad = np.zeros((pad, descr_size))
rep_pad[:natoms,:] += rep
return rep_pad
else:
return rep
else:
if (nFourier > 1):
print("Error: FCHL-ACSF only supports nFourier=1, requested", nfourier)
exit()
(rep, grad) = fgenerate_fchl_acsf_and_gradients(coordinates, nuclear_charges, elements, Rs2, Rs3,
Ts, eta2, eta3, zeta, rcut, acut, natoms, descr_size,
two_body_decay, three_body_decay, three_body_weight)
if pad is not False:
rep_pad = np.zeros((pad, descr_size))
grad_pad = np.zeros((pad, descr_size, pad, 3))
rep_pad[:natoms,:] += rep
grad_pad[:natoms,:,:natoms,:] += grad
return rep_pad, grad_pad
else:
return rep, grad