Skip to content

Commit 9641b2c

Browse files
committed
update rescu parse
1 parent af79620 commit 9641b2c

File tree

6 files changed

+258
-19
lines changed

6 files changed

+258
-19
lines changed

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@ dftio is to assist machine learning communities to transcript DFT output into a
1919
5. kpoint eigenvalue
2020

2121
### 对接软件
22-
1. ABACUS
23-
2. SEASTA
24-
3. Wannier
25-
4. Gaussian
26-
5. PYSCF
27-
6. VASP
22+
1. RESCU
23+
2. ABACUS
24+
3. SEASTA
25+
4. Wannier
26+
5. Gaussian
27+
6. PYSCF
28+
7. VASP
2829

2930
### 基础的数据类接口
3031
应该提供一个比较基本的数据类,可以加载我们parse出的数据类型,让想拿这个搞ML算法的人可以直接用这个数据集加载已经有的数据,或者在这个基础上魔改一下

dftio/constants.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import numpy as np
22

33
orbitalId = {0:'s',1:'p',2:'d',3:'f',4:'g',5:'h'}
4+
anglrMId = {'s':0,'p':1,'d':2,'f':3,'g':4,'h':5}
45
Bohr2Ang = 0.529177249
56

6-
ABACUS2DeePTB = {
7+
ABACUS2DFTIO = {
78
0: np.eye(1, dtype=np.float32),
89
1: np.eye(3, dtype=np.float32)[[2, 0, 1]],
910
2: np.eye(5, dtype=np.float32)[[4, 2, 0, 1, 3]],
@@ -12,8 +13,23 @@
1213
5: np.eye(11, dtype=np.float32)[[10, 8, 6, 4, 2, 0, 1, 3, 5, 7, 9]]
1314
}
1415

15-
ABACUS2DeePTB[1][[0, 2]] *= -1
16-
ABACUS2DeePTB[2][[1, 3]] *= -1
17-
ABACUS2DeePTB[3][[0, 6, 2, 4]] *= -1
18-
ABACUS2DeePTB[4][[1, 7, 3, 5]] *= -1
19-
ABACUS2DeePTB[5][[0, 10, 8, 2, 6, 4]] *= -1
16+
ABACUS2DFTIO[1][[0, 2]] *= -1
17+
ABACUS2DFTIO[2][[1, 3]] *= -1
18+
ABACUS2DFTIO[3][[0, 6, 2, 4]] *= -1
19+
ABACUS2DFTIO[4][[1, 7, 3, 5]] *= -1
20+
ABACUS2DFTIO[5][[0, 10, 8, 2, 6, 4]] *= -1
21+
22+
RESCU2DFTIO = {
23+
0: np.eye(1, dtype=np.float32),
24+
1: np.eye(3, dtype=np.float32),
25+
2: np.eye(5, dtype=np.float32),
26+
3: np.eye(7, dtype=np.float32),
27+
4: np.eye(9, dtype=np.float32),
28+
5: np.eye(11, dtype=np.float32)
29+
}
30+
31+
RESCU2DFTIO[1][[0, 2]] *= -1
32+
RESCU2DFTIO[2][[1, 3]] *= -1
33+
RESCU2DFTIO[3][[0, 6, 2, 4]] *= -1
34+
RESCU2DFTIO[4][[1, 7, 3, 5]] *= -1
35+
RESCU2DFTIO[5][[0, 10, 8, 2, 6, 4]] *= -1

dftio/io/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from .abacus.abacus_parser import AbacusParser
2+
from .rescu.rescu_parser import RescuParser
23

34

45
__all__ = [
5-
"AbacusParser"
6+
"AbacusParser",
7+
"RescuParser"
68
]

dftio/io/abacus/abacus_parser.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
from tqdm import tqdm
55
from collections import Counter
6-
from ...constants import orbitalId, ABACUS2DeePTB
6+
from ...constants import orbitalId, ABACUS2DFTIO
77
import ase
88
import dpdata
99
import os
@@ -23,6 +23,7 @@ def __init__(
2323
super(AbacusParser, self).__init__(root, prefix)
2424
self.raw_sys = [dpdata.LabeledSystem(self.raw_datas[idx], fmt='abacus/'+self.get_mode(idx)) for idx in range(len(self.raw_datas))]
2525

26+
# essential
2627
def get_structure(self, idx):
2728
sys = self.raw_sys[idx]
2829

@@ -44,6 +45,7 @@ def get_mode(self, idx):
4445

4546
return mode
4647

48+
# essential
4749
def get_eigenvalue(self, idx):
4850
path = self.raw_datas[idx]
4951
mode = self.get_mode(idx)
@@ -71,6 +73,7 @@ def get_eigenvalue(self, idx):
7173

7274
return {_keys.ENERGY_EIGENVALUE_KEY: eigs, _keys.KPOINT_KEY: kpts}
7375

76+
# essential
7477
def get_basis(self, idx):
7578
mode = self.get_mode(idx)
7679
logfile = "running_"+mode+".log"
@@ -109,6 +112,7 @@ def get_basis(self, idx):
109112

110113
return basis
111114

115+
# essential
112116
def get_blocks(self, idx, hamiltonian=True, overlap=False, density_matrix=False):
113117
mode = self.get_mode(idx)
114118
logfile = "running_"+mode+".log"
@@ -311,7 +315,7 @@ def parse_matrix(self, matrix_path, nsites, site_norbits, orbital_types_dict, el
311315
np.array(line4).astype(np.int32)), shape=(norbits, norbits), dtype=np.complex64).toarray()
312316
for index_site_i in range(nsites):
313317
for index_site_j in range(nsites):
314-
key_str = f"{index_site_i + 1}_{index_site_j + 1}_{R_cur[0]}_{R_cur[1]}_{R_cur[2]}"
318+
key_str = f"{index_site_i}_{index_site_j}_{R_cur[0]}_{R_cur[1]}_{R_cur[2]}"
315319
mat = hamiltonian_cur[(site_norbits_cumsum[index_site_i]
316320
- site_norbits[index_site_i]) * (1 + spinful):
317321
site_norbits_cumsum[index_site_i] * (1 + spinful),
@@ -335,8 +339,8 @@ def transform(self, mat, l_lefts, l_rights):
335339

336340
if max(*l_lefts, *l_rights) > 5:
337341
raise NotImplementedError("Only support l = s, p, d, f, g, h.")
338-
block_lefts = block_diag(*[ABACUS2DeePTB[l_left] for l_left in l_lefts])
339-
block_rights = block_diag(*[ABACUS2DeePTB[l_right] for l_right in l_rights])
342+
block_lefts = block_diag(*[ABACUS2DFTIO[l_left] for l_left in l_lefts])
343+
block_rights = block_diag(*[ABACUS2DFTIO[l_right] for l_right in l_rights])
340344

341345
return block_lefts @ mat @ block_rights.T
342346

dftio/io/parse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,9 +216,9 @@ def write_dat(self, idx, outroot, eigenvalue=False, hamiltonian=False, overlap=F
216216

217217
return True
218218

219-
def write_lmdb(self, idx, outroot, prefix, eigenvalue: bool=False, hamiltonian: bool=False, overlap: bool=False, density_matrix: bool=False):
219+
def write_lmdb(self, idx, outroot, eigenvalue: bool=False, hamiltonian: bool=False, overlap: bool=False, density_matrix: bool=False):
220220
os.makedirs(outroot, exist_ok=True)
221-
out_dir = os.path.join(outroot, prefix+".lmdb")
221+
out_dir = os.path.join(outroot, "data.lmdb")
222222
lmdb_env = lmdb.open(os.path.join(out_dir), map_size=1048576000000)
223223
structure = self.get_structure(idx)
224224

dftio/io/rescu/rescu_parser.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
from scipy.sparse import csr_matrix
2+
from scipy.linalg import block_diag
3+
import re
4+
from tqdm import tqdm
5+
from collections import Counter
6+
from ...constants import orbitalId, RESCU2DFTIO, anglrMId
7+
import ase
8+
import dpdata
9+
import glob
10+
import h5py
11+
import os
12+
import numpy as np
13+
from ..parse import Parser, ParserRegister, find_target_line
14+
from ...data import _keys
15+
from ...register import Register
16+
17+
Hartree2eV = 27.21138602
18+
Bohr2Angstrom = 0.52917721067
19+
20+
@ParserRegister.register("rescu")
21+
class RescuParser(Parser):
22+
def __init__(self, root, prefix, **kwargs):
23+
super(RescuParser, self).__init__(root, prefix)
24+
# the root + prefix should locate the directory of the output file of each calculation
25+
# the list of path will be saved in self.raw_datas
26+
27+
def calculation_type(self, output):
28+
with h5py.File(output, "r") as f:
29+
calT = "".join([chr(i) for i in f["info"]["calculationType"][:].reshape(-1)])
30+
31+
return calT
32+
33+
34+
def get_structure(self, idx):
35+
global_lists = glob.glob(self[idx] + "/*.mat")
36+
for fs in global_lists:
37+
calT = self.calculation_type(fs)
38+
if calT == "self-consistent":
39+
path = fs
40+
break
41+
42+
with h5py.File(path, "r") as f:
43+
pos = f["atom"]["xyz"][:].T.astype(np.float32)
44+
element_map = f["atom"]["element"][:].reshape(-1).astype(np.int32) - 1
45+
46+
if isinstance(f["element"]["species"][:][0][0], np.uint16):
47+
species = ["".join([chr(i) for i in f["element"]["species"][:].reshape(-1)])]
48+
elif isinstance(f["element"]["species"][:][0][0], h5py.h5r.Reference):
49+
species = ["".join([chr(j) for j in f[i][:].reshape(-1)]) for i in f["element"]["species"][:].reshape(-1)]
50+
ans = np.array([ase.atom.atomic_numbers[i] for i in species], dtype=np.int32)
51+
atomic_number = ans[element_map]
52+
53+
pbc = []
54+
for i in f["domain"]["boundary"][:].flatten():
55+
if i == 1:
56+
pbc.append(True)
57+
elif i == 3:
58+
pbc.append(False)
59+
else:
60+
raise ValueError("Unknown boundary condition")
61+
pbc = np.array(pbc)
62+
63+
cell = f["domain"]["latvec"][:].T.astype(np.float32)
64+
65+
structure = {
66+
_keys.ATOMIC_NUMBERS_KEY: atomic_number,
67+
_keys.PBC_KEY: pbc,
68+
_keys.POSITIONS_KEY: pos.reshape(1, -1, 3) * Bohr2Angstrom,
69+
_keys.CELL_KEY: cell.reshape(1,3,3) * Bohr2Angstrom
70+
}
71+
72+
return structure
73+
74+
def get_eigenvalue(self, idx):
75+
global_lists = glob.glob(self[idx] + "/*.mat")
76+
for fs in global_lists:
77+
calT = self.calculation_type(fs)
78+
if calT == "band-structure":
79+
path = fs
80+
break
81+
with h5py.File(path, "r") as f:
82+
kpt = f["band"]["kdirect"][:].T[np.newaxis, :, :]
83+
eigs = f["band"]["ksnrg"][:].T
84+
85+
return {_keys.KPOINT_KEY: kpt, _keys.ENERGY_EIGENVALUE_KEY: eigs}
86+
87+
def get_basis(self, idx):
88+
global_lists = glob.glob(self[idx] + "/*.mat")
89+
for fs in global_lists:
90+
calT = self.calculation_type(fs)
91+
if calT == "self-consistent":
92+
path = fs
93+
break
94+
with h5py.File(path, "r") as f:
95+
Aorb = f["LCAO"]["orbInfo"]["Aorb"][:].flatten()
96+
Lorb = f["LCAO"]["orbInfo"]["Lorb"][:].flatten()
97+
98+
stru = self.get_structure(idx=idx)
99+
atomic_number = stru[_keys.ATOMIC_NUMBERS_KEY]
100+
an = list(set(atomic_number))
101+
an_ind = [atomic_number.tolist().index(i)+1 for i in an]# since Aorb info start from 1
102+
basis = {}
103+
for i, at in enumerate(an_ind):
104+
ix = np.where(Aorb == at)[0]
105+
basis[ase.atom.chemical_symbols[an[i]]] = Lorb[ix]
106+
107+
for k in basis:
108+
bb = basis[k]
109+
bc = Counter(bb)
110+
bs = ""
111+
for l in sorted(bc.keys()):
112+
bs += str(int(bc[l] / (2*l+1))) + orbitalId[l]
113+
basis[k] = bs
114+
115+
return basis
116+
117+
def get_blocks(self, idx, hamiltonian: bool = False, overlap: bool = False, density_matrix: bool = False):
118+
# rescu use Spherical Harmonic basis with the phase factor, and the order of basis is defined in the form from small to large,
119+
# i.e. for p orbital, the m is ranked as [-1, 0, 1]
120+
global_lists = glob.glob(self[idx] + "/*.h5")
121+
for fs in global_lists:
122+
with h5py.File(fs, "r") as f:
123+
if "LCAO" in f and "hamiltonian1" in f["LCAO"]:
124+
Rvec = f["LCAO"]["Rvec"][:].T
125+
path = fs
126+
break
127+
128+
if any([hamiltonian, overlap, density_matrix]):
129+
basis = self.get_basis(idx=idx)
130+
else:
131+
return [{}], [{}], [{}]
132+
133+
l_dict = {}
134+
# count norbs
135+
count = {}
136+
for at in basis:
137+
count[at] = 0
138+
l_dict[at] = []
139+
for iorb in range(int(len(basis[at]) / 2)):
140+
n, o = int(basis[at][2*iorb]), basis[at][2*iorb+1]
141+
count[at] += n * (2*anglrMId[o]+1)
142+
l_dict[at] += [anglrMId[o]] * n
143+
144+
an = self.get_structure(idx=idx)[_keys.ATOMIC_NUMBERS_KEY]
145+
146+
hamiltonian_dict = {}
147+
overlap_dict = {}
148+
density_matrix = {}
149+
150+
151+
with h5py.File(path, "r") as f:
152+
if hamiltonian:
153+
hamil = []
154+
hamil_mask = []
155+
for i in range(Rvec.shape[0]):
156+
if np.abs(f["LCAO"]["hamiltonian" + str(i+1)][:]).max() > 1e-8:
157+
hamil.append(f["LCAO"]["hamiltonian" + str(i+1)][:].T)
158+
hamil_mask.append(True)
159+
else:
160+
hamil_mask.append(False)
161+
hamil_mask = np.array(hamil_mask)
162+
hamil = np.stack(hamil).astype(np.float32) * Hartree2eV
163+
hamil_Rvec = Rvec[hamil_mask]
164+
xcount = 0
165+
for i, ai in enumerate(an):
166+
si = ase.atom.chemical_symbols[ai]
167+
ycount = 0
168+
for j, aj in enumerate(an):
169+
sj = ase.atom.chemical_symbols[aj]
170+
keys = map(lambda x: "_".join([str(i),str(j),str(x[0].astype(np.int32)),str(x[1].astype(np.int32)),str(x[2].astype(np.int32))]), hamil_Rvec)
171+
blocks = self.transform(hamil[:, xcount:xcount+count[si], ycount:ycount+count[sj]], l_dict[si], l_dict[sj])
172+
173+
hamiltonian_dict.update(dict(zip(keys, blocks)))
174+
175+
ycount += count[sj]
176+
xcount += count[si]
177+
178+
179+
if overlap:
180+
ovp = []
181+
ovp_mask = []
182+
for i in range(Rvec.shape[0]):
183+
if np.abs(f["LCAO"]["overlap" + str(i+1)][:]).max() > 1e-8:
184+
ovp.append(f["LCAO"]["overlap" + str(i+1)][:].T)
185+
ovp_mask.append(True)
186+
else:
187+
ovp_mask.append(False)
188+
ovp_mask = np.array(ovp_mask)
189+
ovp = np.stack(ovp).astype(np.float32)
190+
ovp_Rvec = Rvec[ovp_mask]
191+
xcount = 0
192+
for i, ai in enumerate(an):
193+
si = ase.atom.chemical_symbols[ai]
194+
ycount = 0
195+
for j, aj in enumerate(an):
196+
sj = ase.atom.chemical_symbols[aj]
197+
keys = map(lambda x: "_".join([str(i),str(j),str(x[0].astype(np.int32)),str(x[1].astype(np.int32)),str(x[2].astype(np.int32))]), ovp_Rvec)
198+
blocks = self.transform(ovp[:, xcount:xcount+count[si], ycount:ycount+count[sj]], l_dict[si], l_dict[sj])
199+
overlap_dict.update(dict(zip(keys, blocks)))
200+
201+
ycount += count[sj]
202+
xcount += count[si]
203+
204+
if density_matrix:
205+
raise NotImplementedError("Density matrix is not implemented yet.")
206+
207+
return [hamiltonian_dict], [overlap_dict], [density_matrix]
208+
209+
def transform(self, mat, l_lefts, l_rights):
210+
211+
if max(*l_lefts, *l_rights) > 5:
212+
raise NotImplementedError("Only support l = s, p, d, f, g, h.")
213+
block_lefts = block_diag(*[RESCU2DFTIO[l_left] for l_left in l_lefts])
214+
block_rights = block_diag(*[RESCU2DFTIO[l_right] for l_right in l_rights])
215+
216+
return block_lefts @ mat @ block_rights.T

0 commit comments

Comments
 (0)