Coverage for calorine/nep/model.py: 100%

261 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2024-10-17 08:55 +0000

1from dataclasses import dataclass 

2from itertools import product 

3from typing import Dict, List, Tuple 

4 

5import numpy as np 

6 

7 

8def _get_model_type(first_row: List[str]) -> str: 

9 """Parses a the first row of a ``nep.txt`` file, 

10 and returns the type of NEP model. Available types 

11 are ``potential``, ``dipole`` and ``polarizability``. 

12 

13 Parameters 

14 ---------- 

15 first_row 

16 first row of a NEP file, split by white space. 

17 """ 

18 model_type = first_row[0] 

19 if 'dipole' in model_type: 

20 return 'dipole' 

21 elif 'polarizability' in model_type: 

22 return 'polarizability' 

23 return 'potential' 

24 

25 

26def _get_nep_contents(filename: str) -> Tuple[Dict, List]: 

27 """Parses a ``nep.txt`` file, and returns a dict describing the header 

28 and an unformatted list of all model parameters. 

29 Intended to be used by the :func:`read_model <calorine.nep.read_model>` function. 

30 

31 Parameters 

32 ---------- 

33 filename 

34 input file name 

35 """ 

36 # parse file and split header and parameters 

37 header = [] 

38 parameters = [] 

39 nheader = 5 # 5 rows for NEP2, 6-7 rows for NEP3 onwards 

40 base_line = 3 

41 with open(filename) as f: 

42 for k, line in enumerate(f.readlines()): 

43 flds = line.split() 

44 assert len(flds) != 0, f'Empty line number {k}' 

45 if k == 0 and 'zbl' in flds[0]: 

46 base_line += 1 

47 nheader += 1 

48 if k == base_line and 'basis_size' in flds[0]: 

49 # Introduced in nep.txt after GPUMD v3.2 

50 nheader += 1 

51 if k < nheader: 

52 header.append(tuple(flds)) 

53 elif len(flds) == 1: 

54 parameters.append(float(flds[0])) 

55 else: 

56 raise IOError(f'Failed to parse line {k} from {filename}') 

57 # compile data from the header into a dict 

58 data = {} 

59 for flds in header: 

60 if flds[0] in ['cutoff', 'zbl']: 

61 data[flds[0]] = tuple(map(float, flds[1:])) 

62 elif flds[0] in ['n_max', 'l_max', 'ANN', 'basis_size']: 

63 data[flds[0]] = tuple(map(int, flds[1:])) 

64 elif flds[0].startswith('nep'): 

65 version = flds[0].replace('nep', '').split('_')[0] 

66 if version == '': 

67 # Case for NEP2 

68 version = 2 

69 else: 

70 version = int(version) 

71 data['version'] = version 

72 data['types'] = flds[2:] 

73 data['model_type'] = _get_model_type(flds) 

74 else: 

75 raise ValueError(f'Unknown field: {flds[0]}') 

76 return data, parameters 

77 

78 

79@dataclass 

80class Model: 

81 r"""Objects of this class represent a NEP model in a form suitable for 

82 inspection and manipulation. Typically a :class:`Model` object is instantiated 

83 by calling the :func:`read_model <calorine.nep.read_model>` function. 

84 

85 Attributes 

86 ---------- 

87 version : int 

88 NEP version 

89 model_type: str 

90 One of ``potential``, ``dipole`` or ``polarizability``. 

91 types : Tuple[str, ...] 

92 chemical species that this model represents 

93 radial_cutoff : float 

94 the radial cutoff parameter in Å 

95 angular_cutoff : float 

96 the angular cutoff parameter in Å 

97 max_neighbors_radial : int 

98 maximum number of neighbors in neighbor list for radial terms 

99 max_neighbors_angular : int 

100 maximum number of neighbors in neighbor list for angular terms 

101 zbl : Tuple[float, float] 

102 inner and outer cutoff for transition to ZBL potential 

103 n_basis_radial : int 

104 number of radial basis functions :math:`n_\mathrm{basis}^\mathrm{R}` 

105 n_basis_angular : int 

106 number of angular basis functions :math:`n_\mathrm{basis}^\mathrm{A}` 

107 n_max_radial : int 

108 maximum order of Chebyshev polymonials included in 

109 radial expansion :math:`n_\mathrm{max}^\mathrm{R}` 

110 n_max_angular : int 

111 maximum order of Chebyshev polymonials included in 

112 angular expansion :math:`n_\mathrm{max}^\mathrm{A}` 

113 l_max_3b : int 

114 maximum expansion order for three-body terms :math:`l_\mathrm{max}^\mathrm{3b}` 

115 l_max_4b : int 

116 maximum expansion order for four-body terms :math:`l_\mathrm{max}^\mathrm{4b}` 

117 l_max_5b : int 

118 maximum expansion order for five-body terms :math:`l_\mathrm{max}^\mathrm{5b}` 

119 n_descriptor_radial : int 

120 dimension of radial part of descriptor 

121 n_descriptor_angular : int 

122 dimension of angular part of descriptor 

123 n_neuron : int 

124 number of neurons in hidden layer 

125 n_parameters : int 

126 total number of parameters including scalers (which are not fit parameters) 

127 n_descriptor_parameters : int 

128 number of parameters in descriptor 

129 n_ann_parameters : int 

130 number of neural network weights 

131 ann_parameters : Dict[Tuple[str, Dict[str, np.darray]]] 

132 neural network weights 

133 q_scaler : List[float] 

134 scaling parameters 

135 radial_descriptor_weights : Dict[Tuple[str, str], np.ndarray] 

136 radial descriptor weights by combination of species; the array for each combination 

137 has dimensions of 

138 :math:`(n_\mathrm{max}^\mathrm{R}+1) \times (n_\mathrm{basis}^\mathrm{R}+1)` 

139 angular_descriptor_weights : Dict[Tuple[str, str], np.ndarray] 

140 angular descriptor weights by combination of species; the array for each combination 

141 has dimensions of 

142 :math:`(n_\mathrm{max}^\mathrm{A}+1) \times (n_\mathrm{basis}^\mathrm{A}+1)` 

143 """ 

144 

145 version: int 

146 model_type: str 

147 types: Tuple[str, ...] 

148 

149 radial_cutoff: float 

150 angular_cutoff: float 

151 

152 n_basis_radial: int 

153 n_basis_angular: int 

154 n_max_radial: int 

155 n_max_angular: int 

156 l_max_3b: int 

157 l_max_4b: int 

158 l_max_5b: int 

159 n_descriptor_radial: int 

160 n_descriptor_angular: int 

161 

162 n_neuron: int 

163 n_parameters: int 

164 n_descriptor_parameters: int 

165 n_ann_parameters: int 

166 ann_parameters: Dict[str, Dict[str, np.ndarray]] 

167 q_scaler: List[float] 

168 radial_descriptor_weights: Dict[Tuple[str, str], np.ndarray] 

169 angular_descriptor_weights: Dict[Tuple[str, str], np.ndarray] 

170 

171 zbl: Tuple[float, float] = None 

172 max_neighbors_radial: int = None 

173 max_neighbors_angular: int = None 

174 

175 _special_fields = [ 

176 'ann_parameters', 

177 'q_scaler', 

178 'radial_descriptor_weights', 

179 'angular_descriptor_weights', 

180 ] 

181 

182 def __str__(self) -> str: 

183 s = [] 

184 for fld in self.__dataclass_fields__: 

185 if fld not in self._special_fields: 

186 s += [f'{fld:22} : {getattr(self, fld)}'] 

187 return '\n'.join(s) 

188 

189 def _repr_html_(self) -> str: 

190 s = [] 

191 s += ['<table border="1" class="dataframe"'] 

192 s += [ 

193 '<thead><tr><th style="text-align: left;">Field</th><th>Value</th></tr></thead>' 

194 ] 

195 s += ['<tbody>'] 

196 for fld in self.__dataclass_fields__: 

197 if fld not in self._special_fields: 

198 s += [ 

199 f'<tr><td style="text-align: left;">{fld:22}</td>' 

200 f'<td>{getattr(self, fld)}</td><tr>' 

201 ] 

202 for fld in self._special_fields: 

203 d = getattr(self, fld) 

204 if fld.endswith('descriptor_weights'): 

205 dim = list(d.values())[0].shape 

206 if fld == 'ann_parameters' and self.version == 4: 

207 dim = (len(self.types), len(list(d.values())[0])) 

208 else: 

209 dim = len(d) 

210 s += [ 

211 f'<tr><td style="text-align: left;">Dimension of {fld:22}</td><td>{dim}</td><tr>' 

212 ] 

213 s += ['</tbody>'] 

214 s += ['</table>'] 

215 return ''.join(s) 

216 

217 def write(self, filename: str) -> None: 

218 """Write NEP model to file in ``nep.txt`` format.""" 

219 with open(filename, 'w') as f: 

220 # header 

221 version_name = f'nep{self.version}' 

222 if self.zbl is not None: 

223 version_name += '_zbl' 

224 elif self.model_type != 'potential': 

225 version_name += f'_{self.model_type}' 

226 f.write(f'{version_name} {len(self.types)} {" ".join(self.types)}\n') 

227 if self.zbl is not None: 

228 f.write(f'zbl {" ".join(map(str, self.zbl))}\n') 

229 f.write(f'cutoff {self.radial_cutoff} {self.angular_cutoff}') 

230 if ( 

231 self.max_neighbors_radial is not None 

232 and self.max_neighbors_angular is not None 

233 ): 

234 f.write(f' {self.max_neighbors_radial} {self.max_neighbors_angular}') 

235 f.write('\n') 

236 f.write(f'n_max {self.n_max_radial} {self.n_max_angular}\n') 

237 f.write(f'basis_size {self.n_basis_radial} {self.n_basis_angular}\n') 

238 f.write(f'l_max {self.l_max_3b} {self.l_max_4b} {self.l_max_5b}\n') 

239 f.write(f'ANN {self.n_neuron} 0\n') 

240 

241 # neural network weights 

242 keys = self.types if self.version == 4 else ['all_species'] 

243 suffixes = ['', '_polar'] if self.model_type == 'polarizability' else [''] 

244 for suffix in suffixes: 

245 for s in keys: 

246 # Order: w0, b0, w1 

247 # w0 indexed as: n*N_descriptor + nu 

248 w0 = self.ann_parameters[s][f'w0{suffix}'] 

249 b0 = self.ann_parameters[s][f'b0{suffix}'] 

250 w1 = self.ann_parameters[s][f'w1{suffix}'] 

251 for n in range(self.n_neuron): 

252 for nu in range( 

253 self.n_descriptor_radial + self.n_descriptor_angular 

254 ): 

255 f.write(f'{w0[n, nu]:15.7e}\n') 

256 for b in b0[:, 0]: 

257 f.write(f'{b:15.7e}\n') 

258 for v in w1[0, :]: 

259 f.write(f'{v:15.7e}\n') 

260 b1 = self.ann_parameters[f'b1{suffix}'] 

261 f.write(f'{b1:15.7e}\n') 

262 

263 # descriptor weights 

264 mat = [] 

265 for s1 in self.types: 

266 for s2 in self.types: 

267 mat = np.hstack( 

268 [mat, self.radial_descriptor_weights[(s1, s2)].flatten()] 

269 ) 

270 mat = np.hstack( 

271 [mat, self.angular_descriptor_weights[(s1, s2)].flatten()] 

272 ) 

273 n_types = len(self.types) 

274 n = int(len(mat) / (n_types * n_types)) 

275 mat = mat.reshape((n_types * n_types, n)).T 

276 for v in mat.flatten(): 

277 f.write(f'{v:15.7e}\n') 

278 

279 # scaler 

280 for v in self.q_scaler: 

281 f.write(f'{v:15.7e}\n') 

282 

283 

284def read_model(filename: str) -> Model: 

285 """Parses a file in ``nep.txt`` format and returns the 

286 content in the form of a :class:`Model <calorine.nep.model.Model>` 

287 object. 

288 

289 Parameters 

290 ---------- 

291 filename 

292 input file name 

293 """ 

294 data, parameters = _get_nep_contents(filename) 

295 

296 # sanity checks 

297 for fld in ['cutoff', 'basis_size', 'n_max', 'l_max', 'ANN']: 

298 assert fld in data, f'Invalid model file; {fld} line is missing' 

299 assert data['version'] in [ 

300 3, 

301 4, 

302 ], 'Invalid model files; only NEP versions 3 and 4 are currently supported' 

303 

304 # split up cutoff tuple 

305 assert len(data['cutoff']) in [2, 4] 

306 data['radial_cutoff'] = data['cutoff'][0] 

307 data['angular_cutoff'] = data['cutoff'][1] 

308 if len(data['cutoff']) == 4: 

309 data['max_neighbors_radial'] = int(data['cutoff'][2]) 

310 data['max_neighbors_angular'] = int(data['cutoff'][3]) 

311 del data['cutoff'] 

312 

313 # split up basis_size tuple 

314 assert len(data['basis_size']) == 2 

315 data['n_basis_radial'] = data['basis_size'][0] 

316 data['n_basis_angular'] = data['basis_size'][1] 

317 del data['basis_size'] 

318 

319 # split up n_max tuple 

320 assert len(data['n_max']) == 2 

321 data['n_max_radial'] = data['n_max'][0] 

322 data['n_max_angular'] = data['n_max'][1] 

323 del data['n_max'] 

324 

325 # split up nl_max tuple 

326 len_l = len(data['l_max']) 

327 assert len_l in [1, 2, 3] 

328 data['l_max_3b'] = data['l_max'][0] 

329 data['l_max_4b'] = data['l_max'][1] if len_l > 1 else 0 

330 data['l_max_5b'] = data['l_max'][2] if len_l > 2 else 0 

331 del data['l_max'] 

332 

333 # compute dimensions of descriptor components 

334 data['n_descriptor_radial'] = data['n_max_radial'] + 1 

335 l_max_enh = data['l_max_3b'] + (data['l_max_4b'] > 0) + (data['l_max_5b'] > 0) 

336 data['n_descriptor_angular'] = (data['n_max_angular'] + 1) * l_max_enh 

337 n_descriptor = data['n_descriptor_radial'] + data['n_descriptor_angular'] 

338 

339 # compute number of parameters 

340 data['n_neuron'] = data['ANN'][0] 

341 del data['ANN'] 

342 n_types = len(data['types']) 

343 if data['version'] == 3: 

344 n = 1 

345 else: # version 4 

346 # one hidden layer per atomic species 

347 n = n_types 

348 n_ann_input_weights = (n_descriptor + 1) * data['n_neuron'] # weights + bias 

349 n_ann_output_weights = data['n_neuron'] # only weights 

350 n_ann_parameters = ( 

351 n_ann_input_weights + n_ann_output_weights 

352 ) * n + 1 # + single output bias 

353 

354 n_descriptor_weights = n_types**2 * ( 

355 (data['n_max_radial'] + 1) * (data['n_basis_radial'] + 1) 

356 + (data['n_max_angular'] + 1) * (data['n_basis_angular'] + 1) 

357 ) 

358 data['n_parameters'] = n_ann_parameters + n_descriptor_weights + n_descriptor 

359 is_polarizability_model = data['model_type'] == 'polarizability' 

360 if data['n_parameters'] + n_ann_parameters == len(parameters): 

361 data['n_parameters'] += n_ann_parameters 

362 assert is_polarizability_model, ( 

363 'Model is not labelled as a polarizability model, but the number of ' 

364 'parameters matches a polarizability model.\n' 

365 'If this is a polarizability model trained with GPUMD <=v3.8, please ' 

366 'modify the header in the nep.txt file to read ' 

367 f'`nep{data["version"]}_polarizability`.\n' 

368 ) 

369 assert data['n_parameters'] == len(parameters), ( 

370 'Parsing of parameters inconsistent; please submit a bug report\n' 

371 f'{data["n_parameters"]} != {len(parameters)}' 

372 ) 

373 data['n_ann_parameters'] = n_ann_parameters 

374 

375 # split up parameters into the ANN weights, descriptor weights, and scaling parameters 

376 n1 = n_ann_parameters 

377 n1 *= 2 if is_polarizability_model else 1 

378 n2 = n1 + n_descriptor_weights 

379 data['ann_parameters'] = parameters[:n1] 

380 descriptor_weights = np.array(parameters[n1:n2]) 

381 data['q_scaler'] = parameters[n2:] 

382 

383 # Group ANN parameters 

384 pars = {} 

385 n1 = 0 

386 n_network_params = n_ann_input_weights + n_ann_output_weights # except last bias 

387 n_neuron = data['n_neuron'] 

388 keys = data['types'] if data['version'] == 4 else ['all_species'] 

389 

390 n_count = 2 if is_polarizability_model else 1 

391 for count in range(n_count): 

392 # if polarizability model, all parameters including bias are repeated 

393 # need to offset n1 by +1 to handle bias 

394 n1 += count 

395 for s in keys: 

396 # Get the parameters for the ANN; in the case of NEP4, there is effectively 

397 # one network per atomic species. 

398 ann_parameters = data['ann_parameters'][n1 : n1 + n_network_params] 

399 ann_input_weights = ann_parameters[:n_ann_input_weights] 

400 w0 = np.zeros((n_neuron, n_descriptor)) 

401 w0[...] = np.nan 

402 b0 = np.zeros((n_neuron, 1)) 

403 b0[...] = np.nan 

404 for n in range(n_neuron): 

405 for nu in range(n_descriptor): 

406 w0[n, nu] = ann_input_weights[n * n_descriptor + nu] 

407 b0[:, 0] = ann_input_weights[n_neuron * n_descriptor :] 

408 

409 assert np.all( 

410 w0.shape == (n_neuron, n_descriptor) 

411 ), f'w0 has invalid shape for key {s}; please submit a bug report' 

412 assert np.all( 

413 b0.shape == (n_neuron, 1) 

414 ), f'b0 has invalid shape for key {s}; please submit a bug report' 

415 assert not np.any( 

416 np.isnan(w0) 

417 ), f'some weights in w0 are nan for key {s}; please submit a bug report' 

418 assert not np.any( 

419 np.isnan(b0) 

420 ), f'some weights in b0 are nan for key {s}; please submit a bug report' 

421 

422 ann_output_weights = ann_parameters[ 

423 n_ann_input_weights : n_ann_input_weights + n_ann_output_weights 

424 ] 

425 

426 w1 = np.zeros((1, n_neuron)) 

427 w1[0, :] = ann_output_weights[:] 

428 assert np.all( 

429 w1.shape == (1, n_neuron) 

430 ), f'w1 has invalid shape for key {s}; please submit a bug report' 

431 assert not np.any( 

432 np.isnan(w1) 

433 ), f'some weights in w1 are nan for key {s}; please submit a bug report' 

434 

435 if count == 0: 

436 pars[s] = dict(w0=w0, b0=b0, w1=w1) 

437 else: 

438 pars[s].update({'w0_polar': w0, 'b0_polar': b0, 'w1_polar': w1}) 

439 n1 += n_network_params 

440 if count == 0: 

441 pars['b1'] = data['ann_parameters'][n1] 

442 else: 

443 pars['b1_polar'] = data['ann_parameters'][n1] 

444 

445 sum = 0 

446 for s in pars.keys(): 

447 if s.startswith('b1'): 

448 sum += 1 

449 else: 

450 sum += np.sum([np.count_nonzero(p) for p in pars[s].values()]) 

451 assert sum == n_ann_parameters * n_count, ( 

452 'Inconsistent number of parameters accounted for; please submit a bug report\n' 

453 f'{sum} != {n_ann_parameters}' 

454 ) 

455 data['ann_parameters'] = pars 

456 

457 # split up descriptor by chemical species and radial/angular 

458 data['n_descriptor_parameters'] = len(descriptor_weights) 

459 n = int(len(descriptor_weights) / (n_types * n_types)) 

460 n_max_radial = data['n_max_radial'] 

461 n_max_angular = data['n_max_angular'] 

462 n_basis_radial = data['n_basis_radial'] 

463 n_basis_angular = data['n_basis_angular'] 

464 m = (n_max_radial + 1) * (n_basis_radial + 1) 

465 descriptor_weights = descriptor_weights.reshape((n, n_types * n_types)).T 

466 descriptor_weights_radial = descriptor_weights[:, :m] 

467 descriptor_weights_angular = descriptor_weights[:, m:] 

468 

469 # add descriptors to data dict 

470 data['radial_descriptor_weights'] = {} 

471 data['angular_descriptor_weights'] = {} 

472 m = -1 

473 for i, j in product(range(n_types), repeat=2): 

474 m += 1 

475 s1, s2 = data['types'][i], data['types'][j] 

476 subdata = descriptor_weights_radial[m, :].reshape( 

477 (n_max_radial + 1, n_basis_radial + 1) 

478 ) 

479 data['radial_descriptor_weights'][(s1, s2)] = subdata 

480 subdata = descriptor_weights_angular[m, :].reshape( 

481 (n_max_angular + 1, n_basis_angular + 1) 

482 ) 

483 data['angular_descriptor_weights'][(s1, s2)] = subdata 

484 

485 return Model(**data)