Coverage for calorine/nep/model.py: 100%

1from dataclasses import dataclass

2from itertools import product

3from typing import Dict, List, Tuple

5import numpy as np

8def _get_model_type(first_row: List[str]) -> str:

9 """Parses a the first row of a ``nep.txt`` file,

10 and returns the type of NEP model. Available types

11 are ``potential``, ``dipole`` and ``polarizability``.

13 Parameters

14 ----------

15 first_row

16 first row of a NEP file, split by white space.

17 """

18 model_type = first_row[0]

19 if 'dipole' in model_type:

20 return 'dipole'

21 elif 'polarizability' in model_type:

22 return 'polarizability'

23 return 'potential'

26def _get_nep_contents(filename: str) -> Tuple[Dict, List]:

27 """Parses a ``nep.txt`` file, and returns a dict describing the header

28 and an unformatted list of all model parameters.

29 Intended to be used by the :func:`read_model <calorine.nep.read_model>` function.

31 Parameters

32 ----------

33 filename

34 input file name

35 """

36 # parse file and split header and parameters

37 header = []

38 parameters = []

39 nheader = 5 # 5 rows for NEP2, 6-7 rows for NEP3 onwards

40 base_line = 3

41 with open(filename) as f:

42 for k, line in enumerate(f.readlines()):

43 flds = line.split()

44 assert len(flds) != 0, f'Empty line number {k}'

45 if k == 0 and 'zbl' in flds[0]:

46 base_line += 1

47 nheader += 1

48 if k == base_line and 'basis_size' in flds[0]:

49 # Introduced in nep.txt after GPUMD v3.2

50 nheader += 1

51 if k < nheader:

52 header.append(tuple(flds))

53 elif len(flds) == 1:

54 parameters.append(float(flds[0]))

55 else:

56 raise IOError(f'Failed to parse line {k} from {filename}')

57 # compile data from the header into a dict

58 data = {}

59 for flds in header:

60 if flds[0] in ['cutoff', 'zbl']:

61 data[flds[0]] = tuple(map(float, flds[1:]))

62 elif flds[0] in ['n_max', 'l_max', 'ANN', 'basis_size']:

63 data[flds[0]] = tuple(map(int, flds[1:]))

64 elif flds[0].startswith('nep'):

65 version = flds[0].replace('nep', '').split('_')[0]

66 if version == '':

67 # Case for NEP2

68 version = 2

69 else:

70 version = int(version)

71 data['version'] = version

72 data['types'] = flds[2:]

73 data['model_type'] = _get_model_type(flds)

74 else:

75 raise ValueError(f'Unknown field: {flds[0]}')

76 return data, parameters

79@dataclass

80class Model:

81 r"""Objects of this class represent a NEP model in a form suitable for

82 inspection and manipulation. Typically a :class:`Model` object is instantiated

83 by calling the :func:`read_model <calorine.nep.read_model>` function.

85 Attributes

86 ----------

87 version : int

88 NEP version

89 model_type: str

90 One of ``potential``, ``dipole`` or ``polarizability``.

91 types : Tuple[str, ...]

92 chemical species that this model represents

93 radial_cutoff : float

94 the radial cutoff parameter in Å

95 angular_cutoff : float

96 the angular cutoff parameter in Å

97 max_neighbors_radial : int

98 maximum number of neighbors in neighbor list for radial terms

99 max_neighbors_angular : int

100 maximum number of neighbors in neighbor list for angular terms

101 zbl : Tuple[float, float]

102 inner and outer cutoff for transition to ZBL potential

103 n_basis_radial : int

104 number of radial basis functions :math:`n_\mathrm{basis}^\mathrm{R}`

105 n_basis_angular : int

106 number of angular basis functions :math:`n_\mathrm{basis}^\mathrm{A}`

107 n_max_radial : int

108 maximum order of Chebyshev polymonials included in

109 radial expansion :math:`n_\mathrm{max}^\mathrm{R}`

110 n_max_angular : int

111 maximum order of Chebyshev polymonials included in

112 angular expansion :math:`n_\mathrm{max}^\mathrm{A}`

113 l_max_3b : int

114 maximum expansion order for three-body terms :math:`l_\mathrm{max}^\mathrm{3b}`

115 l_max_4b : int

116 maximum expansion order for four-body terms :math:`l_\mathrm{max}^\mathrm{4b}`

117 l_max_5b : int

118 maximum expansion order for five-body terms :math:`l_\mathrm{max}^\mathrm{5b}`

119 n_descriptor_radial : int

120 dimension of radial part of descriptor

121 n_descriptor_angular : int

122 dimension of angular part of descriptor

123 n_neuron : int

124 number of neurons in hidden layer

125 n_parameters : int

126 total number of parameters including scalers (which are not fit parameters)

127 n_descriptor_parameters : int

128 number of parameters in descriptor

129 n_ann_parameters : int

130 number of neural network weights

131 ann_parameters : Dict[Tuple[str, Dict[str, np.darray]]]

132 neural network weights

133 q_scaler : List[float]

134 scaling parameters

135 radial_descriptor_weights : Dict[Tuple[str, str], np.ndarray]

136 radial descriptor weights by combination of species; the array for each combination

137 has dimensions of

138 :math:`(n_\mathrm{max}^\mathrm{R}+1) \times (n_\mathrm{basis}^\mathrm{R}+1)`

139 angular_descriptor_weights : Dict[Tuple[str, str], np.ndarray]

140 angular descriptor weights by combination of species; the array for each combination

141 has dimensions of

142 :math:`(n_\mathrm{max}^\mathrm{A}+1) \times (n_\mathrm{basis}^\mathrm{A}+1)`

143 """

144

145 version: int

146 model_type: str

147 types: Tuple[str, ...]

148

149 radial_cutoff: float

150 angular_cutoff: float

151

152 n_basis_radial: int

153 n_basis_angular: int

154 n_max_radial: int

155 n_max_angular: int

156 l_max_3b: int

157 l_max_4b: int

158 l_max_5b: int

159 n_descriptor_radial: int

160 n_descriptor_angular: int

161

162 n_neuron: int

163 n_parameters: int

164 n_descriptor_parameters: int

165 n_ann_parameters: int

166 ann_parameters: Dict[str, Dict[str, np.ndarray]]

167 q_scaler: List[float]

168 radial_descriptor_weights: Dict[Tuple[str, str], np.ndarray]

169 angular_descriptor_weights: Dict[Tuple[str, str], np.ndarray]

170

171 zbl: Tuple[float, float] = None

172 max_neighbors_radial: int = None

173 max_neighbors_angular: int = None

174

175 _special_fields = [

176 'ann_parameters',

177 'q_scaler',

178 'radial_descriptor_weights',

179 'angular_descriptor_weights',

180 ]

181

182 def __str__(self) -> str:

183 s = []

184 for fld in self.__dataclass_fields__:

185 if fld not in self._special_fields:

186 s += [f'{fld:22} : {getattr(self, fld)}']

187 return '\n'.join(s)

188

189 def _repr_html_(self) -> str:

190 s = []

191 s += ['<table border="1" class="dataframe"']

192 s += [

193 '<thead><tr><th style="text-align: left;">Field</th><th>Value</th></tr></thead>'

194 ]

195 s += ['<tbody>']

196 for fld in self.__dataclass_fields__:

197 if fld not in self._special_fields:

198 s += [

199 f'<tr><td style="text-align: left;">{fld:22}</td>'

200 f'<td>{getattr(self, fld)}</td><tr>'

201 ]

202 for fld in self._special_fields:

203 d = getattr(self, fld)

204 if fld.endswith('descriptor_weights'):

205 dim = list(d.values())[0].shape

206 if fld == 'ann_parameters' and self.version == 4:

207 dim = (len(self.types), len(list(d.values())[0]))

208 else:

209 dim = len(d)

210 s += [

211 f'<tr><td style="text-align: left;">Dimension of {fld:22}</td><td>{dim}</td><tr>'

212 ]

213 s += ['</tbody>']

214 s += ['</table>']

215 return ''.join(s)

216

217 def write(self, filename: str) -> None:

218 """Write NEP model to file in ``nep.txt`` format."""

219 with open(filename, 'w') as f:

220 # header

221 version_name = f'nep{self.version}'

222 if self.zbl is not None:

223 version_name += '_zbl'

224 elif self.model_type != 'potential':

225 version_name += f'_{self.model_type}'

226 f.write(f'{version_name} {len(self.types)} {" ".join(self.types)}\n')

227 if self.zbl is not None:

228 f.write(f'zbl {" ".join(map(str, self.zbl))}\n')

229 f.write(f'cutoff {self.radial_cutoff} {self.angular_cutoff}')

230 if (

231 self.max_neighbors_radial is not None

232 and self.max_neighbors_angular is not None

233 ):

234 f.write(f' {self.max_neighbors_radial} {self.max_neighbors_angular}')

235 f.write('\n')

236 f.write(f'n_max {self.n_max_radial} {self.n_max_angular}\n')

237 f.write(f'basis_size {self.n_basis_radial} {self.n_basis_angular}\n')

238 f.write(f'l_max {self.l_max_3b} {self.l_max_4b} {self.l_max_5b}\n')

239 f.write(f'ANN {self.n_neuron} 0\n')

240

241 # neural network weights

242 keys = self.types if self.version == 4 else ['all_species']

243 suffixes = ['', '_polar'] if self.model_type == 'polarizability' else ['']

244 for suffix in suffixes:

245 for s in keys:

246 # Order: w0, b0, w1

247 # w0 indexed as: n*N_descriptor + nu

248 w0 = self.ann_parameters[s][f'w0{suffix}']

249 b0 = self.ann_parameters[s][f'b0{suffix}']

250 w1 = self.ann_parameters[s][f'w1{suffix}']

251 for n in range(self.n_neuron):

252 for nu in range(

253 self.n_descriptor_radial + self.n_descriptor_angular

254 ):

255 f.write(f'{w0[n, nu]:15.7e}\n')

256 for b in b0[:, 0]:

257 f.write(f'{b:15.7e}\n')

258 for v in w1[0, :]:

259 f.write(f'{v:15.7e}\n')

260 b1 = self.ann_parameters[f'b1{suffix}']

261 f.write(f'{b1:15.7e}\n')

262

263 # descriptor weights

264 mat = []

265 for s1 in self.types:

266 for s2 in self.types:

267 mat = np.hstack(

268 [mat, self.radial_descriptor_weights[(s1, s2)].flatten()]

269 )

270 mat = np.hstack(

271 [mat, self.angular_descriptor_weights[(s1, s2)].flatten()]

272 )

273 n_types = len(self.types)

274 n = int(len(mat) / (n_types * n_types))

275 mat = mat.reshape((n_types * n_types, n)).T

276 for v in mat.flatten():

277 f.write(f'{v:15.7e}\n')

278

279 # scaler

280 for v in self.q_scaler:

281 f.write(f'{v:15.7e}\n')

282

283

284def read_model(filename: str) -> Model:

285 """Parses a file in ``nep.txt`` format and returns the

286 content in the form of a :class:`Model <calorine.nep.model.Model>`

287 object.

288

289 Parameters

290 ----------

291 filename

292 input file name

293 """

294 data, parameters = _get_nep_contents(filename)

295

296 # sanity checks

297 for fld in ['cutoff', 'basis_size', 'n_max', 'l_max', 'ANN']:

298 assert fld in data, f'Invalid model file; {fld} line is missing'

299 assert data['version'] in [

300 3,

301 4,

302 ], 'Invalid model files; only NEP versions 3 and 4 are currently supported'

303

304 # split up cutoff tuple

305 assert len(data['cutoff']) in [2, 4]

306 data['radial_cutoff'] = data['cutoff'][0]

307 data['angular_cutoff'] = data['cutoff'][1]

308 if len(data['cutoff']) == 4:

309 data['max_neighbors_radial'] = int(data['cutoff'][2])

310 data['max_neighbors_angular'] = int(data['cutoff'][3])

311 del data['cutoff']

312

313 # split up basis_size tuple

314 assert len(data['basis_size']) == 2

315 data['n_basis_radial'] = data['basis_size'][0]

316 data['n_basis_angular'] = data['basis_size'][1]

317 del data['basis_size']

318

319 # split up n_max tuple

320 assert len(data['n_max']) == 2

321 data['n_max_radial'] = data['n_max'][0]

322 data['n_max_angular'] = data['n_max'][1]

323 del data['n_max']

324

325 # split up nl_max tuple

326 len_l = len(data['l_max'])

327 assert len_l in [1, 2, 3]

328 data['l_max_3b'] = data['l_max'][0]

329 data['l_max_4b'] = data['l_max'][1] if len_l > 1 else 0

330 data['l_max_5b'] = data['l_max'][2] if len_l > 2 else 0

331 del data['l_max']

332

333 # compute dimensions of descriptor components

334 data['n_descriptor_radial'] = data['n_max_radial'] + 1

335 l_max_enh = data['l_max_3b'] + (data['l_max_4b'] > 0) + (data['l_max_5b'] > 0)

336 data['n_descriptor_angular'] = (data['n_max_angular'] + 1) * l_max_enh

337 n_descriptor = data['n_descriptor_radial'] + data['n_descriptor_angular']

338

339 # compute number of parameters

340 data['n_neuron'] = data['ANN'][0]

341 del data['ANN']

342 n_types = len(data['types'])

343 if data['version'] == 3:

344 n = 1

345 else: # version 4

346 # one hidden layer per atomic species

347 n = n_types

348 n_ann_input_weights = (n_descriptor + 1) * data['n_neuron'] # weights + bias

349 n_ann_output_weights = data['n_neuron'] # only weights

350 n_ann_parameters = (

351 n_ann_input_weights + n_ann_output_weights

352 ) * n + 1 # + single output bias

353

354 n_descriptor_weights = n_types**2 * (

355 (data['n_max_radial'] + 1) * (data['n_basis_radial'] + 1)

356 + (data['n_max_angular'] + 1) * (data['n_basis_angular'] + 1)

357 )

358 data['n_parameters'] = n_ann_parameters + n_descriptor_weights + n_descriptor

359 is_polarizability_model = data['model_type'] == 'polarizability'

360 if data['n_parameters'] + n_ann_parameters == len(parameters):

361 data['n_parameters'] += n_ann_parameters

362 assert is_polarizability_model, (

363 'Model is not labelled as a polarizability model, but the number of '

364 'parameters matches a polarizability model.\n'

365 'If this is a polarizability model trained with GPUMD <=v3.8, please '

366 'modify the header in the nep.txt file to read '

367 f'`nep{data["version"]}_polarizability`.\n'

368 )

369 assert data['n_parameters'] == len(parameters), (

370 'Parsing of parameters inconsistent; please submit a bug report\n'

371 f'{data["n_parameters"]} != {len(parameters)}'

372 )

373 data['n_ann_parameters'] = n_ann_parameters

374

375 # split up parameters into the ANN weights, descriptor weights, and scaling parameters

376 n1 = n_ann_parameters

377 n1 *= 2 if is_polarizability_model else 1

378 n2 = n1 + n_descriptor_weights

379 data['ann_parameters'] = parameters[:n1]

380 descriptor_weights = np.array(parameters[n1:n2])

381 data['q_scaler'] = parameters[n2:]

382

383 # Group ANN parameters

384 pars = {}

385 n1 = 0

386 n_network_params = n_ann_input_weights + n_ann_output_weights # except last bias

387 n_neuron = data['n_neuron']

388 keys = data['types'] if data['version'] == 4 else ['all_species']

389

390 n_count = 2 if is_polarizability_model else 1

391 for count in range(n_count):

392 # if polarizability model, all parameters including bias are repeated

393 # need to offset n1 by +1 to handle bias

394 n1 += count

395 for s in keys:

396 # Get the parameters for the ANN; in the case of NEP4, there is effectively

397 # one network per atomic species.

398 ann_parameters = data['ann_parameters'][n1 : n1 + n_network_params]

399 ann_input_weights = ann_parameters[:n_ann_input_weights]

400 w0 = np.zeros((n_neuron, n_descriptor))

401 w0[...] = np.nan

402 b0 = np.zeros((n_neuron, 1))

403 b0[...] = np.nan

404 for n in range(n_neuron):

405 for nu in range(n_descriptor):

406 w0[n, nu] = ann_input_weights[n * n_descriptor + nu]

407 b0[:, 0] = ann_input_weights[n_neuron * n_descriptor :]

408

409 assert np.all(

410 w0.shape == (n_neuron, n_descriptor)

411 ), f'w0 has invalid shape for key {s}; please submit a bug report'

412 assert np.all(

413 b0.shape == (n_neuron, 1)

414 ), f'b0 has invalid shape for key {s}; please submit a bug report'

415 assert not np.any(

416 np.isnan(w0)

417 ), f'some weights in w0 are nan for key {s}; please submit a bug report'

418 assert not np.any(

419 np.isnan(b0)

420 ), f'some weights in b0 are nan for key {s}; please submit a bug report'

421

422 ann_output_weights = ann_parameters[

423 n_ann_input_weights : n_ann_input_weights + n_ann_output_weights

424 ]

425

426 w1 = np.zeros((1, n_neuron))

427 w1[0, :] = ann_output_weights[:]

428 assert np.all(

429 w1.shape == (1, n_neuron)

430 ), f'w1 has invalid shape for key {s}; please submit a bug report'

431 assert not np.any(

432 np.isnan(w1)

433 ), f'some weights in w1 are nan for key {s}; please submit a bug report'

434

435 if count == 0:

436 pars[s] = dict(w0=w0, b0=b0, w1=w1)

437 else:

438 pars[s].update({'w0_polar': w0, 'b0_polar': b0, 'w1_polar': w1})

439 n1 += n_network_params

440 if count == 0:

441 pars['b1'] = data['ann_parameters'][n1]

442 else:

443 pars['b1_polar'] = data['ann_parameters'][n1]

444

445 sum = 0

446 for s in pars.keys():

447 if s.startswith('b1'):

448 sum += 1

449 else:

450 sum += np.sum([np.count_nonzero(p) for p in pars[s].values()])

451 assert sum == n_ann_parameters * n_count, (

452 'Inconsistent number of parameters accounted for; please submit a bug report\n'

453 f'{sum} != {n_ann_parameters}'

454 )

455 data['ann_parameters'] = pars

456

457 # split up descriptor by chemical species and radial/angular

458 data['n_descriptor_parameters'] = len(descriptor_weights)

459 n = int(len(descriptor_weights) / (n_types * n_types))

460 n_max_radial = data['n_max_radial']

461 n_max_angular = data['n_max_angular']

462 n_basis_radial = data['n_basis_radial']

463 n_basis_angular = data['n_basis_angular']

464 m = (n_max_radial + 1) * (n_basis_radial + 1)

465 descriptor_weights = descriptor_weights.reshape((n, n_types * n_types)).T

466 descriptor_weights_radial = descriptor_weights[:, :m]

467 descriptor_weights_angular = descriptor_weights[:, m:]

468

469 # add descriptors to data dict

470 data['radial_descriptor_weights'] = {}

471 data['angular_descriptor_weights'] = {}

472 m = -1

473 for i, j in product(range(n_types), repeat=2):

474 m += 1

475 s1, s2 = data['types'][i], data['types'][j]

476 subdata = descriptor_weights_radial[m, :].reshape(

477 (n_max_radial + 1, n_basis_radial + 1)

478 )

479 data['radial_descriptor_weights'][(s1, s2)] = subdata

480 subdata = descriptor_weights_angular[m, :].reshape(

481 (n_max_angular + 1, n_basis_angular + 1)

482 )

483 data['angular_descriptor_weights'][(s1, s2)] = subdata

484

485 return Model(**data)