Coverage for calorine/gpumd/io.py: 100%

224 statements  

coverage.py v7.6.4, created at 2025-02-05 14:53 +0000

from warnings import warn
from collections.abc import Iterable
from pathlib import Path
from typing import List, Tuple, Union

import numpy as np
from ase import Atoms
from ase.io import read, write
from pandas import DataFrame


def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in kappa.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
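
# Usage sketch (not part of the module; the path is hypothetical and assumes a
# kappa.out file produced by GPUMD):
#
#     from calorine.gpumd.io import read_kappa
#     df = read_kappa('kappa.out')
#     print(df[['kx_tot', 'ky_tot', 'kz_tot']].mean())  # averages of the total components
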

def read_msd(filename: str) -> DataFrame:
    """Parses a file in ``msd.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in msd.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    return df
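
# Usage sketch (not part of the module; the path is hypothetical):
#
#     from calorine.gpumd.io import read_msd
#     df = read_msd('msd.out')
#     print(df.msd_x.iloc[-1])  # mean-square displacement along x at the last correlation time
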

def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in hac.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # remove columns with in/out data to save space
        for col in df:
            if 'in' in col or 'out' in col:
                del df[col]
    if exclude_currents:
        # remove columns with currents to save space
        for col in df:
            if col.startswith('j'):
                del df[col]
    return df
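
# Usage sketch (not part of the module; the path is hypothetical):
#
#     from calorine.gpumd.io import read_hac
#     df = read_hac('hac.out', exclude_currents=True, exclude_in_out=True)
#     print(df.columns.tolist())  # e.g., time, kz_tot, kx_tot, ky_tot
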

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    if len(data[0]) == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif len(data[0]) == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    assert natoms > 0, 'natoms must be positive'
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
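
# Usage sketch (not part of the module; the path and the value of natoms are hypothetical):
#
#     from calorine.gpumd.io import read_thermo
#     df = read_thermo('thermo.out', natoms=512)
#     print(df.potential_energy.mean())  # per-atom energy after normalization
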

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read` since the ASE implementation does
    not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    structure = read(filename, format='extxyz')
    if structure.has('vel'):
        structure.set_velocities(structure.get_array('vel'))
    return structure
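
# Usage sketch (not part of the module; the path is hypothetical):
#
#     from calorine.gpumd.io import read_xyz
#     structure = read_xyz('model.xyz')
#     print(len(structure))
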

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form of a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.
    """
    data = []
    with open(filename, 'r') as f:
        for k, line in enumerate(f.readlines()):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
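
# Usage sketch (not part of the module; the path is hypothetical):
#
#     from calorine.gpumd.io import read_runfile
#     for keyword, values in read_runfile('run.in'):
#         print(keyword, values)
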

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in ``run.in`` format to define input parameters for an MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        List of keyword-value pairs used in the ``run.in`` file
        (see the GPUMD documentation for a complete list of keywords).
        Values can be either floats, integers, or lists/tuples.
    """

    with open(file, 'w') as f:
        # Write all keywords with parameter(s)
        for key, val in parameters:
            f.write(f'{key} ')
            if isinstance(val, Iterable) and not isinstance(val, str):
                for v in val:
                    f.write(f'{v} ')
            else:
                f.write(f'{val}')
            f.write('\n')
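
# Usage sketch (not part of the module; the keywords and values below are only a
# hypothetical GPUMD input, see the GPUMD documentation for valid choices):
#
#     from calorine.gpumd.io import write_runfile
#     parameters = [('potential', 'nep.txt'),
#                   ('velocity', 300),
#                   ('ensemble', ('nvt_ber', 300, 300, 100)),
#                   ('time_step', 1),
#                   ('dump_thermo', 100),
#                   ('run', 10000)]
#     write_runfile('run.in', parameters)
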

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (``model.xyz``).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of lists of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be at most three. Each of
        these contains a list of groups in the form of lists of site indices.
        The sum of the lengths of the latter must equal the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Make a local copy of the atoms object
    _structure = structure.copy()

    # Check velocities parameter
    velocities = _structure.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Check groupings parameter
    if groupings is None:
        number_of_grouping_methods = 0
    else:
        number_of_grouping_methods = len(groupings)
        if number_of_grouping_methods > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            all_indices = [i for group in grouping for i in group]
            if len(all_indices) != len(_structure) or set(all_indices) != set(
                range(len(_structure))
            ):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASE's extxyz write functionality.
    #  pbc="pbc_a pbc_b pbc_c"
    #  lattice="ax ay az bx by bz cx cy cz"
    #  properties=property_name:data_type:number_of_columns
    #      species:S:1
    #      pos:R:3
    #      mass:R:1
    #      vel:R:3
    #      group:I:number_of_grouping_methods
    if _structure.has('mass'):
        # If the structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        _structure.new_array('mass', _structure.get_masses())

    if has_velocity:
        _structure.new_array('vel', _structure.get_velocities())
    if groupings is not None:
        group_indices = np.array(
            [
                [
                    [
                        group_index
                        for group_index, group in enumerate(grouping)
                        if structure_idx in group
                    ]
                    for grouping in groupings
                ]
                for structure_idx in range(len(_structure))
            ]
        ).squeeze()
        _structure.new_array('group', group_indices)

    write(filename=filename, images=_structure, write_info=True, format='extxyz')
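
# Usage sketch (not part of the module; the structure and grouping below are hypothetical):
#
#     from ase.build import bulk
#     from calorine.gpumd.io import write_xyz
#     structure = bulk('Si', 'diamond', a=5.43).repeat(3)
#     n = len(structure)
#     # one grouping method with two groups, splitting the atoms in half
#     groupings = [[list(range(n // 2)), list(range(n // 2, n))]]
#     write_xyz('model.xyz', structure, groupings=groupings)
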

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = []
    offset = 0
    step = 0
    accumulated_step = 0
    for line in lines:
        if line.startswith('# mc'):
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                ntypes = int(flds[7])
                species = [flds[8+2*k] for k in range(ntypes)]
                phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
                kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            continue
        else:
            flds = line.split()
            previous_step = step
            step = int(flds[0])
            if step <= previous_step and accumulate:
                offset += previous_step
            accumulated_step = step + offset
            record = dict(
                step=accumulated_step,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                concentrations = {f'conc_{s}': float(flds[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            data.append(record)
    df = DataFrame.from_dict(data)
    return df
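
# Usage sketch (not part of the module; the path is hypothetical):
#
#     from calorine.gpumd.io import read_mcmd
#     df = read_mcmd('mcmd.out')
#     print(df[['step', 'mc_type', 'acceptance_ratio']].tail())
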

def read_thermodynamic_data(directory_name: str) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out`` and ``run.in`` files,
    and returns the thermodynamic data including the time (in ps),
    the pressure (in GPa), the side lengths of the simulation cell
    (in Å), and the volume (in Å:sup:`3`).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                raise ValueError(
                    'Could not extract value of `dump_thermo` keyword from `run.in` file.')
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one run before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if dump_thermo != 'DEFINEDONCE':
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')
    # Fewer rows than expected are acceptable, since the run may have crashed or
    # may not have completed yet.

    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume

    return df
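
# Usage sketch (not part of the module; the directory name is hypothetical and should
# contain both run.in and thermo.out):
#
#     from calorine.gpumd.io import read_thermodynamic_data
#     df = read_thermodynamic_data('my_gpumd_run')
#     print(df[['time', 'temperature', 'pressure', 'volume']].describe())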