Coverage for calorine/gpumd/io.py: 100%

244 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-12-11 13:49 +0000

1from warnings import warn 

2from collections.abc import Iterable 

3from pathlib import Path 

4from typing import List, Tuple, Union 

5 

6import numpy as np 

7from ase import Atoms 

8from ase.io import read, write 

9from pandas import DataFrame 

10 

11 

12def read_kappa(filename: str) -> DataFrame: 

13 """Parses a file in ``kappa.out`` format from GPUMD and returns the 

14 content as a data frame. More information concerning file format, 

15 content and units can be found `here 

16 <https://gpumd.org/gpumd/output_files/kappa_out.html>`__. 

17 

18 Parameters 

19 ---------- 

20 filename 

21 Input file name. 

22 """ 

23 data = np.loadtxt(filename) 

24 if isinstance(data[0], np.float64): 

25 # If only a single row in kappa.out, append a dimension 

26 data = data.reshape(1, -1) 

27 tags = 'kx_in kx_out ky_in ky_out kz_tot'.split() 

28 if len(data[0]) != len(tags): 

29 raise ValueError( 

30 f'Input file contains {len(data[0])} data columns.' 

31 f' Expected {len(tags)} columns.' 

32 ) 

33 df = DataFrame(data=data, columns=tags) 

34 df['kx_tot'] = df.kx_in + df.kx_out 

35 df['ky_tot'] = df.ky_in + df.ky_out 

36 return df 

37 

38 

39def read_msd(filename: str) -> DataFrame: 

40 """Parses a file in ``msd.out`` format from GPUMD and returns the 

41 content as a data frame. More information concerning file format, 

42 content and units can be found `here 

43 <https://gpumd.org/gpumd/output_files/msd_out.html>`__. 

44 

45 Parameters 

46 ---------- 

47 filename 

48 Input file name. 

49 """ 

50 data = np.loadtxt(filename) 

51 if isinstance(data[0], np.float64): 

52 # If only a single row in msd.out, append a dimension 

53 data = data.reshape(1, -1) 

54 ncols = len(data[0]) 

55 ngroups = (ncols - 1) // 6 

56 if ngroups * 6 + 1 != ncols: 

57 raise ValueError( 

58 f'Input file contains {ncols} data columns.' 

59 f' Expected {1+ngroups*6} columns (1+6*ngroups).' 

60 ) 

61 tags = ['time'] 

62 flds = 'msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split() 

63 if ngroups == 1: 

64 tags.extend(flds) 

65 else: 

66 tags.extend([f'{f}_{n}' for n in range(ngroups) for f in flds]) 

67 df = DataFrame(data=data, columns=tags) 

68 return df 

69 

70 

71def read_hac(filename: str, 

72 exclude_currents: bool = True, 

73 exclude_in_out: bool = True) -> DataFrame: 

74 """Parses a file in ``hac.out`` format from GPUMD and returns the 

75 content as a data frame. More information concerning file format, 

76 content and units can be found `here 

77 <https://gpumd.org/gpumd/output_files/hac_out.html>`__. 

78 

79 Parameters 

80 ---------- 

81 filename 

82 Input file name. 

83 exclude_currents 

84 Do not include currents in output to save memory. 

85 exclude_in_out 

86 Do not include `in` and `out` parts of conductivity in output to save memory. 

87 """ 

88 data = np.loadtxt(filename) 

89 if isinstance(data[0], np.float64): 

90 # If only a single row in hac.out, append a dimension 

91 data = data.reshape(1, -1) 

92 tags = 'time' 

93 tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z' 

94 tags += ' kx_in kx_out ky_in ky_out kz_tot' 

95 tags = tags.split() 

96 if len(data[0]) != len(tags): 

97 raise ValueError( 

98 f'Input file contains {len(data[0])} data columns.' 

99 f' Expected {len(tags)} columns.' 

100 ) 

101 df = DataFrame(data=data, columns=tags) 

102 df['kx_tot'] = df.kx_in + df.kx_out 

103 df['ky_tot'] = df.ky_in + df.ky_out 

104 df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x 

105 df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y 

106 df['jjz_tot'] = df.jtot_jtot_z 

107 del df['jtot_jtot_z'] 

108 if exclude_in_out: 

109 # remove columns with in/out data to save space 

110 for col in df: 

111 if 'in' in col or 'out' in col: 

112 del df[col] 

113 if exclude_currents: 

114 # remove columns with currents to save space 

115 for col in df: 

116 if col.startswith('j'): 

117 del df[col] 

118 return df 

119 

120 

121def read_thermo(filename: str, natoms: int = 1) -> DataFrame: 

122 """Parses a file in ``thermo.out`` format from GPUMD and returns the 

123 content as a data frame. More information concerning file format, 

124 content and units can be found `here 

125 <https://gpumd.org/gpumd/output_files/thermo_out.html>`__. 

126 

127 Parameters 

128 ---------- 

129 filename 

130 Input file name. 

131 natoms 

132 Number of atoms; used to normalize energies. 

133 """ 

134 data = np.loadtxt(filename) 

135 if len(data) == 0: 

136 return DataFrame(data=data) 

137 if isinstance(data[0], np.float64): 

138 # If only a single row in loss.out, append a dimension 

139 data = data.reshape(1, -1) 

140 if len(data[0]) == 9: 

141 # orthorhombic box 

142 tags = 'temperature kinetic_energy potential_energy' 

143 tags += ' stress_xx stress_yy stress_zz' 

144 tags += ' cell_xx cell_yy cell_zz' 

145 elif len(data[0]) == 12: 

146 # orthorhombic box with stresses in Voigt notation (v3.3.1+) 

147 tags = 'temperature kinetic_energy potential_energy' 

148 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy' 

149 tags += ' cell_xx cell_yy cell_zz' 

150 elif len(data[0]) == 15: 

151 # triclinic box 

152 tags = 'temperature kinetic_energy potential_energy' 

153 tags += ' stress_xx stress_yy stress_zz' 

154 tags += ( 

155 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz' 

156 ) 

157 elif len(data[0]) == 18: 

158 # triclinic box with stresses in Voigt notation (v3.3.1+) 

159 tags = 'temperature kinetic_energy potential_energy' 

160 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy' 

161 tags += ( 

162 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz' 

163 ) 

164 else: 

165 raise ValueError( 

166 f'Input file contains {len(data[0])} data columns.' 

167 ' Expected 9, 12, 15 or 18 columns.' 

168 ) 

169 df = DataFrame(data=data, columns=tags.split()) 

170 assert natoms > 0, 'natoms must be positive' 

171 df.kinetic_energy /= natoms 

172 df.potential_energy /= natoms 

173 return df 

174 

175 

176def read_xyz(filename: str) -> Atoms: 

177 """ 

178 Reads the structure input file (``model.xyz``) for GPUMD and returns the 

179 structure. 

180 

181 This is a wrapper function around :func:`ase.io.read_xyz` since the ASE implementation does 

182 not read velocities properly. 

183 

184 Parameters 

185 ---------- 

186 filename 

187 Name of file from which to read the structure. 

188 

189 Returns 

190 ------- 

191 Structure as ASE Atoms object with additional per-atom arrays 

192 representing atomic masses, velocities etc. 

193 """ 

194 structure = read(filename, format='extxyz') 

195 if structure.has('vel'): 

196 structure.set_velocities(structure.get_array('vel')) 

197 return structure 

198 

199 

200def read_runfile(filename: str) -> List[Tuple[str, list]]: 

201 """ 

202 Parses a GPUMD input file in ``run.in`` format and returns the 

203 content in the form a list of keyword-value pairs. 

204 

205 Parameters 

206 ---------- 

207 filename 

208 Input file name. 

209 

210 Returns 

211 ------- 

212 List of keyword-value pairs. 

213 """ 

214 data = [] 

215 with open(filename, 'r') as f: 

216 for k, line in enumerate(f.readlines()): 

217 flds = line.split() 

218 if len(flds) == 0: 

219 continue 

220 elif len(flds) == 1: 

221 raise ValueError(f'Line {k} contains only one field:\n{line}') 

222 keyword = flds[0] 

223 values = tuple(flds[1:]) 

224 if keyword in ['time_step', 'velocity']: 

225 values = float(values[0]) 

226 elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']: 

227 values = int(values[0]) 

228 elif len(values) == 1: 

229 values = values[0] 

230 data.append((keyword, values)) 

231 return data 

232 

233 

234def write_runfile( 

235 file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]] 

236): 

237 """Write a file in run.in format to define input parameters for MD simulation. 

238 

239 Parameters 

240 ---------- 

241 file 

242 Path to file to be written. 

243 

244 parameters 

245 Defines all command-parameter(s) pairs used in run.in file 

246 (see GPUMD documentation for a complete list). 

247 Values can be either floats, integers, or lists/tuples. 

248 """ 

249 

250 with open(file, 'w') as f: 

251 # Write all keywords with parameter(s) 

252 for key, val in parameters: 

253 f.write(f'{key} ') 

254 if isinstance(val, Iterable) and not isinstance(val, str): 

255 for v in val: 

256 f.write(f'{v} ') 

257 else: 

258 f.write(f'{val}') 

259 f.write('\n') 

260 

261 

262def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None): 

263 """ 

264 Writes a structure into GPUMD input format (`model.xyz`). 

265 

266 Parameters 

267 ---------- 

268 filename 

269 Name of file to which the structure should be written. 

270 structure 

271 Input structure. 

272 groupings 

273 Groups into which the individual atoms should be divided in the form of 

274 a list of list of lists. Specifically, the outer list corresponds to 

275 the grouping methods, of which there can be three at the most, which 

276 contains a list of groups in the form of lists of site indices. The 

277 sum of the lengths of the latter must be the same as the total number 

278 of atoms. 

279 

280 Raises 

281 ------ 

282 ValueError 

283 Raised if parameters are incompatible. 

284 """ 

285 # Make a local copy of the atoms object 

286 _structure = structure.copy() 

287 

288 # Check velocties parameter 

289 velocities = _structure.get_velocities() 

290 if velocities is None or np.max(np.abs(velocities)) < 1e-6: 

291 has_velocity = 0 

292 else: 

293 has_velocity = 1 

294 

295 # Check groupings parameter 

296 if groupings is None: 

297 number_of_grouping_methods = 0 

298 else: 

299 number_of_grouping_methods = len(groupings) 

300 if number_of_grouping_methods > 3: 

301 raise ValueError('There can be no more than 3 grouping methods!') 

302 for g, grouping in enumerate(groupings): 

303 all_indices = [i for group in grouping for i in group] 

304 if len(all_indices) != len(_structure) or set(all_indices) != set( 

305 range(len(_structure)) 

306 ): 

307 raise ValueError( 

308 f'The indices listed in grouping method {g} are' 

309 ' not compatible with the input structure!' 

310 ) 

311 

312 # Allowed keyword=value pairs. Use ASEs extyz write functionality. 

313 # pbc="pbc_a pbc_b pbc_c" 

314 # lattice="ax ay az bx by bz cx cy cz" 

315 # properties=property_name:data_type:number_of_columns 

316 # species:S:1 

317 # pos:R:3 

318 # mass:R:1 

319 # vel:R:3 

320 # group:I:number_of_grouping_methods 

321 if _structure.has('mass'): 

322 # If structure already has masses set, use those 

323 warn('Structure already has array "mass"; will use existing values.') 

324 else: 

325 _structure.new_array('mass', _structure.get_masses()) 

326 

327 if has_velocity: 

328 _structure.new_array('vel', _structure.get_velocities()) 

329 if groupings is not None: 

330 group_indices = np.array( 

331 [ 

332 [ 

333 [ 

334 group_index 

335 for group_index, group in enumerate(grouping) 

336 if structure_idx in group 

337 ] 

338 for grouping in groupings 

339 ] 

340 for structure_idx in range(len(_structure)) 

341 ] 

342 ).squeeze() # pythoniccc 

343 _structure.new_array('group', group_indices) 

344 

345 write(filename=filename, images=_structure, write_info=True, format='extxyz') 

346 

347 

348def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame: 

349 """Parses a Monte Carlo output file in ``mcmd.out`` format 

350 and returns the content in the form of a DataFrame. 

351 

352 Parameters 

353 ---------- 

354 filename 

355 Path to file to be parsed. 

356 accumulate 

357 If ``True`` the MD steps between subsequent Monte Carlo 

358 runs in the same output file will be accumulated. 

359 

360 Returns 

361 ------- 

362 DataFrame containing acceptance ratios and concentrations (if available), 

363 as well as key Monte Carlo parameters. 

364 """ 

365 with open(filename, 'r') as f: 

366 lines = f.readlines() 

367 data = [] 

368 offset = 0 

369 step = 0 

370 accummulated_step = 0 

371 for line in lines: 

372 if line.startswith('# mc'): 

373 flds = line.split() 

374 mc_type = flds[2] 

375 md_steps = int(flds[3]) 

376 mc_trials = int(flds[4]) 

377 temperature_initial = float(flds[5]) 

378 temperature_final = float(flds[6]) 

379 if mc_type.endswith('sgc'): 

380 ntypes = int(flds[7]) 

381 species = [flds[8+2*k] for k in range(ntypes)] 

382 phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)} 

383 kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan 

384 elif line.startswith('# num_MD_steps'): 

385 continue 

386 else: 

387 flds = line.split() 

388 previous_step = step 

389 step = int(flds[0]) 

390 if step <= previous_step and accumulate: 

391 offset += previous_step 

392 accummulated_step = step + offset 

393 record = dict( 

394 step=accummulated_step, 

395 mc_type=mc_type, 

396 md_steps=md_steps, 

397 mc_trials=mc_trials, 

398 temperature_initial=temperature_initial, 

399 temperature_final=temperature_final, 

400 acceptance_ratio=float(flds[1]), 

401 ) 

402 if mc_type.endswith('sgc'): 

403 record.update(phis) 

404 if mc_type == 'vcsgc': 

405 record['kappa'] = kappa 

406 concentrations = {f'conc_{s}': float(flds[k]) 

407 for k, s in enumerate(species, start=2)} 

408 record.update(concentrations) 

409 data.append(record) 

410 df = DataFrame.from_dict(data) 

411 return df 

412 

413 

414def read_thermodynamic_data( 

415 directory_name: str, 

416 normalize: bool = False, 

417) -> DataFrame: 

418 """Parses the data in a GPUMD output directory 

419 and returns the content in the form of a :class:`DataFrame`. 

420 This function reads the ``thermo.out``, ``run.in``, and ``model.xyz`` 

421 (optionally) files, and returns the thermodynamic data including the 

422 time (in ps), the pressure (in GPa), the side lengths of the simulation 

423 cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom). 

424 

425 Parameters 

426 ---------- 

427 directory_name 

428 Path to directory to be parsed. 

429 normalize 

430 Normalize thermodynamic quantities per atom. 

431 This requires the ``model.xyz`` file to be present. 

432 

433 Returns 

434 ------- 

435 :class:`DataFrame` containing (augmented) thermodynamic data. 

436 """ 

437 

438 try: 

439 params = read_runfile(f'{directory_name}/run.in') 

440 except FileNotFoundError: 

441 raise FileNotFoundError(f'No `run.in` file found in {directory_name}') 

442 

443 if normalize: 

444 try: 

445 structure = read(f'{directory_name}/model.xyz') 

446 except FileNotFoundError: 

447 raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}') 

448 natoms = len(structure) 

449 else: 

450 natoms = 1 

451 

452 blocks = [] 

453 time_step = 1.0 # GPUMD default 

454 dump_thermo = None 

455 for p, v in params: 

456 if p == 'time_step': 

457 time_step = v 

458 elif p == 'dump_thermo': 

459 dump_thermo = v 

460 elif p == 'run': 

461 if dump_thermo is None: 

462 continue 

463 # We do not require dump_thermo to exist for subsequent 

464 # runs if it has been used for atleast one before. 

465 # But if there has been no new dump_thermo, we 

466 # should not create a block. 

467 if (dump_thermo != 'DEFINEDONCE'): 

468 blocks.append(dict( 

469 nsteps=v, 

470 time_step=time_step, 

471 dump_thermo=dump_thermo, 

472 )) 

473 dump_thermo = 'DEFINEDONCE' 

474 

475 try: 

476 df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms) 

477 except FileNotFoundError: 

478 raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}') 

479 

480 expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0)) 

481 for b in blocks if b['dump_thermo'] is not None]) 

482 if len(df) != expected_rows: 

483 warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match' 

484 f' expectation based on `run.in` file ({expected_rows}).') 

485 if len(df) > expected_rows: 

486 raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to' 

487 f' expectation based on `run.in` file ({expected_rows}).') 

488 if len(df) == 0: 

489 # Could be the case when a run has just started and thermo.out has been created 

490 # but not populated yet 

491 warn('`thermo.out` is empty') 

492 return df 

493 

494 # Fewer rows than expected should be ok, since the run may have crashed/not completed yet. 

495 times = [] 

496 offset = 0.0 

497 for b in blocks: 

498 ns = int(round(b['nsteps'] / b['dump_thermo'], 0)) 

499 block_times = np.array(range(1, 1 + ns)) \ 

500 * b['dump_thermo'] * b['time_step'] * 1e-3 # in ps 

501 block_times += offset 

502 times.extend(block_times) 

503 offset = times[-1] 

504 df['time'] = times[:len(df)] 

505 

506 df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3 

507 if 'cell_xy' in df: 

508 df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2) 

509 df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2) 

510 df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2) 

511 volume = (df.cell_xx * df.cell_yy * df.cell_zz + 

512 df.cell_xy * df.cell_yz * df.cell_xz + 

513 df.cell_xz * df.cell_yx * df.cell_zy - 

514 df.cell_xx * df.cell_yz * df.cell_zy - 

515 df.cell_xy * df.cell_yx * df.cell_zz - 

516 df.cell_xz * df.cell_yy * df.cell_zx) 

517 else: 

518 df['alat'] = df.cell_xx 

519 df['blat'] = df.cell_yy 

520 df['clat'] = df.cell_zz 

521 volume = (df.cell_xx * df.cell_yy * df.cell_zz) 

522 df['volume'] = volume 

523 if normalize: 

524 df.volume /= natoms 

525 

526 return df