Coverage for calorine / gpumd / io.py: 100%

247 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-12 16:00 +0000

1from warnings import warn 

2from collections.abc import Iterable 

3from pathlib import Path 

4from typing import List, Tuple, Union 

5 

6import numpy as np 

7from ase import Atoms 

8from ase.io import read, write 

9from ase.units import fs 

10from pandas import DataFrame 

11 

12 

def read_kappa(filename: str) -> DataFrame:
    """Read a ``kappa.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    columns = ['kx_in', 'kx_out', 'ky_in', 'ky_out', 'kz_tot']
    if len(raw[0]) != len(columns):
        raise ValueError(
            f'Input file contains {len(raw[0])} data columns.'
            f' Expected {len(columns)} columns.'
        )
    frame = DataFrame(data=raw, columns=columns)
    # Total conductivities are the sums of the in/out contributions.
    frame['kx_tot'] = frame.kx_in + frame.kx_out
    frame['ky_tot'] = frame.ky_in + frame.ky_out
    return frame

38 

39 

def read_msd(filename: str) -> DataFrame:
    """Read an ``msd.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    ncols = len(raw[0])
    # Layout is one time column followed by six columns per group.
    ngroups = (ncols - 1) // 6
    if ncols != ngroups * 6 + 1:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {1+ngroups*6} columns (1+6*ngroups).'
        )
    quantities = ['msd_x', 'msd_y', 'msd_z', 'sdc_x', 'sdc_y', 'sdc_z']
    columns = ['time']
    if ngroups == 1:
        columns += quantities
    else:
        # Suffix each quantity with its group index when several groups exist.
        columns += [f'{q}_{g}' for g in range(ngroups) for q in quantities]
    return DataFrame(data=raw, columns=columns)

70 

71 

def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Read a ``hac.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    columns = [
        'time',
        'jin_jtot_x', 'jout_jtot_x', 'jin_jtot_y', 'jout_jtot_y', 'jtot_jtot_z',
        'kx_in', 'kx_out', 'ky_in', 'ky_out', 'kz_tot',
    ]
    if len(raw[0]) != len(columns):
        raise ValueError(
            f'Input file contains {len(raw[0])} data columns.'
            f' Expected {len(columns)} columns.'
        )
    df = DataFrame(data=raw, columns=columns)
    # Totals are sums of the in/out contributions (z only has a total).
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    df.drop(columns=['jtot_jtot_z'], inplace=True)
    if exclude_in_out:
        # Drop columns with in/out data to save space.
        df.drop(columns=[c for c in df if 'in' in c or 'out' in c], inplace=True)
    if exclude_currents:
        # Drop columns with currents to save space.
        df.drop(columns=[c for c in df if c.startswith('j')], inplace=True)
    return df

120 

121 

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies. Must be positive.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive or if the number of data columns
        does not correspond to any known ``thermo.out`` layout.
    """
    # Validate early and raise (rather than assert) so the check is not
    # stripped when running under `python -O`.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    data = np.loadtxt(filename)
    if len(data) == 0:
        # File exists but holds no rows yet (e.g., run just started).
        return DataFrame(data=data)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    ncols = len(data[0])
    if ncols == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif ncols == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    # Normalize extensive energies per atom.
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df

175 

176 

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read_xyz` since the ASE
    implementation does not read velocities properly. Specifically, the
    velocity unit is converted from GPUMD units (Å/fs) to ASE units
    (1/sqrt(u/eV)).

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    if atoms.has('vel'):
        # Rescale the raw `vel` array from GPUMD to ASE internal units.
        conversion = 1 / fs
        atoms.set_velocities(atoms.get_array('vel') * conversion)
    return atoms

201 

202 

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any values.
    """
    data = []
    with open(filename, 'r') as f:
        # Iterate the file lazily; enumerate from 1 so error messages
        # report human-readable (1-based) line numbers.
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                # Skip blank lines.
                continue
            if len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            # Convert known single-valued keywords to their natural types.
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                # Unwrap single string values for convenience.
                values = values[0]
            data.append((keyword, values))
    return data

235 

236 

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.

    parameters
        Defines all command-parameter(s) pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as fp:
        for keyword, value in parameters:
            # Multi-valued parameters (but not strings) are written
            # space-separated after the keyword; scalars verbatim.
            if isinstance(value, Iterable) and not isinstance(value, str):
                rendered = ''.join(f'{item} ' for item in value)
            else:
                rendered = f'{value}'
            fp.write(f'{keyword} {rendered}\n')

263 

264 

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    This is a wrapper function around :func:`ase.io.write_xyz` since the ASE
    implementation does not write velocities properly. Specifically, the
    velocity unit is converted from ASE units (1/sqrt(u/eV)) to GPUMD units
    (Å/fs).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's object is never modified.
    atoms = structure.copy()

    # Only write a `vel` array if there are non-negligible velocities.
    velocities = atoms.get_velocities()
    write_velocities = not (velocities is None or np.max(np.abs(velocities)) < 1e-6)

    # Validate the groupings parameter before writing anything.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            flat = [i for group in grouping for i in group]
            if len(flat) != len(atoms) or set(flat) != set(range(len(atoms))):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # The extxyz writer emits one column block per per-atom array:
    #   species:S:1, pos:R:3, mass:R:1, vel:R:3,
    #   group:I:number_of_grouping_methods
    # plus pbc="..." and lattice="..." in the comment line.
    if atoms.has('mass'):
        # Respect masses that were set explicitly on the input structure.
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if write_velocities:
        # Convert velocities from ASE internal units to Å/fs for GPUMD.
        atoms.new_array('vel', atoms.get_velocities() * fs)

    if groupings is not None:
        # For each atom, record its group index under every grouping method.
        group_indices = np.array([
            [
                [gi for gi, group in enumerate(grouping) if site in group]
                for grouping in groupings
            ]
            for site in range(len(atoms))
        ]).squeeze()
        atoms.new_array('group', group_indices)

    write(filename=filename, images=atoms, write_info=True, format='extxyz')

354 

355 

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        content = f.readlines()

    records = []
    offset = 0
    step = 0
    for line in content:
        if line.startswith('# mc'):
            # Header line: defines the parameters of the following MC run.
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                # (V)SGC runs additionally list species, phi values,
                # and (for vcsgc only) a kappa parameter.
                ntypes = int(flds[7])
                species = [flds[8 + 2 * k] for k in range(ntypes)]
                phis = {f'phi_{flds[8 + 2 * k]}': float(flds[9 + 2 * k])
                        for k in range(ntypes)}
                kappa = float(flds[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            # Column-header line; carries no data.
            continue
        else:
            # Data line: step, acceptance ratio, and (for sgc) concentrations.
            flds = line.split()
            last_step = step
            step = int(flds[0])
            if step <= last_step and accumulate:
                # Step counter restarted => a new MC run began; shift
                # subsequent steps so the series keeps increasing.
                offset += last_step
            record = dict(
                step=step + offset,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                record.update({f'conc_{s}': float(flds[k])
                               for k, s in enumerate(species, start=2)})
            records.append(record)
    return DataFrame.from_dict(records)

420 

421 

def read_thermodynamic_data(
    directory_name: str,
    normalize: bool = False,
) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
    (optionally) files, and returns the thermodynamic data including the
    time (in ps), the pressure (in GPa), the side lengths of the simulation
    cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.
    normalize
        Normalize thermodynamic quantities per atom.
        This requires the ``model.xyz`` file to be present.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.

    Raises
    ------
    FileNotFoundError
        If ``run.in``, ``thermo.out``, or (when ``normalize`` is set)
        ``model.xyz`` is missing from the directory.
    ValueError
        If ``thermo.out`` contains more rows than expected from ``run.in``.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    if normalize:
        # Per-atom normalization requires the atom count from the model file.
        try:
            structure = read(f'{directory_name}/model.xyz')
        except FileNotFoundError:
            raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
        natoms = len(structure)
    else:
        natoms = 1

    # Build one "block" per `run` command for which thermo output was
    # requested; each block records the run length, time step, and thermo
    # dump interval in effect at that point of the run.in file.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                continue
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if (dump_thermo != 'DEFINEDONCE'):
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            # Sentinel: marks dump_thermo as already consumed by a run.
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    # Number of rows thermo.out should contain if all runs completed.
    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) == 0:
        # Could be the case when a run has just started and thermo.out has been created
        # but not populated yet
        warn('`thermo.out` is empty')
        return df

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    # Reconstruct the time axis block by block, carrying the end time of
    # each block over as the offset of the next.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    # Hydrostatic pressure from the trace of the stress tensor.
    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # Triclinic cell: lattice-vector lengths and volume via the
        # determinant of the cell matrix.
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # Orthorhombic cell: side lengths are the diagonal entries.
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume
    if normalize:
        df.volume /= natoms

    return df