Coverage for calorine/gpumd/io.py: 99%

239 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-12-04 13:49 +0000

1from warnings import warn 

2from collections.abc import Iterable 

3from pathlib import Path 

4from typing import List, Tuple, Union 

5 

6import numpy as np 

7from ase import Atoms 

8from ase.io import read, write 

9from pandas import DataFrame 

10 

11 

def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    DataFrame with columns ``kx_in``, ``kx_out``, ``ky_in``, ``ky_out``,
    ``kz_tot`` plus the derived sums ``kx_tot`` and ``ky_tot``.

    Raises
    ------
    ValueError
        If the file does not contain exactly five data columns.
    """
    data = np.loadtxt(filename)
    # np.loadtxt returns a 1-D array for a single-row file; np.atleast_2d
    # normalizes this more robustly than checking the element type
    # (which only worked for the default float64 dtype) and also turns an
    # empty file into a clear ValueError below instead of an IndexError.
    data = np.atleast_2d(data)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df

37 

38 

def read_msd(filename: str) -> DataFrame:
    """Parses a file in ``msd.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # A single-row file is loaded as a 1-D array; add a row dimension.
        data = data.reshape(1, -1)
    ncols = len(data[0])
    ngroups = (ncols - 1) // 6
    if ngroups * 6 + 1 != ncols:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {1+ngroups*6} columns (1+6*ngroups).'
        )
    base_fields = 'msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split()
    if ngroups == 1:
        # Without grouping the six fields are used as-is.
        columns = ['time'] + base_fields
    else:
        # With grouping each field is repeated once per group, suffixed
        # with the group index.
        columns = ['time'] + [
            f'{field}_{group}'
            for group in range(ngroups)
            for field in base_fields
        ]
    return DataFrame(data=data, columns=columns)

69 

70 

def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.

    Raises
    ------
    ValueError
        If the file does not contain exactly eleven data columns.
    """
    data = np.loadtxt(filename)
    # A single-row file loads as a 1-D array; normalize to 2-D.
    data = np.atleast_2d(data)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # Collect the column names first and drop in one call rather than
        # deleting columns while iterating over the frame, which only
        # worked by relying on pandas Index immutability.
        df = df.drop(columns=[c for c in df.columns if 'in' in c or 'out' in c])
    if exclude_currents:
        # Remove current (autocorrelation) columns to save space.
        df = df.drop(columns=[c for c in df.columns if c.startswith('j')])
    return df

119 

120 

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive, or if the file does not contain
        9, 12, 15 or 18 data columns.
    """
    # Validate with an explicit exception instead of an assert statement,
    # which would be silently stripped when running with `python -O`.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    data = np.loadtxt(filename)
    # A single-row thermo.out file loads as a 1-D array; normalize to 2-D.
    data = np.atleast_2d(data)
    ncols = data.shape[1]
    if ncols == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif ncols == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    # Normalize the extensive energies to per-atom values.
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df

172 

173 

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read` since the ASE
    implementation does not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    # Copy the `vel` per-atom array (if present) into the proper ASE
    # velocities field, which the extxyz reader does not populate.
    if atoms.has('vel'):
        atoms.set_velocities(atoms.get_array('vel'))
    return atoms

196 

197 

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any value fields.
    """
    data = []
    with open(filename, 'r') as f:
        # Iterate the file object directly instead of materializing all
        # lines; count from 1 so error messages use human line numbers.
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            # Known single-value keywords are converted to their native types;
            # everything else stays as a string or tuple of strings.
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data

230 

231 

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.

    parameters
        Defines all command-parameter(s) pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    lines = []
    for keyword, value in parameters:
        # Each line starts with the keyword followed by a space.
        pieces = [f'{keyword} ']
        if isinstance(value, Iterable) and not isinstance(value, str):
            # Non-string iterables: write each element followed by a space.
            pieces.extend(f'{item} ' for item in value)
        else:
            pieces.append(f'{value}')
        lines.append(''.join(pieces) + '\n')
    with open(file, 'w') as f:
        f.writelines(lines)

258 

259 

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's Atoms object is left untouched.
    atoms = structure.copy()

    # Velocities are only written if at least one component is non-negligible.
    velocities = atoms.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Validate the groupings parameter: at most three grouping methods, and
    # each method must assign every atom index exactly once.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        expected_indices = set(range(len(atoms)))
        for g, grouping in enumerate(groupings):
            flattened = [index for group in grouping for index in group]
            if len(flattened) != len(atoms) or set(flattened) != expected_indices:
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASEs extyz write functionality.
    #   pbc="pbc_a pbc_b pbc_c"
    #   lattice="ax ay az bx by bz cx cy cz"
    #   properties=property_name:data_type:number_of_columns
    #       species:S:1
    #       pos:R:3
    #       mass:R:1
    #       vel:R:3
    #       group:I:number_of_grouping_methods
    if atoms.has('mass'):
        # Respect masses that were already attached to the structure.
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if has_velocity:
        atoms.new_array('vel', atoms.get_velocities())
    if groupings is not None:
        # For every atom, look up its group index within each grouping method.
        group_indices = np.array([
            [
                [
                    group_index
                    for group_index, group in enumerate(grouping)
                    if atom_index in group
                ]
                for grouping in groupings
            ]
            for atom_index in range(len(atoms))
        ]).squeeze()
        atoms.new_array('group', group_indices)

    write(filename=filename, images=atoms, write_info=True, format='extxyz')

344 

345 

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    records = []
    step = 0
    step_offset = 0
    with open(filename, 'r') as fp:
        for line in fp:
            if line.startswith('# mc'):
                # Header line announcing a new MC run; remember its settings
                # for the data lines that follow.
                flds = line.split()
                mc_type = flds[2]
                md_steps = int(flds[3])
                mc_trials = int(flds[4])
                temperature_initial = float(flds[5])
                temperature_final = float(flds[6])
                if mc_type.endswith('sgc'):
                    ntypes = int(flds[7])
                    species = [flds[8 + 2 * k] for k in range(ntypes)]
                    phis = {f'phi_{flds[8 + 2 * k]}': float(flds[9 + 2 * k])
                            for k in range(ntypes)}
                    kappa = float(flds[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
            elif line.startswith('# num_MD_steps'):
                continue
            else:
                flds = line.split()
                last_step = step
                step = int(flds[0])
                # A step counter that does not increase marks the start of a
                # new MC run; shift subsequent steps by the previous total.
                if accumulate and step <= last_step:
                    step_offset += last_step
                record = {
                    'step': step + step_offset,
                    'mc_type': mc_type,
                    'md_steps': md_steps,
                    'mc_trials': mc_trials,
                    'temperature_initial': temperature_initial,
                    'temperature_final': temperature_final,
                    'acceptance_ratio': float(flds[1]),
                }
                if mc_type.endswith('sgc'):
                    record.update(phis)
                    if mc_type == 'vcsgc':
                        record['kappa'] = kappa
                    record.update({f'conc_{s}': float(flds[k])
                                   for k, s in enumerate(species, start=2)})
                records.append(record)
    return DataFrame.from_dict(records)

410 

411 

def read_thermodynamic_data(
    directory_name: str,
    normalize: bool = False,
) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
    (optionally) files, and returns the thermodynamic data including the
    time (in ps), the pressure (in GPa), the side lengths of the simulation
    cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.
    normalize
        Normalize thermodynamic quantities per atom.
        This requires the ``model.xyz`` file to be present.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.
    """

    # run.in is required in order to reconstruct the time axis.
    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    # model.xyz is only needed to obtain the atom count for normalization.
    if normalize:
        try:
            structure = read(f'{directory_name}/model.xyz')
        except FileNotFoundError:
            raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
        natoms = len(structure)
    else:
        natoms = 1

    # Walk through the run.in commands and collect one "block" per `run`
    # command, recording the time_step and dump_thermo interval in effect.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            # A `run` before any `dump_thermo` produces no thermo output.
            if dump_thermo is None:
                continue
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            # ('DEFINEDONCE' is a sentinel marking a consumed dump_thermo.)
            if (dump_thermo != 'DEFINEDONCE'):
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    # Sanity check: the number of thermo rows should match the number of
    # dumps implied by the run.in blocks.
    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    # Reconstruct the cumulative simulation time across all blocks.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    # Hydrostatic pressure from the diagonal stress components.
    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # Triclinic cell: side lengths are row norms, volume is the
        # determinant of the cell matrix.
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # Orthorhombic cell: side lengths are the diagonal entries.
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume
    if normalize:
        df.volume /= natoms

    return df