Coverage for calorine/gpumd/io.py: 100%

215 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-12-10 08:26 +0000

1from warnings import warn 

2from collections.abc import Iterable 

3from pathlib import Path 

4from typing import List, Tuple, Union 

5 

6import numpy as np 

7from ase import Atoms 

8from ase.io import read, write 

9from pandas import DataFrame 

10 

11 

def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    DataFrame with the in/out conductivity components as well as the
    computed totals ``kx_tot`` and ``ky_tot``.

    Raises
    ------
    ValueError
        If the input file does not contain exactly five data columns.
    """
    # atleast_2d promotes the 1D array that loadtxt returns for a
    # single-row file; this is independent of dtype (unlike checking
    # isinstance(data[0], np.float64)) and turns an empty file into a
    # (1, 0) array that fails below with a clear error message.
    data = np.atleast_2d(np.loadtxt(filename))
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    # The x and y components are written as in/out pairs; sum them here.
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df

37 

38 

def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.

    Returns
    -------
    DataFrame with time, current autocorrelations and conductivity
    components (columns depend on the ``exclude_*`` flags).

    Raises
    ------
    ValueError
        If the input file does not contain exactly eleven data columns.
    """
    # atleast_2d promotes the 1D array loadtxt returns for a single-row
    # file; dtype-independent, unlike the previous isinstance check.
    data = np.atleast_2d(np.loadtxt(filename))
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    # Totals from the in/out decomposition.
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # Remove columns with in/out data to save space. Iterate over a
        # snapshot of the labels: deleting columns while iterating the
        # frame itself only works by accident of pandas Index immutability.
        for col in list(df):
            if 'in' in col or 'out' in col:
                del df[col]
    if exclude_currents:
        # Remove columns with currents to save space.
        for col in list(df):
            if col.startswith('j'):
                del df[col]
    return df

87 

88 

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.

    Returns
    -------
    DataFrame with temperature, per-atom energies, stresses, and cell
    metrics; the exact columns depend on the file layout.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive, or if the file does not contain
        9, 12, 15 or 18 data columns.
    """
    # Validate input with an explicit exception rather than `assert`,
    # which is silently stripped when running under `python -O`.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    # atleast_2d promotes the 1D array loadtxt returns for a single-row
    # thermo.out file; dtype-independent, unlike the previous isinstance check.
    data = np.atleast_2d(np.loadtxt(filename))
    ncols = data.shape[1]
    if ncols == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif ncols == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    # Normalize the extensive energies to per-atom values.
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df

140 

141 

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read_xyz` since the ASE implementation does
    not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    # ASE stores the velocities only as a generic `vel` array; transfer
    # them to the dedicated velocities attribute so callers can use
    # get_velocities() directly.
    if atoms.has('vel'):
        atoms.set_velocities(atoms.get_array('vel'))
    return atoms

164 

165 

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Values of selected keywords are converted to native types:
    ``time_step`` and ``velocity`` to :class:`float`; ``dump_thermo``,
    ``dump_position``, ``dump_restart`` and ``run`` to :class:`int`.
    Other single values are returned as strings, multiple values as
    tuples of strings.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any value.
    """
    data = []
    with open(filename, 'r') as f:
        # Start counting at 1 so error messages refer to the
        # human-readable (1-indexed) line number in the file.
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                # skip blank lines
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                # unwrap single values so callers get a scalar string
                values = values[0]
            data.append((keyword, values))
    return data

198 

199 

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.

    parameters
        Keyword-value pairs used in the run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """

    with open(file, 'w') as f:
        for keyword, value in parameters:
            # A non-string iterable is expanded into space-separated
            # fields; scalars (and strings) are written verbatim.
            if isinstance(value, Iterable) and not isinstance(value, str):
                fields = [f'{keyword} '] + [f'{v} ' for v in value]
            else:
                fields = [f'{keyword} ', f'{value}']
            f.write(''.join(fields) + '\n')

226 

227 

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Make a local copy of the atoms object
    _structure = structure.copy()

    # Check velocities parameter
    velocities = _structure.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Check groupings parameter
    if groupings is None:
        number_of_grouping_methods = 0
    else:
        number_of_grouping_methods = len(groupings)
        if number_of_grouping_methods > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            all_indices = [i for group in grouping for i in group]
            if len(all_indices) != len(_structure) or set(all_indices) != set(
                range(len(_structure))
            ):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASEs extyz write functionality.
    # pbc="pbc_a pbc_b pbc_c"
    # lattice="ax ay az bx by bz cx cy cz"
    # properties=property_name:data_type:number_of_columns
    #     species:S:1
    #     pos:R:3
    #     mass:R:1
    #     vel:R:3
    #     group:I:number_of_grouping_methods
    if _structure.has('mass'):
        # If structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        _structure.new_array('mass', _structure.get_masses())

    if has_velocity:
        _structure.new_array('vel', _structure.get_velocities())
    if groupings is not None:
        # Map each atom to its group index for every grouping method.
        # Built explicitly rather than via a nested comprehension plus
        # `squeeze()`: an unrestricted squeeze collapses the single-atom,
        # single-method case (shape (1, 1, 1)) all the way to a scalar.
        group_indices = np.empty((len(_structure), number_of_grouping_methods),
                                 dtype=int)
        for g, grouping in enumerate(groupings):
            for group_index, group in enumerate(grouping):
                for atom_index in group:
                    group_indices[atom_index, g] = group_index
        if number_of_grouping_methods == 1:
            # preserve the 1D shape produced previously for one method
            group_indices = group_indices[:, 0]
        _structure.new_array('group', group_indices)

    write(filename=filename, images=_structure, write_info=True, format='extxyz')

312 

313 

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = []
    offset = 0
    step = 0
    accummulated_step = 0
    for line in lines:
        if line.startswith('# mc'):
            # Header line of an MC run; its parameters apply to all
            # following data lines until the next header.
            # Layout: "# mc <type> <md_steps> <mc_trials> <T_init> <T_final> ..."
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                # (v)SGC headers additionally carry the number of species
                # followed by alternating species/phi fields.
                ntypes = int(flds[7])
                species = [flds[8+2*k] for k in range(ntypes)]
                phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
                # kappa only exists for variance-constrained SGC
                kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            # column-header line preceding the data rows; skip
            continue
        else:
            # Data line: step number, acceptance ratio, and (for sgc)
            # one concentration per species.
            flds = line.split()
            previous_step = step
            step = int(flds[0])
            # A step number that does not increase signals the start of
            # a new MC run; shift subsequent steps so the step column
            # increases monotonically across runs (if accumulate is set).
            if step <= previous_step and accumulate:
                offset += previous_step
            accummulated_step = step + offset
            record = dict(
                step=accummulated_step,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                # concentrations start at field 2, one per species
                concentrations = {f'conc_{s}': float(flds[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            data.append(record)
    df = DataFrame.from_dict(data)
    return df

378 

379 

def read_thermodynamic_data(directory_name: str) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out`` and ``run.in`` files,
    and returns the thermodynamic data including the time (in ps),
    the pressure (in GPa), the side lengths of the simulation cell
    (in Å), and the volume (in Å:sup:`3`).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.

    Raises
    ------
    FileNotFoundError
        If ``run.in`` or ``thermo.out`` is missing from the directory.
    ValueError
        If no ``dump_thermo`` keyword precedes a ``run`` keyword, or if
        ``thermo.out`` contains more rows than the ``run.in`` file implies.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    # Reconstruct the sequence of `run` blocks from run.in, keeping track
    # of the time_step and dump_thermo values in effect for each block.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                raise ValueError(
                    'Could not extract value of `dump_thermo` keyword from `run.in` file.')
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if (dump_thermo != 'DEFINEDONCE'):
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            # sentinel marking that dump_thermo was seen at least once
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    # Expected number of thermo.out rows based on the run.in blocks.
    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
        if len(df) > expected_rows:
            raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                             f' expectation based on `run.in` file ({expected_rows}).')

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    # Build the absolute time axis block by block, offsetting each block
    # by the end time of the previous one.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    # Hydrostatic pressure as the mean of the diagonal stress components.
    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # Triclinic box: cell vector lengths and volume via the
        # determinant of the full cell matrix.
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # Orthorhombic box: side lengths are the diagonal cell entries.
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume

    return df