Coverage for calorine / gpumd / io.py: 100%

247 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-12 16:00 +0000

1from warnings import warn 

2from collections.abc import Iterable 

3from pathlib import Path 

4from typing import List, Tuple, Union 

5 

6import numpy as np 

7from ase import Atoms 

8from ase.io import read, write 

9from ase.units import fs 

10from pandas import DataFrame 

11 

12 

def read_kappa(filename: str) -> DataFrame:
    """Read a ``kappa.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    columns = ['kx_in', 'kx_out', 'ky_in', 'ky_out', 'kz_tot']
    if len(raw[0]) != len(columns):
        raise ValueError(
            f'Input file contains {len(raw[0])} data columns.'
            f' Expected {len(columns)} columns.'
        )
    frame = DataFrame(data=raw, columns=columns)
    # Total conductivities are the sums of the in/out contributions.
    frame['kx_tot'] = frame.kx_in + frame.kx_out
    frame['ky_tot'] = frame.ky_in + frame.ky_out
    return frame

38 

39 

def read_msd(filename: str) -> DataFrame:
    """Read an ``msd.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    ncols = len(raw[0])
    # Layout is one time column followed by six columns per group.
    ngroups = (ncols - 1) // 6
    if ncols != ngroups * 6 + 1:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {1+ngroups*6} columns (1+6*ngroups).'
        )
    quantities = ['msd_x', 'msd_y', 'msd_z', 'sdc_x', 'sdc_y', 'sdc_z']
    columns = ['time']
    if ngroups == 1:
        columns += quantities
    else:
        # Suffix each quantity with its group index when several groups exist.
        columns += [f'{q}_{g}' for g in range(ngroups) for q in quantities]
    return DataFrame(data=raw, columns=columns)

70 

71 

def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Read a ``hac.out`` file produced by GPUMD into a data frame.
    More information concerning file format, content and units can be
    found `here <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # A single-row file yields a 1-D array; promote it to 2-D.
        raw = raw.reshape(1, -1)
    columns = [
        'time',
        'jin_jtot_x', 'jout_jtot_x', 'jin_jtot_y', 'jout_jtot_y', 'jtot_jtot_z',
        'kx_in', 'kx_out', 'ky_in', 'ky_out', 'kz_tot',
    ]
    if len(raw[0]) != len(columns):
        raise ValueError(
            f'Input file contains {len(raw[0])} data columns.'
            f' Expected {len(columns)} columns.'
        )
    df = DataFrame(data=raw, columns=columns)
    # Totals are sums of the in/out contributions (z only has a total).
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    df.drop(columns=['jtot_jtot_z'], inplace=True)
    if exclude_in_out:
        # Drop columns with in/out data to save space.
        df.drop(columns=[c for c in df if 'in' in c or 'out' in c], inplace=True)
    if exclude_currents:
        # Drop columns with currents to save space.
        df.drop(columns=[c for c in df if c.startswith('j')], inplace=True)
    return df

120 

121 

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies. Must be positive.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive or if the number of data columns
        does not correspond to any known ``thermo.out`` layout.
    """
    # Validate early and raise (rather than assert) so the check is not
    # stripped when running under `python -O`.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    data = np.loadtxt(filename)
    if len(data) == 0:
        # File exists but holds no rows yet (e.g., run just started).
        return DataFrame(data=data)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    ncols = len(data[0])
    if ncols == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif ncols == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif ncols == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    # Normalize extensive energies per atom.
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df

175 

176 

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read_xyz` since the ASE
    implementation does not read velocities properly. Specifically, the
    velocity unit is converted from GPUMD units (Å/fs) to ASE units
    (1/sqrt(u/eV)).

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    if atoms.has('vel'):
        # Rescale the raw `vel` array from GPUMD to ASE internal units.
        conversion = 1 / fs
        atoms.set_velocities(atoms.get_array('vel') * conversion)
    return atoms

201 

202 

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any values.
    """
    data = []
    with open(filename, 'r') as f:
        # Iterate the file lazily; enumerate from 1 so error messages
        # report human-readable (1-based) line numbers.
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                # Skip blank lines.
                continue
            if len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            # Convert known single-valued keywords to their natural types.
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                # Unwrap single string values for convenience.
                values = values[0]
            data.append((keyword, values))
    return data

235 

236 

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.

    parameters
        Defines all command-parameter(s) pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as fp:
        for keyword, value in parameters:
            # Multi-valued parameters (but not strings) are written
            # space-separated after the keyword; scalars verbatim.
            if isinstance(value, Iterable) and not isinstance(value, str):
                rendered = ''.join(f'{item} ' for item in value)
            else:
                rendered = f'{value}'
            fp.write(f'{keyword} {rendered}\n')

263 

264 

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    This is a wrapper function around :func:`ase.io.write_xyz` since the ASE
    implementation does not write velocities properly. Specifically, the
    velocity unit is converted from ASE units (1/sqrt(u/eV)) to GPUMD units
    (Å/fs).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's object is never modified.
    atoms = structure.copy()

    # Only write a `vel` array if there are non-negligible velocities.
    velocities = atoms.get_velocities()
    write_velocities = not (velocities is None or np.max(np.abs(velocities)) < 1e-6)

    # Validate the groupings parameter before writing anything.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            flat = [i for group in grouping for i in group]
            if len(flat) != len(atoms) or set(flat) != set(range(len(atoms))):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # The extxyz writer emits one column block per per-atom array:
    #   species:S:1, pos:R:3, mass:R:1, vel:R:3,
    #   group:I:number_of_grouping_methods
    # plus pbc="..." and lattice="..." in the comment line.
    if atoms.has('mass'):
        # Respect masses that were set explicitly on the input structure.
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if write_velocities:
        # Convert velocities from ASE internal units to Å/fs for GPUMD.
        atoms.new_array('vel', atoms.get_velocities() * fs)

    if groupings is not None:
        # For each atom, record its group index under every grouping method.
        group_indices = np.array([
            [
                [gi for gi, group in enumerate(grouping) if site in group]
                for grouping in groupings
            ]
            for site in range(len(atoms))
        ]).squeeze()
        atoms.new_array('group', group_indices)

    write(filename=filename, images=atoms, write_info=True, format='extxyz')

354 

355 

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        content = f.readlines()

    records = []
    offset = 0
    step = 0
    for line in content:
        if line.startswith('# mc'):
            # Header line: defines the parameters of the following MC run.
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                # (V)SGC runs additionally list species, phi values,
                # and (for vcsgc only) a kappa parameter.
                ntypes = int(flds[7])
                species = [flds[8 + 2 * k] for k in range(ntypes)]
                phis = {f'phi_{flds[8 + 2 * k]}': float(flds[9 + 2 * k])
                        for k in range(ntypes)}
                kappa = float(flds[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            # Column-header line; carries no data.
            continue
        else:
            # Data line: step, acceptance ratio, and (for sgc) concentrations.
            flds = line.split()
            last_step = step
            step = int(flds[0])
            if step <= last_step and accumulate:
                # Step counter restarted => a new MC run began; shift
                # subsequent steps so the series keeps increasing.
                offset += last_step
            record = dict(
                step=step + offset,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                record.update({f'conc_{s}': float(flds[k])
                               for k, s in enumerate(species, start=2)})
            records.append(record)
    return DataFrame.from_dict(records)

420 

421 

def read_thermodynamic_data(
    directory_name: str,
    normalize: bool = False,
) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
    (optionally) files, and returns the thermodynamic data including the
    time (in ps), the pressure (in GPa), the side lengths of the simulation
    cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.
    normalize
        Normalize thermodynamic quantities per atom.
        This requires the ``model.xyz`` file to be present.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.

    Raises
    ------
    FileNotFoundError
        If ``run.in``, ``thermo.out``, or (when ``normalize`` is set)
        ``model.xyz`` is missing from the directory.
    ValueError
        If ``thermo.out`` contains more rows than expected from ``run.in``.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    if normalize:
        # Per-atom normalization requires the atom count from the model file.
        try:
            structure = read(f'{directory_name}/model.xyz')
        except FileNotFoundError:
            raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
        natoms = len(structure)
    else:
        natoms = 1

    # Build one "block" per `run` command for which thermo output was
    # requested; each block records the run length, time step, and thermo
    # dump interval in effect at that point of the run.in file.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                continue
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if (dump_thermo != 'DEFINEDONCE'):
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            # Sentinel: marks dump_thermo as already consumed by a run.
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    # Number of rows thermo.out should contain if all runs completed.
    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) == 0:
        # Could be the case when a run has just started and thermo.out has been created
        # but not populated yet
        warn('`thermo.out` is empty')
        return df

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    # Reconstruct the time axis block by block, carrying the end time of
    # each block over as the offset of the next.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    # Hydrostatic pressure from the trace of the stress tensor.
    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # Triclinic cell: lattice-vector lengths and volume via the
        # determinant of the cell matrix.
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # Orthorhombic cell: side lengths are the diagonal entries.
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume
    if normalize:
        df.volume /= natoms

    return df