Coverage for calorine/gpumd/io.py: 100% — 247 statements
(coverage.py v7.13.2, created at 2026-02-12 16:00 +0000)
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from ase.units import fs
10from pandas import DataFrame
def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    columns = ['kx_in', 'kx_out', 'ky_in', 'ky_out', 'kz_tot']
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # a single-row file loads as a 1D array; promote it to 2D
        raw = raw.reshape(1, -1)
    ncols = len(raw[0])
    if ncols != len(columns):
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {len(columns)} columns.'
        )
    df = DataFrame(data=raw, columns=columns)
    # totals are the sum of the in- and out-of-plane contributions
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
def read_msd(filename: str) -> DataFrame:
    """Parses a file in ``msd.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # a single-row file loads as a 1D array; promote it to 2D
        raw = raw.reshape(1, -1)
    ncols = len(raw[0])
    # one time column plus six columns (msd + sdc, xyz) per group
    ngroups = (ncols - 1) // 6
    if ngroups * 6 + 1 != ncols:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {1+ngroups*6} columns (1+6*ngroups).'
        )
    base = ['msd_x', 'msd_y', 'msd_z', 'sdc_x', 'sdc_y', 'sdc_z']
    if ngroups == 1:
        columns = ['time'] + base
    else:
        # suffix each field with its group index when multiple groups exist
        columns = ['time'] + [f'{b}_{g}' for g in range(ngroups) for b in base]
    return DataFrame(data=raw, columns=columns)
def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.
    """
    columns = ('time'
               ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
               ' kx_in kx_out ky_in ky_out kz_tot').split()
    raw = np.loadtxt(filename)
    if isinstance(raw[0], np.float64):
        # a single-row file loads as a 1D array; promote it to 2D
        raw = raw.reshape(1, -1)
    if len(raw[0]) != len(columns):
        raise ValueError(
            f'Input file contains {len(raw[0])} data columns.'
            f' Expected {len(columns)} columns.'
        )
    df = DataFrame(data=raw, columns=columns)
    # totals for the conductivity and the current autocorrelations
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # drop columns with in/out data to save space
        for name in [c for c in df.columns if 'in' in c or 'out' in c]:
            del df[name]
    if exclude_currents:
        # drop columns with currents to save space
        for name in [c for c in df.columns if c.startswith('j')]:
            del df[name]
    return df
def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive or if the file contains an
        unexpected number of columns.
    """
    # Validate eagerly with an exception; an `assert` would be silently
    # stripped when running under `python -O`.
    if natoms < 1:
        raise ValueError('natoms must be positive')
    data = np.loadtxt(filename)
    if len(data) == 0:
        # thermo.out exists but has not been populated yet
        return DataFrame(data=data)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    # Column layouts keyed by column count; stresses are either diagonal only
    # or in Voigt notation (GPUMD v3.3.1+), the cell either orthorhombic
    # (3 values) or triclinic (full 3x3 matrix).
    base = 'temperature kinetic_energy potential_energy'
    stress_diag = ' stress_xx stress_yy stress_zz'
    stress_voigt = stress_diag + ' stress_yz stress_xz stress_xy'
    cell_ortho = ' cell_xx cell_yy cell_zz'
    cell_triclinic = (
        ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
    )
    layouts = {
        9: base + stress_diag + cell_ortho,        # orthorhombic box
        12: base + stress_voigt + cell_ortho,      # orthorhombic box (v3.3.1+)
        15: base + stress_diag + cell_triclinic,   # triclinic box
        18: base + stress_voigt + cell_triclinic,  # triclinic box (v3.3.1+)
    }
    ncols = len(data[0])
    if ncols not in layouts:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=layouts[ncols].split())
    # normalize energies per atom
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This wraps :func:`ase.io.read` since the ASE implementation does not read
    velocities properly: GPUMD stores velocities in Å/fs, whereas ASE uses
    units of 1/sqrt(u/eV), so the values are converted on the fly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    if atoms.has('vel'):
        # convert the `vel` array from GPUMD to ASE velocity units
        to_ase_velocity = 1 / fs
        atoms.set_velocities(atoms.get_array('vel') * to_ase_velocity)
    return atoms
def read_runfile(filename: str) -> List[Tuple[str, Union[float, int, str, Tuple[str, ...]]]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any value.
    """
    data = []
    with open(filename, 'r') as f:
        # start=1 so error messages report 1-based (actual) line numbers
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                # skip blank lines
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            # convert known single-valued keywords to their natural types;
            # other single values stay strings, multi-values stay tuples
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        Defines all command-parameter(s) pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as handle:
        for keyword, value in parameters:
            if isinstance(value, Iterable) and not isinstance(value, str):
                # multi-valued parameter: one space-terminated field per value
                handle.write(f'{keyword} ' + ''.join(f'{v} ' for v in value) + '\n')
            else:
                handle.write(f'{keyword} {value}\n')
def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    This wraps :func:`ase.io.write` since the ASE implementation does not
    write velocities properly: ASE stores velocities in units of
    1/sqrt(u/eV), whereas GPUMD expects Å/fs, so the values are converted
    before writing.

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's Atoms object stays untouched.
    atoms = structure.copy()

    # Velocities are written only when present and non-negligible.
    velocities = atoms.get_velocities()
    write_velocities = not (velocities is None or np.max(np.abs(velocities)) < 1e-6)

    # Validate the groupings: at most three methods, and every method must
    # cover each atom index exactly once.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            site_indices = [site for group in grouping for site in group]
            if len(site_indices) != len(atoms) or \
                    set(site_indices) != set(range(len(atoms))):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # ASE's extxyz writer emits per-atom arrays as properties, e.g.
    #   species:S:1  pos:R:3  mass:R:1  vel:R:3  group:I:<n_methods>
    # together with pbc="..." and lattice="..." in the comment line.
    if atoms.has('mass'):
        # keep user-provided masses rather than overwriting them
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if write_velocities:
        # convert from ASE velocity units to Å/fs as expected by GPUMD
        atoms.new_array('vel', atoms.get_velocities() * fs)

    if groupings is not None:
        # for every atom, collect its group index under each grouping method
        group_indices = np.array([
            [
                [index for index, group in enumerate(grouping) if site in group]
                for grouping in groupings
            ]
            for site in range(len(atoms))
        ]).squeeze()
        atoms.new_array('group', group_indices)

    write(filename=filename, images=atoms, write_info=True, format='extxyz')
def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    records = []
    shift = 0      # cumulative offset applied when the step counter restarts
    raw_step = 0   # step as written in the file (resets between runs)
    with open(filename, 'r') as fobj:
        for line in fobj:
            if line.startswith('# mc'):
                # header of a new MC run: parse the simulation parameters,
                # which apply to all subsequent data rows
                flds = line.split()
                mc_type = flds[2]
                md_steps = int(flds[3])
                mc_trials = int(flds[4])
                temperature_initial = float(flds[5])
                temperature_final = float(flds[6])
                if mc_type.endswith('sgc'):
                    ntypes = int(flds[7])
                    species = [flds[8 + 2 * k] for k in range(ntypes)]
                    phis = {f'phi_{flds[8 + 2 * k]}': float(flds[9 + 2 * k])
                            for k in range(ntypes)}
                    kappa = float(flds[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
            elif line.startswith('# num_MD_steps'):
                # column header line; carries no data
                continue
            else:
                flds = line.split()
                previous = raw_step
                raw_step = int(flds[0])
                if raw_step <= previous and accumulate:
                    # step counter restarted: a new run began in the same file
                    shift += previous
                record = dict(
                    step=raw_step + shift,
                    mc_type=mc_type,
                    md_steps=md_steps,
                    mc_trials=mc_trials,
                    temperature_initial=temperature_initial,
                    temperature_final=temperature_final,
                    acceptance_ratio=float(flds[1]),
                )
                if mc_type.endswith('sgc'):
                    record.update(phis)
                    if mc_type == 'vcsgc':
                        record['kappa'] = kappa
                    record.update({f'conc_{s}': float(flds[k])
                                   for k, s in enumerate(species, start=2)})
                records.append(record)
    return DataFrame.from_dict(records)
def read_thermodynamic_data(
    directory_name: str,
    normalize: bool = False,
) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
    (optionally) files, and returns the thermodynamic data including the
    time (in ps), the pressure (in GPa), the side lengths of the simulation
    cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.
    normalize
        Normalize thermodynamic quantities per atom.
        This requires the ``model.xyz`` file to be present.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.

    Raises
    ------
    FileNotFoundError
        If ``run.in``, ``thermo.out``, or (when ``normalize`` is set)
        ``model.xyz`` is missing from the directory.
    ValueError
        If ``thermo.out`` contains more rows than expected from ``run.in``.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    if normalize:
        # per-atom normalization needs the atom count from model.xyz
        try:
            structure = read(f'{directory_name}/model.xyz')
        except FileNotFoundError:
            raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
        natoms = len(structure)
    else:
        natoms = 1

    # Reconstruct the sequence of `run` commands from run.in, keeping track of
    # the time step and thermo dump interval in effect for each run.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                # no dump_thermo issued yet; this run writes no thermo data
                continue
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if (dump_thermo != 'DEFINEDONCE'):
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            # sentinel: dump_thermo has been consumed by a run; only a new
            # dump_thermo command can produce another block
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    # Number of thermo.out rows implied by run.in (one row per dump interval).
    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) == 0:
        # Could be the case when a run has just started and thermo.out has been created
        # but not populated yet
        warn('`thermo.out` is empty')
        return df

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    # Assign a simulation time (in ps) to each row, block by block, with each
    # block's times offset by the end time of the previous block.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    # pressure as the mean of the diagonal stress components
    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # triclinic cell: lattice-vector lengths and volume via the
        # determinant of the full cell matrix
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # orthorhombic cell: diagonal entries are the side lengths
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume
    if normalize:
        df.volume /= natoms

    return df