Coverage for calorine/gpumd/io.py: 100%
244 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-12-11 13:49 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2025-12-11 13:49 +0000
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from pandas import DataFrame
12def read_kappa(filename: str) -> DataFrame:
13 """Parses a file in ``kappa.out`` format from GPUMD and returns the
14 content as a data frame. More information concerning file format,
15 content and units can be found `here
16 <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.
18 Parameters
19 ----------
20 filename
21 Input file name.
22 """
23 data = np.loadtxt(filename)
24 if isinstance(data[0], np.float64):
25 # If only a single row in kappa.out, append a dimension
26 data = data.reshape(1, -1)
27 tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
28 if len(data[0]) != len(tags):
29 raise ValueError(
30 f'Input file contains {len(data[0])} data columns.'
31 f' Expected {len(tags)} columns.'
32 )
33 df = DataFrame(data=data, columns=tags)
34 df['kx_tot'] = df.kx_in + df.kx_out
35 df['ky_tot'] = df.ky_in + df.ky_out
36 return df
39def read_msd(filename: str) -> DataFrame:
40 """Parses a file in ``msd.out`` format from GPUMD and returns the
41 content as a data frame. More information concerning file format,
42 content and units can be found `here
43 <https://gpumd.org/gpumd/output_files/msd_out.html>`__.
45 Parameters
46 ----------
47 filename
48 Input file name.
49 """
50 data = np.loadtxt(filename)
51 if isinstance(data[0], np.float64):
52 # If only a single row in msd.out, append a dimension
53 data = data.reshape(1, -1)
54 ncols = len(data[0])
55 ngroups = (ncols - 1) // 6
56 if ngroups * 6 + 1 != ncols:
57 raise ValueError(
58 f'Input file contains {ncols} data columns.'
59 f' Expected {1+ngroups*6} columns (1+6*ngroups).'
60 )
61 tags = ['time']
62 flds = 'msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split()
63 if ngroups == 1:
64 tags.extend(flds)
65 else:
66 tags.extend([f'{f}_{n}' for n in range(ngroups) for f in flds])
67 df = DataFrame(data=data, columns=tags)
68 return df
71def read_hac(filename: str,
72 exclude_currents: bool = True,
73 exclude_in_out: bool = True) -> DataFrame:
74 """Parses a file in ``hac.out`` format from GPUMD and returns the
75 content as a data frame. More information concerning file format,
76 content and units can be found `here
77 <https://gpumd.org/gpumd/output_files/hac_out.html>`__.
79 Parameters
80 ----------
81 filename
82 Input file name.
83 exclude_currents
84 Do not include currents in output to save memory.
85 exclude_in_out
86 Do not include `in` and `out` parts of conductivity in output to save memory.
87 """
88 data = np.loadtxt(filename)
89 if isinstance(data[0], np.float64):
90 # If only a single row in hac.out, append a dimension
91 data = data.reshape(1, -1)
92 tags = 'time'
93 tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
94 tags += ' kx_in kx_out ky_in ky_out kz_tot'
95 tags = tags.split()
96 if len(data[0]) != len(tags):
97 raise ValueError(
98 f'Input file contains {len(data[0])} data columns.'
99 f' Expected {len(tags)} columns.'
100 )
101 df = DataFrame(data=data, columns=tags)
102 df['kx_tot'] = df.kx_in + df.kx_out
103 df['ky_tot'] = df.ky_in + df.ky_out
104 df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
105 df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
106 df['jjz_tot'] = df.jtot_jtot_z
107 del df['jtot_jtot_z']
108 if exclude_in_out:
109 # remove columns with in/out data to save space
110 for col in df:
111 if 'in' in col or 'out' in col:
112 del df[col]
113 if exclude_currents:
114 # remove columns with currents to save space
115 for col in df:
116 if col.startswith('j'):
117 del df[col]
118 return df
121def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
122 """Parses a file in ``thermo.out`` format from GPUMD and returns the
123 content as a data frame. More information concerning file format,
124 content and units can be found `here
125 <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.
127 Parameters
128 ----------
129 filename
130 Input file name.
131 natoms
132 Number of atoms; used to normalize energies.
133 """
134 data = np.loadtxt(filename)
135 if len(data) == 0:
136 return DataFrame(data=data)
137 if isinstance(data[0], np.float64):
138 # If only a single row in loss.out, append a dimension
139 data = data.reshape(1, -1)
140 if len(data[0]) == 9:
141 # orthorhombic box
142 tags = 'temperature kinetic_energy potential_energy'
143 tags += ' stress_xx stress_yy stress_zz'
144 tags += ' cell_xx cell_yy cell_zz'
145 elif len(data[0]) == 12:
146 # orthorhombic box with stresses in Voigt notation (v3.3.1+)
147 tags = 'temperature kinetic_energy potential_energy'
148 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
149 tags += ' cell_xx cell_yy cell_zz'
150 elif len(data[0]) == 15:
151 # triclinic box
152 tags = 'temperature kinetic_energy potential_energy'
153 tags += ' stress_xx stress_yy stress_zz'
154 tags += (
155 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
156 )
157 elif len(data[0]) == 18:
158 # triclinic box with stresses in Voigt notation (v3.3.1+)
159 tags = 'temperature kinetic_energy potential_energy'
160 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
161 tags += (
162 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
163 )
164 else:
165 raise ValueError(
166 f'Input file contains {len(data[0])} data columns.'
167 ' Expected 9, 12, 15 or 18 columns.'
168 )
169 df = DataFrame(data=data, columns=tags.split())
170 assert natoms > 0, 'natoms must be positive'
171 df.kinetic_energy /= natoms
172 df.potential_energy /= natoms
173 return df
176def read_xyz(filename: str) -> Atoms:
177 """
178 Reads the structure input file (``model.xyz``) for GPUMD and returns the
179 structure.
181 This is a wrapper function around :func:`ase.io.read_xyz` since the ASE implementation does
182 not read velocities properly.
184 Parameters
185 ----------
186 filename
187 Name of file from which to read the structure.
189 Returns
190 -------
191 Structure as ASE Atoms object with additional per-atom arrays
192 representing atomic masses, velocities etc.
193 """
194 structure = read(filename, format='extxyz')
195 if structure.has('vel'):
196 structure.set_velocities(structure.get_array('vel'))
197 return structure
200def read_runfile(filename: str) -> List[Tuple[str, list]]:
201 """
202 Parses a GPUMD input file in ``run.in`` format and returns the
203 content in the form a list of keyword-value pairs.
205 Parameters
206 ----------
207 filename
208 Input file name.
210 Returns
211 -------
212 List of keyword-value pairs.
213 """
214 data = []
215 with open(filename, 'r') as f:
216 for k, line in enumerate(f.readlines()):
217 flds = line.split()
218 if len(flds) == 0:
219 continue
220 elif len(flds) == 1:
221 raise ValueError(f'Line {k} contains only one field:\n{line}')
222 keyword = flds[0]
223 values = tuple(flds[1:])
224 if keyword in ['time_step', 'velocity']:
225 values = float(values[0])
226 elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
227 values = int(values[0])
228 elif len(values) == 1:
229 values = values[0]
230 data.append((keyword, values))
231 return data
234def write_runfile(
235 file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
236):
237 """Write a file in run.in format to define input parameters for MD simulation.
239 Parameters
240 ----------
241 file
242 Path to file to be written.
244 parameters
245 Defines all command-parameter(s) pairs used in run.in file
246 (see GPUMD documentation for a complete list).
247 Values can be either floats, integers, or lists/tuples.
248 """
250 with open(file, 'w') as f:
251 # Write all keywords with parameter(s)
252 for key, val in parameters:
253 f.write(f'{key} ')
254 if isinstance(val, Iterable) and not isinstance(val, str):
255 for v in val:
256 f.write(f'{v} ')
257 else:
258 f.write(f'{val}')
259 f.write('\n')
262def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
263 """
264 Writes a structure into GPUMD input format (`model.xyz`).
266 Parameters
267 ----------
268 filename
269 Name of file to which the structure should be written.
270 structure
271 Input structure.
272 groupings
273 Groups into which the individual atoms should be divided in the form of
274 a list of list of lists. Specifically, the outer list corresponds to
275 the grouping methods, of which there can be three at the most, which
276 contains a list of groups in the form of lists of site indices. The
277 sum of the lengths of the latter must be the same as the total number
278 of atoms.
280 Raises
281 ------
282 ValueError
283 Raised if parameters are incompatible.
284 """
285 # Make a local copy of the atoms object
286 _structure = structure.copy()
288 # Check velocties parameter
289 velocities = _structure.get_velocities()
290 if velocities is None or np.max(np.abs(velocities)) < 1e-6:
291 has_velocity = 0
292 else:
293 has_velocity = 1
295 # Check groupings parameter
296 if groupings is None:
297 number_of_grouping_methods = 0
298 else:
299 number_of_grouping_methods = len(groupings)
300 if number_of_grouping_methods > 3:
301 raise ValueError('There can be no more than 3 grouping methods!')
302 for g, grouping in enumerate(groupings):
303 all_indices = [i for group in grouping for i in group]
304 if len(all_indices) != len(_structure) or set(all_indices) != set(
305 range(len(_structure))
306 ):
307 raise ValueError(
308 f'The indices listed in grouping method {g} are'
309 ' not compatible with the input structure!'
310 )
312 # Allowed keyword=value pairs. Use ASEs extyz write functionality.
313 # pbc="pbc_a pbc_b pbc_c"
314 # lattice="ax ay az bx by bz cx cy cz"
315 # properties=property_name:data_type:number_of_columns
316 # species:S:1
317 # pos:R:3
318 # mass:R:1
319 # vel:R:3
320 # group:I:number_of_grouping_methods
321 if _structure.has('mass'):
322 # If structure already has masses set, use those
323 warn('Structure already has array "mass"; will use existing values.')
324 else:
325 _structure.new_array('mass', _structure.get_masses())
327 if has_velocity:
328 _structure.new_array('vel', _structure.get_velocities())
329 if groupings is not None:
330 group_indices = np.array(
331 [
332 [
333 [
334 group_index
335 for group_index, group in enumerate(grouping)
336 if structure_idx in group
337 ]
338 for grouping in groupings
339 ]
340 for structure_idx in range(len(_structure))
341 ]
342 ).squeeze() # pythoniccc
343 _structure.new_array('group', group_indices)
345 write(filename=filename, images=_structure, write_info=True, format='extxyz')
348def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
349 """Parses a Monte Carlo output file in ``mcmd.out`` format
350 and returns the content in the form of a DataFrame.
352 Parameters
353 ----------
354 filename
355 Path to file to be parsed.
356 accumulate
357 If ``True`` the MD steps between subsequent Monte Carlo
358 runs in the same output file will be accumulated.
360 Returns
361 -------
362 DataFrame containing acceptance ratios and concentrations (if available),
363 as well as key Monte Carlo parameters.
364 """
365 with open(filename, 'r') as f:
366 lines = f.readlines()
367 data = []
368 offset = 0
369 step = 0
370 accummulated_step = 0
371 for line in lines:
372 if line.startswith('# mc'):
373 flds = line.split()
374 mc_type = flds[2]
375 md_steps = int(flds[3])
376 mc_trials = int(flds[4])
377 temperature_initial = float(flds[5])
378 temperature_final = float(flds[6])
379 if mc_type.endswith('sgc'):
380 ntypes = int(flds[7])
381 species = [flds[8+2*k] for k in range(ntypes)]
382 phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
383 kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
384 elif line.startswith('# num_MD_steps'):
385 continue
386 else:
387 flds = line.split()
388 previous_step = step
389 step = int(flds[0])
390 if step <= previous_step and accumulate:
391 offset += previous_step
392 accummulated_step = step + offset
393 record = dict(
394 step=accummulated_step,
395 mc_type=mc_type,
396 md_steps=md_steps,
397 mc_trials=mc_trials,
398 temperature_initial=temperature_initial,
399 temperature_final=temperature_final,
400 acceptance_ratio=float(flds[1]),
401 )
402 if mc_type.endswith('sgc'):
403 record.update(phis)
404 if mc_type == 'vcsgc':
405 record['kappa'] = kappa
406 concentrations = {f'conc_{s}': float(flds[k])
407 for k, s in enumerate(species, start=2)}
408 record.update(concentrations)
409 data.append(record)
410 df = DataFrame.from_dict(data)
411 return df
414def read_thermodynamic_data(
415 directory_name: str,
416 normalize: bool = False,
417) -> DataFrame:
418 """Parses the data in a GPUMD output directory
419 and returns the content in the form of a :class:`DataFrame`.
420 This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
421 (optionally) files, and returns the thermodynamic data including the
422 time (in ps), the pressure (in GPa), the side lengths of the simulation
423 cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).
425 Parameters
426 ----------
427 directory_name
428 Path to directory to be parsed.
429 normalize
430 Normalize thermodynamic quantities per atom.
431 This requires the ``model.xyz`` file to be present.
433 Returns
434 -------
435 :class:`DataFrame` containing (augmented) thermodynamic data.
436 """
438 try:
439 params = read_runfile(f'{directory_name}/run.in')
440 except FileNotFoundError:
441 raise FileNotFoundError(f'No `run.in` file found in {directory_name}')
443 if normalize:
444 try:
445 structure = read(f'{directory_name}/model.xyz')
446 except FileNotFoundError:
447 raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
448 natoms = len(structure)
449 else:
450 natoms = 1
452 blocks = []
453 time_step = 1.0 # GPUMD default
454 dump_thermo = None
455 for p, v in params:
456 if p == 'time_step':
457 time_step = v
458 elif p == 'dump_thermo':
459 dump_thermo = v
460 elif p == 'run':
461 if dump_thermo is None:
462 continue
463 # We do not require dump_thermo to exist for subsequent
464 # runs if it has been used for atleast one before.
465 # But if there has been no new dump_thermo, we
466 # should not create a block.
467 if (dump_thermo != 'DEFINEDONCE'):
468 blocks.append(dict(
469 nsteps=v,
470 time_step=time_step,
471 dump_thermo=dump_thermo,
472 ))
473 dump_thermo = 'DEFINEDONCE'
475 try:
476 df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
477 except FileNotFoundError:
478 raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')
480 expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
481 for b in blocks if b['dump_thermo'] is not None])
482 if len(df) != expected_rows:
483 warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
484 f' expectation based on `run.in` file ({expected_rows}).')
485 if len(df) > expected_rows:
486 raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
487 f' expectation based on `run.in` file ({expected_rows}).')
488 if len(df) == 0:
489 # Could be the case when a run has just started and thermo.out has been created
490 # but not populated yet
491 warn('`thermo.out` is empty')
492 return df
494 # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
495 times = []
496 offset = 0.0
497 for b in blocks:
498 ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
499 block_times = np.array(range(1, 1 + ns)) \
500 * b['dump_thermo'] * b['time_step'] * 1e-3 # in ps
501 block_times += offset
502 times.extend(block_times)
503 offset = times[-1]
504 df['time'] = times[:len(df)]
506 df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
507 if 'cell_xy' in df:
508 df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
509 df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
510 df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
511 volume = (df.cell_xx * df.cell_yy * df.cell_zz +
512 df.cell_xy * df.cell_yz * df.cell_xz +
513 df.cell_xz * df.cell_yx * df.cell_zy -
514 df.cell_xx * df.cell_yz * df.cell_zy -
515 df.cell_xy * df.cell_yx * df.cell_zz -
516 df.cell_xz * df.cell_yy * df.cell_zx)
517 else:
518 df['alat'] = df.cell_xx
519 df['blat'] = df.cell_yy
520 df['clat'] = df.cell_zz
521 volume = (df.cell_xx * df.cell_yy * df.cell_zz)
522 df['volume'] = volume
523 if normalize:
524 df.volume /= natoms
526 return df