Coverage for calorine/gpumd/io.py: 100%
233 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-24 16:11 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-24 16:11 +0000
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from pandas import DataFrame
12def read_kappa(filename: str) -> DataFrame:
13 """Parses a file in ``kappa.out`` format from GPUMD and returns the
14 content as a data frame. More information concerning file format,
15 content and units can be found `here
16 <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.
18 Parameters
19 ----------
20 filename
21 Input file name.
22 """
23 data = np.loadtxt(filename)
24 if isinstance(data[0], np.float64):
25 # If only a single row in kappa.out, append a dimension
26 data = data.reshape(1, -1)
27 tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
28 if len(data[0]) != len(tags):
29 raise ValueError(
30 f'Input file contains {len(data[0])} data columns.'
31 f' Expected {len(tags)} columns.'
32 )
33 df = DataFrame(data=data, columns=tags)
34 df['kx_tot'] = df.kx_in + df.kx_out
35 df['ky_tot'] = df.ky_in + df.ky_out
36 return df
39def read_msd(filename: str) -> DataFrame:
40 """Parses a file in ``msd.out`` format from GPUMD and returns the
41 content as a data frame. More information concerning file format,
42 content and units can be found `here
43 <https://gpumd.org/gpumd/output_files/msd_out.html>`__.
45 Parameters
46 ----------
47 filename
48 Input file name.
49 """
50 data = np.loadtxt(filename)
51 if isinstance(data[0], np.float64):
52 # If only a single row in msd.out, append a dimension
53 data = data.reshape(1, -1)
54 tags = 'time msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split()
55 if len(data[0]) != len(tags):
56 raise ValueError(
57 f'Input file contains {len(data[0])} data columns.'
58 f' Expected {len(tags)} columns.'
59 )
60 df = DataFrame(data=data, columns=tags)
61 return df
64def read_hac(filename: str,
65 exclude_currents: bool = True,
66 exclude_in_out: bool = True) -> DataFrame:
67 """Parses a file in ``hac.out`` format from GPUMD and returns the
68 content as a data frame. More information concerning file format,
69 content and units can be found `here
70 <https://gpumd.org/gpumd/output_files/hac_out.html>`__.
72 Parameters
73 ----------
74 filename
75 Input file name.
76 exclude_currents
77 Do not include currents in output to save memory.
78 exclude_in_out
79 Do not include `in` and `out` parts of conductivity in output to save memory.
80 """
81 data = np.loadtxt(filename)
82 if isinstance(data[0], np.float64):
83 # If only a single row in hac.out, append a dimension
84 data = data.reshape(1, -1)
85 tags = 'time'
86 tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
87 tags += ' kx_in kx_out ky_in ky_out kz_tot'
88 tags = tags.split()
89 if len(data[0]) != len(tags):
90 raise ValueError(
91 f'Input file contains {len(data[0])} data columns.'
92 f' Expected {len(tags)} columns.'
93 )
94 df = DataFrame(data=data, columns=tags)
95 df['kx_tot'] = df.kx_in + df.kx_out
96 df['ky_tot'] = df.ky_in + df.ky_out
97 df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
98 df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
99 df['jjz_tot'] = df.jtot_jtot_z
100 del df['jtot_jtot_z']
101 if exclude_in_out:
102 # remove columns with in/out data to save space
103 for col in df:
104 if 'in' in col or 'out' in col:
105 del df[col]
106 if exclude_currents:
107 # remove columns with currents to save space
108 for col in df:
109 if col.startswith('j'):
110 del df[col]
111 return df
114def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
115 """Parses a file in ``thermo.out`` format from GPUMD and returns the
116 content as a data frame. More information concerning file format,
117 content and units can be found `here
118 <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.
120 Parameters
121 ----------
122 filename
123 Input file name.
124 natoms
125 Number of atoms; used to normalize energies.
126 """
127 data = np.loadtxt(filename)
128 if isinstance(data[0], np.float64):
129 # If only a single row in loss.out, append a dimension
130 data = data.reshape(1, -1)
131 if len(data[0]) == 9:
132 # orthorhombic box
133 tags = 'temperature kinetic_energy potential_energy'
134 tags += ' stress_xx stress_yy stress_zz'
135 tags += ' cell_xx cell_yy cell_zz'
136 elif len(data[0]) == 12:
137 # orthorhombic box with stresses in Voigt notation (v3.3.1+)
138 tags = 'temperature kinetic_energy potential_energy'
139 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
140 tags += ' cell_xx cell_yy cell_zz'
141 elif len(data[0]) == 15:
142 # triclinic box
143 tags = 'temperature kinetic_energy potential_energy'
144 tags += ' stress_xx stress_yy stress_zz'
145 tags += (
146 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
147 )
148 elif len(data[0]) == 18:
149 # triclinic box with stresses in Voigt notation (v3.3.1+)
150 tags = 'temperature kinetic_energy potential_energy'
151 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
152 tags += (
153 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
154 )
155 else:
156 raise ValueError(
157 f'Input file contains {len(data[0])} data columns.'
158 ' Expected 9, 12, 15 or 18 columns.'
159 )
160 df = DataFrame(data=data, columns=tags.split())
161 assert natoms > 0, 'natoms must be positive'
162 df.kinetic_energy /= natoms
163 df.potential_energy /= natoms
164 return df
167def read_xyz(filename: str) -> Atoms:
168 """
169 Reads the structure input file (``model.xyz``) for GPUMD and returns the
170 structure.
172 This is a wrapper function around :func:`ase.io.read_xyz` since the ASE implementation does
173 not read velocities properly.
175 Parameters
176 ----------
177 filename
178 Name of file from which to read the structure.
180 Returns
181 -------
182 Structure as ASE Atoms object with additional per-atom arrays
183 representing atomic masses, velocities etc.
184 """
185 structure = read(filename, format='extxyz')
186 if structure.has('vel'):
187 structure.set_velocities(structure.get_array('vel'))
188 return structure
191def read_runfile(filename: str) -> List[Tuple[str, list]]:
192 """
193 Parses a GPUMD input file in ``run.in`` format and returns the
194 content in the form a list of keyword-value pairs.
196 Parameters
197 ----------
198 filename
199 Input file name.
201 Returns
202 -------
203 List of keyword-value pairs.
204 """
205 data = []
206 with open(filename, 'r') as f:
207 for k, line in enumerate(f.readlines()):
208 flds = line.split()
209 if len(flds) == 0:
210 continue
211 elif len(flds) == 1:
212 raise ValueError(f'Line {k} contains only one field:\n{line}')
213 keyword = flds[0]
214 values = tuple(flds[1:])
215 if keyword in ['time_step', 'velocity']:
216 values = float(values[0])
217 elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
218 values = int(values[0])
219 elif len(values) == 1:
220 values = values[0]
221 data.append((keyword, values))
222 return data
225def write_runfile(
226 file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
227):
228 """Write a file in run.in format to define input parameters for MD simulation.
230 Parameters
231 ----------
232 file
233 Path to file to be written.
235 parameters
236 Defines all command-parameter(s) pairs used in run.in file
237 (see GPUMD documentation for a complete list).
238 Values can be either floats, integers, or lists/tuples.
239 """
241 with open(file, 'w') as f:
242 # Write all keywords with parameter(s)
243 for key, val in parameters:
244 f.write(f'{key} ')
245 if isinstance(val, Iterable) and not isinstance(val, str):
246 for v in val:
247 f.write(f'{v} ')
248 else:
249 f.write(f'{val}')
250 f.write('\n')
253def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
254 """
255 Writes a structure into GPUMD input format (`model.xyz`).
257 Parameters
258 ----------
259 filename
260 Name of file to which the structure should be written.
261 structure
262 Input structure.
263 groupings
264 Groups into which the individual atoms should be divided in the form of
265 a list of list of lists. Specifically, the outer list corresponds to
266 the grouping methods, of which there can be three at the most, which
267 contains a list of groups in the form of lists of site indices. The
268 sum of the lengths of the latter must be the same as the total number
269 of atoms.
271 Raises
272 ------
273 ValueError
274 Raised if parameters are incompatible.
275 """
276 # Make a local copy of the atoms object
277 _structure = structure.copy()
279 # Check velocties parameter
280 velocities = _structure.get_velocities()
281 if velocities is None or np.max(np.abs(velocities)) < 1e-6:
282 has_velocity = 0
283 else:
284 has_velocity = 1
286 # Check groupings parameter
287 if groupings is None:
288 number_of_grouping_methods = 0
289 else:
290 number_of_grouping_methods = len(groupings)
291 if number_of_grouping_methods > 3:
292 raise ValueError('There can be no more than 3 grouping methods!')
293 for g, grouping in enumerate(groupings):
294 all_indices = [i for group in grouping for i in group]
295 if len(all_indices) != len(_structure) or set(all_indices) != set(
296 range(len(_structure))
297 ):
298 raise ValueError(
299 f'The indices listed in grouping method {g} are'
300 ' not compatible with the input structure!'
301 )
303 # Allowed keyword=value pairs. Use ASEs extyz write functionality.
304 # pbc="pbc_a pbc_b pbc_c"
305 # lattice="ax ay az bx by bz cx cy cz"
306 # properties=property_name:data_type:number_of_columns
307 # species:S:1
308 # pos:R:3
309 # mass:R:1
310 # vel:R:3
311 # group:I:number_of_grouping_methods
312 if _structure.has('mass'):
313 # If structure already has masses set, use those
314 warn('Structure already has array "mass"; will use existing values.')
315 else:
316 _structure.new_array('mass', _structure.get_masses())
318 if has_velocity:
319 _structure.new_array('vel', _structure.get_velocities())
320 if groupings is not None:
321 group_indices = np.array(
322 [
323 [
324 [
325 group_index
326 for group_index, group in enumerate(grouping)
327 if structure_idx in group
328 ]
329 for grouping in groupings
330 ]
331 for structure_idx in range(len(_structure))
332 ]
333 ).squeeze() # pythoniccc
334 _structure.new_array('group', group_indices)
336 write(filename=filename, images=_structure, write_info=True, format='extxyz')
339def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
340 """Parses a Monte Carlo output file in ``mcmd.out`` format
341 and returns the content in the form of a DataFrame.
343 Parameters
344 ----------
345 filename
346 Path to file to be parsed.
347 accumulate
348 If ``True`` the MD steps between subsequent Monte Carlo
349 runs in the same output file will be accumulated.
351 Returns
352 -------
353 DataFrame containing acceptance ratios and concentrations (if available),
354 as well as key Monte Carlo parameters.
355 """
356 with open(filename, 'r') as f:
357 lines = f.readlines()
358 data = []
359 offset = 0
360 step = 0
361 accummulated_step = 0
362 for line in lines:
363 if line.startswith('# mc'):
364 flds = line.split()
365 mc_type = flds[2]
366 md_steps = int(flds[3])
367 mc_trials = int(flds[4])
368 temperature_initial = float(flds[5])
369 temperature_final = float(flds[6])
370 if mc_type.endswith('sgc'):
371 ntypes = int(flds[7])
372 species = [flds[8+2*k] for k in range(ntypes)]
373 phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
374 kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
375 elif line.startswith('# num_MD_steps'):
376 continue
377 else:
378 flds = line.split()
379 previous_step = step
380 step = int(flds[0])
381 if step <= previous_step and accumulate:
382 offset += previous_step
383 accummulated_step = step + offset
384 record = dict(
385 step=accummulated_step,
386 mc_type=mc_type,
387 md_steps=md_steps,
388 mc_trials=mc_trials,
389 temperature_initial=temperature_initial,
390 temperature_final=temperature_final,
391 acceptance_ratio=float(flds[1]),
392 )
393 if mc_type.endswith('sgc'):
394 record.update(phis)
395 if mc_type == 'vcsgc':
396 record['kappa'] = kappa
397 concentrations = {f'conc_{s}': float(flds[k])
398 for k, s in enumerate(species, start=2)}
399 record.update(concentrations)
400 data.append(record)
401 df = DataFrame.from_dict(data)
402 return df
405def read_thermodynamic_data(
406 directory_name: str,
407 normalize: bool = False,
408) -> DataFrame:
409 """Parses the data in a GPUMD output directory
410 and returns the content in the form of a :class:`DataFrame`.
411 This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
412 (optionally) files, and returns the thermodynamic data including the
413 time (in ps), the pressure (in GPa), the side lengths of the simulation
414 cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).
416 Parameters
417 ----------
418 directory_name
419 Path to directory to be parsed.
420 normalize
421 Normalize thermodynamic quantities per atom.
422 This requires the ``model.xyz`` file to be present.
424 Returns
425 -------
426 :class:`DataFrame` containing (augmented) thermodynamic data.
427 """
429 try:
430 params = read_runfile(f'{directory_name}/run.in')
431 except FileNotFoundError:
432 raise FileNotFoundError(f'No `run.in` file found in {directory_name}')
434 if normalize:
435 try:
436 structure = read(f'{directory_name}/model.xyz')
437 except FileNotFoundError:
438 raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
439 natoms = len(structure)
440 else:
441 natoms = 1
443 blocks = []
444 time_step = 1.0 # GPUMD default
445 dump_thermo = None
446 for p, v in params:
447 if p == 'time_step':
448 time_step = v
449 elif p == 'dump_thermo':
450 dump_thermo = v
451 elif p == 'run':
452 if dump_thermo is None:
453 raise ValueError(
454 'Could not extract value of `dump_thermo` keyword from `run.in` file.')
455 # We do not require dump_thermo to exist for subsequent
456 # runs if it has been used for atleast one before.
457 # But if there has been no new dump_thermo, we
458 # should not create a block.
459 if (dump_thermo != 'DEFINEDONCE'):
460 blocks.append(dict(
461 nsteps=v,
462 time_step=time_step,
463 dump_thermo=dump_thermo,
464 ))
465 dump_thermo = 'DEFINEDONCE'
467 try:
468 df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
469 except FileNotFoundError:
470 raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')
472 expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
473 for b in blocks if b['dump_thermo'] is not None])
474 if len(df) != expected_rows:
475 warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
476 f' expectation based on `run.in` file ({expected_rows}).')
477 if len(df) > expected_rows:
478 raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
479 f' expectation based on `run.in` file ({expected_rows}).')
481 # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
482 times = []
483 offset = 0.0
484 for b in blocks:
485 ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
486 block_times = np.array(range(1, 1 + ns)) \
487 * b['dump_thermo'] * b['time_step'] * 1e-3 # in ps
488 block_times += offset
489 times.extend(block_times)
490 offset = times[-1]
491 df['time'] = times[:len(df)]
493 df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
494 if 'cell_xy' in df:
495 df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
496 df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
497 df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
498 volume = (df.cell_xx * df.cell_yy * df.cell_zz +
499 df.cell_xy * df.cell_yz * df.cell_xz +
500 df.cell_xz * df.cell_yx * df.cell_zy -
501 df.cell_xx * df.cell_yz * df.cell_zy -
502 df.cell_xy * df.cell_yx * df.cell_zz -
503 df.cell_xz * df.cell_yy * df.cell_zx)
504 else:
505 df['alat'] = df.cell_xx
506 df['blat'] = df.cell_yy
507 df['clat'] = df.cell_zz
508 volume = (df.cell_xx * df.cell_yy * df.cell_zz)
509 df['volume'] = volume
510 if normalize:
511 df.volume /= natoms
513 return df