Coverage for calorine/gpumd/io.py: 99%
239 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-12-04 13:49 +0000
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from pandas import DataFrame
12def read_kappa(filename: str) -> DataFrame:
13 """Parses a file in ``kappa.out`` format from GPUMD and returns the
14 content as a data frame. More information concerning file format,
15 content and units can be found `here
16 <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.
18 Parameters
19 ----------
20 filename
21 Input file name.
22 """
23 data = np.loadtxt(filename)
24 if isinstance(data[0], np.float64):
25 # If only a single row in kappa.out, append a dimension
26 data = data.reshape(1, -1)
27 tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
28 if len(data[0]) != len(tags):
29 raise ValueError(
30 f'Input file contains {len(data[0])} data columns.'
31 f' Expected {len(tags)} columns.'
32 )
33 df = DataFrame(data=data, columns=tags)
34 df['kx_tot'] = df.kx_in + df.kx_out
35 df['ky_tot'] = df.ky_in + df.ky_out
36 return df
def read_msd(filename: str) -> DataFrame:
    """Parses a file in ``msd.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/msd_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    raw = np.loadtxt(filename)
    # A single-row file yields a 1D array; promote it to 2D.
    if isinstance(raw[0], np.float64):
        raw = raw.reshape(1, -1)
    ncols = len(raw[0])
    # Each group contributes six columns (MSD and SDC along x, y, z),
    # preceded by a single time column.
    ngroups = (ncols - 1) // 6
    if ncols != ngroups * 6 + 1:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            f' Expected {1+ngroups*6} columns (1+6*ngroups).'
        )
    base = ['msd_x', 'msd_y', 'msd_z', 'sdc_x', 'sdc_y', 'sdc_z']
    if ngroups == 1:
        columns = ['time'] + base
    else:
        # suffix each field with its group index
        columns = ['time'] + [f'{b}_{g}' for g in range(ngroups) for b in base]
    return DataFrame(data=raw, columns=columns)
def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.

    Raises
    ------
    ValueError
        If the file does not contain the expected eleven columns.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in hac.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    # total conductivities and current autocorrelations from in/out parts
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # Remove columns with in/out data to save space.
        # Iterate over a snapshot of the column names: deleting columns
        # while iterating the frame itself relies on pandas implementation
        # details (Index immutability) and is fragile.
        for col in list(df):
            if 'in' in col or 'out' in col:
                del df[col]
    if exclude_currents:
        # remove columns with currents to save space
        for col in list(df):
            if col.startswith('j'):
                del df[col]
    return df
def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive, or if the file does not contain
        9, 12, 15 or 18 columns.
    """
    # `assert` is stripped under `python -O`; validate input explicitly.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    ncols = len(data[0])
    tags = 'temperature kinetic_energy potential_energy'
    if ncols in (9, 15):
        # diagonal stress components only
        tags += ' stress_xx stress_yy stress_zz'
    elif ncols in (12, 18):
        # stresses in Voigt notation (v3.3.1+)
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
    else:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    if ncols in (9, 12):
        # orthorhombic box: only the diagonal cell metric is written
        tags += ' cell_xx cell_yy cell_zz'
    else:
        # triclinic box: full 3x3 cell metric
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    df = DataFrame(data=data, columns=tags.split())
    # normalize extensive quantities per atom
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read_xyz` since the ASE
    implementation does not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    # GPUMD stores velocities in the extra `vel` array; copy them into the
    # canonical ASE velocities so get_velocities() works downstream.
    if atoms.has('vel'):
        atoms.set_velocities(atoms.get_array('vel'))
    return atoms
def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a line contains a keyword without any values.
    """
    data = []
    with open(filename, 'r') as f:
        # enumerate from 1 so error messages report 1-based line numbers,
        # matching what users see in their editors
        for k, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            # convert known single-valued keywords to their natural types
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in run.in format to define input parameters for MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        Defines all command-parameter(s) pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as fh:
        for keyword, value in parameters:
            # non-string iterables are expanded into space-separated fields
            if isinstance(value, Iterable) and not isinstance(value, str):
                line = f'{keyword} ' + ''.join(f'{v} ' for v in value)
            else:
                line = f'{keyword} {value}'
            fh.write(line + '\n')
def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's object is never modified.
    atoms = structure.copy()

    # Velocities count as present only if at least one component is
    # non-negligible.
    velocities = atoms.get_velocities()
    has_velocity = velocities is not None and np.max(np.abs(velocities)) >= 1e-6

    # Validate the groupings parameter: at most three methods, and each
    # method must cover every atom index exactly once.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            flat = [idx for group in grouping for idx in group]
            if len(flat) != len(atoms) or set(flat) != set(range(len(atoms))):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASEs extyz write functionality.
    #  pbc="pbc_a pbc_b pbc_c"
    #  lattice="ax ay az bx by bz cx cy cz"
    #  properties=property_name:data_type:number_of_columns
    #      species:S:1
    #      pos:R:3
    #      mass:R:1
    #      vel:R:3
    #      group:I:number_of_grouping_methods
    if atoms.has('mass'):
        # If structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if has_velocity:
        atoms.new_array('vel', atoms.get_velocities())
    if groupings is not None:
        # For every atom, record (per grouping method) the index of the
        # group that contains it.
        membership = []
        for atom_index in range(len(atoms)):
            per_method = []
            for grouping in groupings:
                per_method.append([
                    group_index
                    for group_index, group in enumerate(grouping)
                    if atom_index in group
                ])
            membership.append(per_method)
        atoms.new_array('group', np.array(membership).squeeze())

    write(filename=filename, images=atoms, write_info=True, format='extxyz')
def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as fp:
        content = fp.readlines()

    records = []
    shift = 0
    current_step = 0
    for line in content:
        if line.startswith('# mc'):
            # Header line announcing a new MC run; parse its parameters.
            fields = line.split()
            mc_type = fields[2]
            md_steps = int(fields[3])
            mc_trials = int(fields[4])
            temperature_initial = float(fields[5])
            temperature_final = float(fields[6])
            if mc_type.endswith('sgc'):
                # (v)sgc runs additionally carry species, chemical
                # potentials and (for vcsgc) the kappa parameter.
                ntypes = int(fields[7])
                species = [fields[8 + 2 * k] for k in range(ntypes)]
                phis = {f'phi_{fields[8 + 2 * k]}': float(fields[9 + 2 * k])
                        for k in range(ntypes)}
                kappa = float(fields[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            # column header line; nothing to parse
            continue
        else:
            fields = line.split()
            last_step = current_step
            current_step = int(fields[0])
            # A step counter that restarts signals a new MC run; when
            # accumulating, shift steps so they increase monotonically.
            if current_step <= last_step and accumulate:
                shift += last_step
            record = dict(
                step=current_step + shift,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(fields[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                concentrations = {f'conc_{s}': float(fields[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            records.append(record)
    return DataFrame.from_dict(records)
def read_thermodynamic_data(
    directory_name: str,
    normalize: bool = False,
) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out``, ``run.in``, and ``model.xyz``
    (optionally) files, and returns the thermodynamic data including the
    time (in ps), the pressure (in GPa), the side lengths of the simulation
    cell (in Å), and the volume (in Å:sup:`3` or Å:sup:`3`/atom).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.
    normalize
        Normalize thermodynamic quantities per atom.
        This requires the ``model.xyz`` file to be present.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.

    Raises
    ------
    FileNotFoundError
        If ``run.in``, ``thermo.out``, or (when ``normalize`` is set)
        ``model.xyz`` is missing from the directory.
    ValueError
        If ``thermo.out`` contains more rows than expected from ``run.in``.
    """
    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    if normalize:
        try:
            structure = read(f'{directory_name}/model.xyz')
        except FileNotFoundError:
            raise FileNotFoundError(f'No `model.xyz` file found in {directory_name}')
        natoms = len(structure)
    else:
        natoms = 1

    # Collect one block per `run` command for which thermo data is dumped.
    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                # no thermo output has been requested yet for this run
                continue
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if dump_thermo != 'DEFINEDONCE':
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out', natoms=natoms)
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    expected_rows = sum(int(round(b['nsteps'] / b['dump_thermo'], 0))
                        for b in blocks if b['dump_thermo'] is not None)
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
        if len(df) > expected_rows:
            raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                             f' expectation based on `run.in` file ({expected_rows}).')

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        # triclinic cell: lattice parameters are the row norms of the
        # cell matrix, the volume its determinant
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_xz +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        # orthorhombic cell
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = (df.cell_xx * df.cell_yy * df.cell_zz)
    df['volume'] = volume
    if normalize:
        df.volume /= natoms

    return df