Coverage for calorine/gpumd/io.py: 100%
224 statements
« prev ^ index » next coverage.py v7.6.4, created at 2025-02-05 14:53 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2025-02-05 14:53 +0000
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from pandas import DataFrame
12def read_kappa(filename: str) -> DataFrame:
13 """Parses a file in ``kappa.out`` format from GPUMD and returns the
14 content as a data frame. More information concerning file format,
15 content and units can be found `here
16 <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.
18 Parameters
19 ----------
20 filename
21 Input file name.
22 """
23 data = np.loadtxt(filename)
24 if isinstance(data[0], np.float64):
25 # If only a single row in kappa.out, append a dimension
26 data = data.reshape(1, -1)
27 tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
28 if len(data[0]) != len(tags):
29 raise ValueError(
30 f'Input file contains {len(data[0])} data columns.'
31 f' Expected {len(tags)} columns.'
32 )
33 df = DataFrame(data=data, columns=tags)
34 df['kx_tot'] = df.kx_in + df.kx_out
35 df['ky_tot'] = df.ky_in + df.ky_out
36 return df
39def read_msd(filename: str) -> DataFrame:
40 """Parses a file in ``msd.out`` format from GPUMD and returns the
41 content as a data frame. More information concerning file format,
42 content and units can be found `here
43 <https://gpumd.org/gpumd/output_files/msd_out.html>`__.
45 Parameters
46 ----------
47 filename
48 Input file name.
49 """
50 data = np.loadtxt(filename)
51 if isinstance(data[0], np.float64):
52 # If only a single row in msd.out, append a dimension
53 data = data.reshape(1, -1)
54 tags = 'time msd_x msd_y msd_z sdc_x sdc_y sdc_z'.split()
55 if len(data[0]) != len(tags):
56 raise ValueError(
57 f'Input file contains {len(data[0])} data columns.'
58 f' Expected {len(tags)} columns.'
59 )
60 df = DataFrame(data=data, columns=tags)
61 return df
64def read_hac(filename: str,
65 exclude_currents: bool = True,
66 exclude_in_out: bool = True) -> DataFrame:
67 """Parses a file in ``hac.out`` format from GPUMD and returns the
68 content as a data frame. More information concerning file format,
69 content and units can be found `here
70 <https://gpumd.org/gpumd/output_files/hac_out.html>`__.
72 Parameters
73 ----------
74 filename
75 Input file name.
76 exclude_currents
77 Do not include currents in output to save memory.
78 exclude_in_out
79 Do not include `in` and `out` parts of conductivity in output to save memory.
80 """
81 data = np.loadtxt(filename)
82 if isinstance(data[0], np.float64):
83 # If only a single row in hac.out, append a dimension
84 data = data.reshape(1, -1)
85 tags = 'time'
86 tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
87 tags += ' kx_in kx_out ky_in ky_out kz_tot'
88 tags = tags.split()
89 if len(data[0]) != len(tags):
90 raise ValueError(
91 f'Input file contains {len(data[0])} data columns.'
92 f' Expected {len(tags)} columns.'
93 )
94 df = DataFrame(data=data, columns=tags)
95 df['kx_tot'] = df.kx_in + df.kx_out
96 df['ky_tot'] = df.ky_in + df.ky_out
97 df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
98 df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
99 df['jjz_tot'] = df.jtot_jtot_z
100 del df['jtot_jtot_z']
101 if exclude_in_out:
102 # remove columns with in/out data to save space
103 for col in df:
104 if 'in' in col or 'out' in col:
105 del df[col]
106 if exclude_currents:
107 # remove columns with currents to save space
108 for col in df:
109 if col.startswith('j'):
110 del df[col]
111 return df
114def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
115 """Parses a file in ``thermo.out`` format from GPUMD and returns the
116 content as a data frame. More information concerning file format,
117 content and units can be found `here
118 <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.
120 Parameters
121 ----------
122 filename
123 Input file name.
124 natoms
125 Number of atoms; used to normalize energies.
126 """
127 data = np.loadtxt(filename)
128 if isinstance(data[0], np.float64):
129 # If only a single row in loss.out, append a dimension
130 data = data.reshape(1, -1)
131 if len(data[0]) == 9:
132 # orthorhombic box
133 tags = 'temperature kinetic_energy potential_energy'
134 tags += ' stress_xx stress_yy stress_zz'
135 tags += ' cell_xx cell_yy cell_zz'
136 elif len(data[0]) == 12:
137 # orthorhombic box with stresses in Voigt notation (v3.3.1+)
138 tags = 'temperature kinetic_energy potential_energy'
139 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
140 tags += ' cell_xx cell_yy cell_zz'
141 elif len(data[0]) == 15:
142 # triclinic box
143 tags = 'temperature kinetic_energy potential_energy'
144 tags += ' stress_xx stress_yy stress_zz'
145 tags += (
146 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
147 )
148 elif len(data[0]) == 18:
149 # triclinic box with stresses in Voigt notation (v3.3.1+)
150 tags = 'temperature kinetic_energy potential_energy'
151 tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
152 tags += (
153 ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
154 )
155 else:
156 raise ValueError(
157 f'Input file contains {len(data[0])} data columns.'
158 ' Expected 9, 12, 15 or 18 columns.'
159 )
160 df = DataFrame(data=data, columns=tags.split())
161 assert natoms > 0, 'natoms must be positive'
162 df.kinetic_energy /= natoms
163 df.potential_energy /= natoms
164 return df
167def read_xyz(filename: str) -> Atoms:
168 """
169 Reads the structure input file (``model.xyz``) for GPUMD and returns the
170 structure.
172 This is a wrapper function around :func:`ase.io.read_xyz` since the ASE implementation does
173 not read velocities properly.
175 Parameters
176 ----------
177 filename
178 Name of file from which to read the structure.
180 Returns
181 -------
182 Structure as ASE Atoms object with additional per-atom arrays
183 representing atomic masses, velocities etc.
184 """
185 structure = read(filename, format='extxyz')
186 if structure.has('vel'):
187 structure.set_velocities(structure.get_array('vel'))
188 return structure
191def read_runfile(filename: str) -> List[Tuple[str, list]]:
192 """
193 Parses a GPUMD input file in ``run.in`` format and returns the
194 content in the form a list of keyword-value pairs.
196 Parameters
197 ----------
198 filename
199 Input file name.
201 Returns
202 -------
203 List of keyword-value pairs.
204 """
205 data = []
206 with open(filename, 'r') as f:
207 for k, line in enumerate(f.readlines()):
208 flds = line.split()
209 if len(flds) == 0:
210 continue
211 elif len(flds) == 1:
212 raise ValueError(f'Line {k} contains only one field:\n{line}')
213 keyword = flds[0]
214 values = tuple(flds[1:])
215 if keyword in ['time_step', 'velocity']:
216 values = float(values[0])
217 elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
218 values = int(values[0])
219 elif len(values) == 1:
220 values = values[0]
221 data.append((keyword, values))
222 return data
225def write_runfile(
226 file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
227):
228 """Write a file in run.in format to define input parameters for MD simulation.
230 Parameters
231 ----------
232 file
233 Path to file to be written.
235 parameters : dict
236 Defines all key-value pairs used in run.in file
237 (see GPUMD documentation for a complete list).
238 Values can be either floats, integers, or lists/tuples.
239 """
241 with open(file, 'w') as f:
242 # Write all keywords with parameter(s)
243 for key, val in parameters:
244 f.write(f'{key} ')
245 if isinstance(val, Iterable) and not isinstance(val, str):
246 for v in val:
247 f.write(f'{v} ')
248 else:
249 f.write(f'{val}')
250 f.write('\n')
253def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
254 """
255 Writes a structure into GPUMD input format (`model.xyz`).
257 Parameters
258 ----------
259 filename
260 Name of file to which the structure should be written.
261 structure
262 Input structure.
263 groupings
264 Groups into which the individual atoms should be divided in the form of
265 a list of list of lists. Specifically, the outer list corresponds to
266 the grouping methods, of which there can be three at the most, which
267 contains a list of groups in the form of lists of site indices. The
268 sum of the lengths of the latter must be the same as the total number
269 of atoms.
271 Raises
272 ------
273 ValueError
274 Raised if parameters are incompatible.
275 """
276 # Make a local copy of the atoms object
277 _structure = structure.copy()
279 # Check velocties parameter
280 velocities = _structure.get_velocities()
281 if velocities is None or np.max(np.abs(velocities)) < 1e-6:
282 has_velocity = 0
283 else:
284 has_velocity = 1
286 # Check groupings parameter
287 if groupings is None:
288 number_of_grouping_methods = 0
289 else:
290 number_of_grouping_methods = len(groupings)
291 if number_of_grouping_methods > 3:
292 raise ValueError('There can be no more than 3 grouping methods!')
293 for g, grouping in enumerate(groupings):
294 all_indices = [i for group in grouping for i in group]
295 if len(all_indices) != len(_structure) or set(all_indices) != set(
296 range(len(_structure))
297 ):
298 raise ValueError(
299 f'The indices listed in grouping method {g} are'
300 ' not compatible with the input structure!'
301 )
303 # Allowed keyword=value pairs. Use ASEs extyz write functionality.
304 # pbc="pbc_a pbc_b pbc_c"
305 # lattice="ax ay az bx by bz cx cy cz"
306 # properties=property_name:data_type:number_of_columns
307 # species:S:1
308 # pos:R:3
309 # mass:R:1
310 # vel:R:3
311 # group:I:number_of_grouping_methods
312 if _structure.has('mass'):
313 # If structure already has masses set, use those
314 warn('Structure already has array "mass"; will use existing values.')
315 else:
316 _structure.new_array('mass', _structure.get_masses())
318 if has_velocity:
319 _structure.new_array('vel', _structure.get_velocities())
320 if groupings is not None:
321 group_indices = np.array(
322 [
323 [
324 [
325 group_index
326 for group_index, group in enumerate(grouping)
327 if structure_idx in group
328 ]
329 for grouping in groupings
330 ]
331 for structure_idx in range(len(_structure))
332 ]
333 ).squeeze() # pythoniccc
334 _structure.new_array('group', group_indices)
336 write(filename=filename, images=_structure, write_info=True, format='extxyz')
339def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
340 """Parses a Monte Carlo output file in ``mcmd.out`` format
341 and returns the content in the form of a DataFrame.
343 Parameters
344 ----------
345 filename
346 Path to file to be parsed.
347 accumulate
348 If ``True`` the MD steps between subsequent Monte Carlo
349 runs in the same output file will be accumulated.
351 Returns
352 -------
353 DataFrame containing acceptance ratios and concentrations (if available),
354 as well as key Monte Carlo parameters.
355 """
356 with open(filename, 'r') as f:
357 lines = f.readlines()
358 data = []
359 offset = 0
360 step = 0
361 accummulated_step = 0
362 for line in lines:
363 if line.startswith('# mc'):
364 flds = line.split()
365 mc_type = flds[2]
366 md_steps = int(flds[3])
367 mc_trials = int(flds[4])
368 temperature_initial = float(flds[5])
369 temperature_final = float(flds[6])
370 if mc_type.endswith('sgc'):
371 ntypes = int(flds[7])
372 species = [flds[8+2*k] for k in range(ntypes)]
373 phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
374 kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
375 elif line.startswith('# num_MD_steps'):
376 continue
377 else:
378 flds = line.split()
379 previous_step = step
380 step = int(flds[0])
381 if step <= previous_step and accumulate:
382 offset += previous_step
383 accummulated_step = step + offset
384 record = dict(
385 step=accummulated_step,
386 mc_type=mc_type,
387 md_steps=md_steps,
388 mc_trials=mc_trials,
389 temperature_initial=temperature_initial,
390 temperature_final=temperature_final,
391 acceptance_ratio=float(flds[1]),
392 )
393 if mc_type.endswith('sgc'):
394 record.update(phis)
395 if mc_type == 'vcsgc':
396 record['kappa'] = kappa
397 concentrations = {f'conc_{s}': float(flds[k])
398 for k, s in enumerate(species, start=2)}
399 record.update(concentrations)
400 data.append(record)
401 df = DataFrame.from_dict(data)
402 return df
405def read_thermodynamic_data(directory_name: str) -> DataFrame:
406 """Parses the data in a GPUMD output directory
407 and returns the content in the form of a :class:`DataFrame`.
408 This function reads the ``thermo.out`` and ``run.in`` files,
409 and returns the thermodynamic data including the time (in ps),
410 the pressure (in GPa), the side lengths of the simulation cell
411 (in Å), and the volume (in Å:sup:`3`).
413 Parameters
414 ----------
415 directory_name
416 Path to directory to be parsed.
418 Returns
419 -------
420 :class:`DataFrame` containing (augmented) thermodynamic data.
421 """
423 try:
424 params = read_runfile(f'{directory_name}/run.in')
425 except FileNotFoundError:
426 raise FileNotFoundError(f'No `run.in` file found in {directory_name}')
428 blocks = []
429 time_step = 1.0 # GPUMD default
430 dump_thermo = None
431 for p, v in params:
432 if p == 'time_step':
433 time_step = v
434 elif p == 'dump_thermo':
435 dump_thermo = v
436 elif p == 'run':
437 if dump_thermo is None:
438 raise ValueError(
439 'Could not extract value of `dump_thermo` keyword from `run.in` file.')
440 # We do not require dump_thermo to exist for subsequent
441 # runs if it has been used for atleast one before.
442 # But if there has been no new dump_thermo, we
443 # should not create a block.
444 if (dump_thermo != 'DEFINEDONCE'):
445 blocks.append(dict(
446 nsteps=v,
447 time_step=time_step,
448 dump_thermo=dump_thermo,
449 ))
450 dump_thermo = 'DEFINEDONCE'
452 try:
453 df = read_thermo(f'{directory_name}/thermo.out')
454 except FileNotFoundError:
455 raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')
457 expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
458 for b in blocks if b['dump_thermo'] is not None])
459 if len(df) != expected_rows:
460 warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
461 f' expectation based on `run.in` file ({expected_rows}).')
462 if len(df) > expected_rows:
463 raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
464 f' expectation based on `run.in` file ({expected_rows}).')
466 # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
467 times = []
468 offset = 0.0
469 for b in blocks:
470 ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
471 block_times = np.array(range(1, 1 + ns)) \
472 * b['dump_thermo'] * b['time_step'] * 1e-3 # in ps
473 block_times += offset
474 times.extend(block_times)
475 offset = times[-1]
476 df['time'] = times[:len(df)]
478 df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
479 if 'cell_xy' in df:
480 df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
481 df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
482 df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
483 volume = (df.cell_xx * df.cell_yy * df.cell_zz +
484 df.cell_xy * df.cell_yz * df.cell_xz +
485 df.cell_xz * df.cell_yx * df.cell_zy -
486 df.cell_xx * df.cell_yz * df.cell_zy -
487 df.cell_xy * df.cell_yx * df.cell_zz -
488 df.cell_xz * df.cell_yy * df.cell_zx)
489 else:
490 df['alat'] = df.cell_xx
491 df['blat'] = df.cell_yy
492 df['clat'] = df.cell_zz
493 volume = (df.cell_xx * df.cell_yy * df.cell_zz)
494 df['volume'] = volume
496 return df