Coverage for calorine/gpumd/io.py: 100%
215 statements
coverage.py v7.6.4, created at 2024-12-10 08:26 +0000
from warnings import warn
from collections.abc import Iterable
from pathlib import Path
from typing import List, Tuple, Union

import numpy as np
from ase import Atoms
from ase.io import read, write
from pandas import DataFrame
def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in kappa.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
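# A minimal usage sketch for read_kappa (the file name is illustrative and
# assumes a GPUMD run directory containing a `kappa.out` file):
#
#     df = read_kappa('kappa.out')
#     print(df[['kx_tot', 'ky_tot', 'kz_tot']].mean())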
def read_hac(filename: str,
             exclude_currents: bool = True,
             exclude_in_out: bool = True) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    exclude_currents
        Do not include currents in output to save memory.
    exclude_in_out
        Do not include `in` and `out` parts of conductivity in output to save memory.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in hac.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    df['jjx_tot'] = df.jin_jtot_x + df.jout_jtot_x
    df['jjy_tot'] = df.jin_jtot_y + df.jout_jtot_y
    df['jjz_tot'] = df.jtot_jtot_z
    del df['jtot_jtot_z']
    if exclude_in_out:
        # remove columns with in/out data to save space
        for col in df:
            if 'in' in col or 'out' in col:
                del df[col]
    if exclude_currents:
        # remove columns with currents to save space
        for col in df:
            if col.startswith('j'):
                del df[col]
    return df
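# A minimal usage sketch for read_hac (assumes a `hac.out` file; with the
# default flags only the time and total conductivity columns are retained):
#
#     df = read_hac('hac.out')
#     print(df[['time', 'kx_tot', 'ky_tot', 'kz_tot']].tail())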
def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    if len(data[0]) == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif len(data[0]) == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    assert natoms > 0, 'natoms must be positive'
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
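# A minimal usage sketch for read_thermo (file name and atom count are
# illustrative; natoms normalizes the energies per atom):
#
#     df = read_thermo('thermo.out', natoms=512)
#     print(df[['temperature', 'potential_energy', 'cell_xx']].describe())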
def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read` since the ASE
    implementation does not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    structure = read(filename, format='extxyz')
    if structure.has('vel'):
        structure.set_velocities(structure.get_array('vel'))
    return structure
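# A minimal usage sketch for read_xyz (assumes a GPUMD `model.xyz` file in the
# working directory):
#
#     structure = read_xyz('model.xyz')
#     print(len(structure), structure.get_velocities())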
def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form of a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.
    """
    data = []
    with open(filename, 'r') as f:
        for k, line in enumerate(f.readlines()):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
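# A minimal usage sketch for read_runfile; the returned list preserves the
# order of the keywords in the input file (the path is illustrative):
#
#     params = read_runfile('run.in')
#     for keyword, values in params:
#         print(keyword, values)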
def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in ``run.in`` format to define input parameters for an MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        List of keyword-value pairs used in the ``run.in`` file
        (see the GPUMD documentation for a complete list of keywords).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as f:
        # Write all keywords with parameter(s)
        for key, val in parameters:
            f.write(f'{key} ')
            if isinstance(val, Iterable) and not isinstance(val, str):
                for v in val:
                    f.write(f'{v} ')
            else:
                f.write(f'{val}')
            f.write('\n')
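# A minimal usage sketch for write_runfile; the keyword-value pairs below are
# illustrative only (consult the GPUMD documentation for valid keywords):
#
#     parameters = [('velocity', 300),
#                   ('time_step', 1.0),
#                   ('dump_thermo', 100),
#                   ('run', 10000)]
#     write_runfile('run.in', parameters)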
def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (``model.xyz``).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of lists of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be at most three; each entry
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Make a local copy of the atoms object
    _structure = structure.copy()

    # Check velocities parameter
    velocities = _structure.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Check groupings parameter
    if groupings is None:
        number_of_grouping_methods = 0
    else:
        number_of_grouping_methods = len(groupings)
        if number_of_grouping_methods > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            all_indices = [i for group in grouping for i in group]
            if len(all_indices) != len(_structure) or set(all_indices) != set(
                range(len(_structure))
            ):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASE's extxyz write functionality.
    #   pbc="pbc_a pbc_b pbc_c"
    #   lattice="ax ay az bx by bz cx cy cz"
    #   properties=property_name:data_type:number_of_columns
    #       species:S:1
    #       pos:R:3
    #       mass:R:1
    #       vel:R:3
    #       group:I:number_of_grouping_methods
    if _structure.has('mass'):
        # If the structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        _structure.new_array('mass', _structure.get_masses())

    if has_velocity:
        _structure.new_array('vel', _structure.get_velocities())
    if groupings is not None:
        group_indices = np.array(
            [
                [
                    [
                        group_index
                        for group_index, group in enumerate(grouping)
                        if structure_idx in group
                    ]
                    for grouping in groupings
                ]
                for structure_idx in range(len(_structure))
            ]
        ).squeeze()
        _structure.new_array('group', group_indices)

    write(filename=filename, images=_structure, write_info=True, format='extxyz')
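# A minimal usage sketch for write_xyz; the structure and the single grouping
# method below (two groups of two atoms each) are illustrative only:
#
#     from ase.build import bulk
#     structure = bulk('Si', 'diamond', a=5.43).repeat((1, 1, 2))  # 4 atoms
#     write_xyz('model.xyz', structure, groupings=[[[0, 1], [2, 3]]])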
def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = []
    offset = 0
    step = 0
    accumulated_step = 0
    for line in lines:
        if line.startswith('# mc'):
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                ntypes = int(flds[7])
                species = [flds[8+2*k] for k in range(ntypes)]
                phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
                kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            continue
        else:
            flds = line.split()
            previous_step = step
            step = int(flds[0])
            if step <= previous_step and accumulate:
                offset += previous_step
            accumulated_step = step + offset
            record = dict(
                step=accumulated_step,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                concentrations = {f'conc_{s}': float(flds[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            data.append(record)
    df = DataFrame.from_dict(data)
    return df
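# A minimal usage sketch for read_mcmd (assumes an `mcmd.out` file from a GPUMD
# Monte Carlo run; concentration columns appear only for sgc/vcsgc runs):
#
#     df = read_mcmd('mcmd.out')
#     print(df[['step', 'acceptance_ratio']].tail())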
def read_thermodynamic_data(directory_name: str) -> DataFrame:
    """Parses the data in a GPUMD output directory
    and returns the content in the form of a :class:`DataFrame`.
    This function reads the ``thermo.out`` and ``run.in`` files,
    and returns the thermodynamic data including the time (in ps),
    the pressure (in GPa), the side lengths of the simulation cell
    (in Å), and the volume (in Å:sup:`3`).

    Parameters
    ----------
    directory_name
        Path to directory to be parsed.

    Returns
    -------
    :class:`DataFrame` containing (augmented) thermodynamic data.
    """

    try:
        params = read_runfile(f'{directory_name}/run.in')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `run.in` file found in {directory_name}')

    blocks = []
    time_step = 1.0  # GPUMD default
    dump_thermo = None
    for p, v in params:
        if p == 'time_step':
            time_step = v
        elif p == 'dump_thermo':
            dump_thermo = v
        elif p == 'run':
            if dump_thermo is None:
                raise ValueError(
                    'Could not extract value of `dump_thermo` keyword from `run.in` file.')
            # We do not require dump_thermo to exist for subsequent
            # runs if it has been used for at least one run before.
            # But if there has been no new dump_thermo, we
            # should not create a block.
            if dump_thermo != 'DEFINEDONCE':
                blocks.append(dict(
                    nsteps=v,
                    time_step=time_step,
                    dump_thermo=dump_thermo,
                ))
            dump_thermo = 'DEFINEDONCE'

    try:
        df = read_thermo(f'{directory_name}/thermo.out')
    except FileNotFoundError:
        raise FileNotFoundError(f'No `thermo.out` file found in {directory_name}')

    expected_rows = sum([int(round(b['nsteps'] / b['dump_thermo'], 0))
                         for b in blocks if b['dump_thermo'] is not None])
    if len(df) != expected_rows:
        warn(f'Number of rows in `thermo.out` file ({len(df)}) does not match'
             f' expectation based on `run.in` file ({expected_rows}).')
    if len(df) > expected_rows:
        raise ValueError(f'Too many rows in `thermo.out` file ({len(df)}) compared to'
                         f' expectation based on `run.in` file ({expected_rows}).')

    # Fewer rows than expected should be ok, since the run may have crashed/not completed yet.
    times = []
    offset = 0.0
    for b in blocks:
        ns = int(round(b['nsteps'] / b['dump_thermo'], 0))
        block_times = np.array(range(1, 1 + ns)) \
            * b['dump_thermo'] * b['time_step'] * 1e-3  # in ps
        block_times += offset
        times.extend(block_times)
        offset = times[-1]
    df['time'] = times[:len(df)]

    df['pressure'] = (df.stress_xx + df.stress_yy + df.stress_zz) / 3
    if 'cell_xy' in df:
        df['alat'] = np.sqrt(df.cell_xx ** 2 + df.cell_xy ** 2 + df.cell_xz ** 2)
        df['blat'] = np.sqrt(df.cell_yx ** 2 + df.cell_yy ** 2 + df.cell_yz ** 2)
        df['clat'] = np.sqrt(df.cell_zx ** 2 + df.cell_zy ** 2 + df.cell_zz ** 2)
        # Volume from the determinant of the cell matrix
        volume = (df.cell_xx * df.cell_yy * df.cell_zz +
                  df.cell_xy * df.cell_yz * df.cell_zx +
                  df.cell_xz * df.cell_yx * df.cell_zy -
                  df.cell_xx * df.cell_yz * df.cell_zy -
                  df.cell_xy * df.cell_yx * df.cell_zz -
                  df.cell_xz * df.cell_yy * df.cell_zx)
    else:
        df['alat'] = df.cell_xx
        df['blat'] = df.cell_yy
        df['clat'] = df.cell_zz
        volume = df.cell_xx * df.cell_yy * df.cell_zz
    df['volume'] = volume

    return df
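# A minimal usage sketch for read_thermodynamic_data (assumes a directory that
# contains both `run.in` and `thermo.out`; the path is illustrative):
#
#     df = read_thermodynamic_data('md_runs/T300')
#     print(df[['time', 'temperature', 'pressure', 'volume']].tail())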