Coverage for calorine/gpumd/io.py: 100%
157 statements
« prev ^ index » next coverage.py v7.2.5, created at 2024-07-26 09:34 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2024-07-26 09:34 +0000
1from warnings import warn
2from collections.abc import Iterable
3from pathlib import Path
4from typing import List, Tuple, Union
6import numpy as np
7from ase import Atoms
8from ase.io import read, write
9from pandas import DataFrame
def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    DataFrame with columns ``kx_in``, ``kx_out``, ``ky_in``, ``ky_out``,
    ``kz_tot`` plus the derived totals ``kx_tot`` and ``ky_tot``.

    Raises
    ------
    ValueError
        If the file does not contain exactly five data columns.
    """
    # ndmin=2 guarantees a 2D array even for a single-row file; this is
    # more robust than checking the type of data[0], which would also be
    # a scalar for a single-COLUMN file and lead to a bogus reshape.
    data = np.loadtxt(filename, ndmin=2)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
def read_hac(filename: str) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    DataFrame with the time, the "out" heat-current autocorrelation
    components and the running conductivities ``kx_out``, ``ky_out``,
    ``kz_tot``, ``kx_tot``, ``ky_tot``.

    Raises
    ------
    ValueError
        If the file does not contain exactly eleven data columns.
    """
    # ndmin=2 guarantees a 2D array even for a single-row file; this is
    # more robust than type-checking data[0].
    data = np.loadtxt(filename, ndmin=2)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if data.shape[1] != len(tags):
        raise ValueError(
            f'Input file contains {data.shape[1]} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    # Remove columns with less relevant data to save space. Collect the
    # names first: deleting columns while iterating over the frame
    # mutates the container being iterated.
    to_drop = [col for col in df.columns if 'jtot' in col or '_in' in col]
    df = df.drop(columns=to_drop)
    return df
def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.

    Returns
    -------
    DataFrame with temperature, per-atom energies, stresses and cell metrics;
    the exact set of columns depends on the box shape and GPUMD version.

    Raises
    ------
    ValueError
        If ``natoms`` is not positive or the file does not contain
        9, 12, 15 or 18 data columns.
    """
    # Validate up front; a plain assert would be silently stripped under -O.
    if natoms <= 0:
        raise ValueError('natoms must be positive')
    # ndmin=2 guarantees a 2D array even if thermo.out holds a single row.
    data = np.loadtxt(filename, ndmin=2)
    ncols = data.shape[1]

    base = 'temperature kinetic_energy potential_energy'
    stress_diag = ' stress_xx stress_yy stress_zz'
    # full Voigt stresses were added in GPUMD v3.3.1
    stress_voigt = stress_diag + ' stress_yz stress_xz stress_xy'
    cell_ortho = ' cell_xx cell_yy cell_zz'
    cell_tricl = (
        ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
    )
    layouts = {
        9: base + stress_diag + cell_ortho,    # orthorhombic box
        12: base + stress_voigt + cell_ortho,  # orthorhombic box (v3.3.1+)
        15: base + stress_diag + cell_tricl,   # triclinic box
        18: base + stress_voigt + cell_tricl,  # triclinic box (v3.3.1+)
    }
    if ncols not in layouts:
        raise ValueError(
            f'Input file contains {ncols} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=layouts[ncols].split())
    # normalize extensive energies to per-atom values
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
def read_xyz(filename: str) -> Atoms:
    """
    Reads a GPUMD structure input file (``model.xyz``) and returns the
    corresponding structure.

    This wraps :func:`ase.io.read` because the plain ASE reader does not
    transfer the ``vel`` array into the Atoms velocities.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    atoms = read(filename, format='extxyz')
    # The extxyz reader stores velocities in the 'vel' array rather than
    # in the Atoms momenta; copy them over explicitly when present.
    velocities = atoms.get_array('vel') if atoms.has('vel') else None
    if velocities is not None:
        atoms.set_velocities(velocities)
    return atoms
def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form of a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.

    Raises
    ------
    ValueError
        If a non-empty line contains only a single field.
    """
    data = []
    with open(filename, 'r') as f:
        # Enumerate from 1 so error messages report human-readable
        # (1-based) line numbers; iterate lazily instead of readlines().
        for lineno, line in enumerate(f, start=1):
            flds = line.split()
            if len(flds) == 0:
                continue
            if len(flds) == 1:
                raise ValueError(f'Line {lineno} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                # these keywords carry a single float argument
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                # these keywords carry a single int argument
                values = int(values[0])
            elif len(values) == 1:
                # unwrap single string values for convenience
                values = values[0]
            data.append((keyword, values))
    return data
def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Writes a ``run.in`` file defining the input parameters for an MD
    simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters : dict
        Defines all key-value pairs used in run.in file
        (see GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """
    with open(file, 'w') as f:
        for keyword, value in parameters:
            # A non-string iterable is written as space-separated fields
            # (keeping a trailing space before the newline); scalars and
            # strings are written verbatim.
            if isinstance(value, Iterable) and not isinstance(value, str):
                body = ''.join(f'{v} ' for v in value)
            else:
                body = f'{value}'
            f.write(f'{keyword} {body}\n')
def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (`model.xyz`).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form of
        a list of list of lists. Specifically, the outer list corresponds to
        the grouping methods, of which there can be three at the most, which
        contains a list of groups in the form of lists of site indices. The
        sum of the lengths of the latter must be the same as the total number
        of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Work on a copy so the caller's Atoms object is left untouched.
    atoms = structure.copy()

    # Velocities are written only if at least one component is non-negligible.
    velocities = atoms.get_velocities()
    write_velocities = velocities is not None and np.max(np.abs(velocities)) >= 1e-6

    # Validate the groupings parameter: at most three methods, and each
    # method must partition exactly the full set of atomic indices.
    if groupings is not None:
        if len(groupings) > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        expected_indices = set(range(len(atoms)))
        for g, grouping in enumerate(groupings):
            indices = [site for group in grouping for site in group]
            if len(indices) != len(atoms) or set(indices) != expected_indices:
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASEs extyz write functionality.
    #  pbc="pbc_a pbc_b pbc_c"
    #  lattice="ax ay az bx by bz cx cy cz"
    #  properties=property_name:data_type:number_of_columns
    #      species:S:1
    #      pos:R:3
    #      mass:R:1
    #      vel:R:3
    #      group:I:number_of_grouping_methods
    if atoms.has('mass'):
        # If the structure already has masses set, use those.
        warn('Structure already has array "mass"; will use existing values.')
    else:
        atoms.new_array('mass', atoms.get_masses())

    if write_velocities:
        atoms.new_array('vel', atoms.get_velocities())

    if groupings is not None:
        # For every atom record, per grouping method, the index of the
        # group containing it; squeeze drops the extra axis when there is
        # only a single grouping method.
        per_atom = []
        for site in range(len(atoms)):
            per_method = []
            for grouping in groupings:
                per_method.append(
                    [gi for gi, group in enumerate(grouping) if site in group]
                )
            per_atom.append(per_method)
        atoms.new_array('group', np.array(per_atom).squeeze())

    write(filename=filename, images=atoms, write_info=True, format='extxyz')
def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True`` the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    records = []
    offset = 0
    last_step = 0
    with open(filename, 'r') as fp:
        for line in fp:
            if line.startswith('# num_MD_steps'):
                # column header line; carries no data
                continue
            flds = line.split()
            if line.startswith('# mc'):
                # header announcing a new MC run; remember its parameters
                # so they can be attached to the data records that follow
                mc_type = flds[2]
                md_steps = int(flds[3])
                mc_trials = int(flds[4])
                temperature_initial = float(flds[5])
                temperature_final = float(flds[6])
                if mc_type.endswith('sgc'):
                    ntypes = int(flds[7])
                    species = [flds[8 + 2 * k] for k in range(ntypes)]
                    phis = {f'phi_{flds[8 + 2 * k]}': float(flds[9 + 2 * k])
                            for k in range(ntypes)}
                    kappa = float(flds[8 + 2 * ntypes]) if mc_type == 'vcsgc' else np.nan
                continue
            step = int(flds[0])
            if accumulate and step <= last_step:
                # a new run restarted the step counter; shift subsequent steps
                offset += last_step
            last_step = step
            record = dict(
                step=step + offset,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                record.update({f'conc_{s}': float(flds[k])
                               for k, s in enumerate(species, start=2)})
            records.append(record)
    return DataFrame.from_dict(records)