# Coverage for calorine/gpumd/io.py: 100% (157 statements; coverage.py v7.2.5)

from warnings import warn
from collections.abc import Iterable
from pathlib import Path
from typing import List, Tuple, Union

import numpy as np
from ase import Atoms
from ase.io import read, write
from pandas import DataFrame


def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in kappa.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
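
# Usage sketch (illustration only, not part of the original module): assuming a
# GPUMD run has produced a ``kappa.out`` file in the current directory, the
# running conductivity components can be loaded and averaged as follows.
def _example_read_kappa():  # pragma: no cover
    df = read_kappa('kappa.out')  # hypothetical file name
    print(df[['kx_tot', 'ky_tot', 'kz_tot']].mean())
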

def read_hac(filename: str) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in hac.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    # remove columns with less relevant data to save space
    for col in df:
        if 'jtot' in col or '_in' in col:
            del df[col]
    return df
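
# Usage sketch (illustration only): shows which columns remain after the column
# pruning above; ``hac.out`` is a hypothetical file produced by GPUMD.
def _example_read_hac():  # pragma: no cover
    df = read_hac('hac.out')
    # expected columns: time, kx_out, ky_out, kz_tot, kx_tot, ky_tot
    print(df.columns.tolist())
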

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    if len(data[0]) == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif len(data[0]) == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    assert natoms > 0, 'natoms must be positive'
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
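
# Usage sketch (illustration only): ``thermo.out`` and the atom count are
# placeholders; ``natoms`` converts the energies to per-atom values.
def _example_read_thermo():  # pragma: no cover
    df = read_thermo('thermo.out', natoms=512)
    print(df.temperature.mean(), df.potential_energy.iloc[-1])
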

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read` since the ASE
    implementation does not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    structure = read(filename, format='extxyz')
    if structure.has('vel'):
        structure.set_velocities(structure.get_array('vel'))
    return structure
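
# Usage sketch (illustration only): ``model.xyz`` is a hypothetical GPUMD
# structure file; velocities are picked up if the file contains a ``vel`` field.
def _example_read_xyz():  # pragma: no cover
    structure = read_xyz('model.xyz')
    print(len(structure), structure.has('vel'))
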

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form of a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.
    """
    data = []
    with open(filename, 'r') as f:
        for k, line in enumerate(f.readlines()):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
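
# Usage sketch (illustration only): echoes the parsed keyword-value pairs of a
# hypothetical ``run.in`` file.
def _example_read_runfile():  # pragma: no cover
    for keyword, values in read_runfile('run.in'):
        print(keyword, values)
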

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in ``run.in`` format to define the input parameters for an
    MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        Defines all keyword-value pairs used in the ``run.in`` file
        (see the GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """

    with open(file, 'w') as f:
        # Write all keywords with parameter(s)
        for key, val in parameters:
            f.write(f'{key} ')
            if isinstance(val, Iterable) and not isinstance(val, str):
                for v in val:
                    f.write(f'{v} ')
            else:
                f.write(f'{val}')
            f.write('\n')
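
# Usage sketch (illustration only): the keywords below are common GPUMD input
# parameters, but they serve here merely to show the expected
# (keyword, value(s)) format accepted by write_runfile.
def _example_write_runfile():  # pragma: no cover
    parameters = [
        ('velocity', 300),
        ('time_step', 1.0),
        ('ensemble', ('nvt_ber', 300, 300, 100)),
        ('dump_thermo', 100),
        ('run', 10000),
    ]
    write_runfile(Path('run.in'), parameters)
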

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (``model.xyz``).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form
        of a list of lists of lists. Specifically, the outer list corresponds
        to the grouping methods, of which there can be at most three. Each
        grouping method contains a list of groups in the form of lists of
        site indices. The sum of the lengths of the latter must equal the
        total number of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Make a local copy of the atoms object
    _structure = structure.copy()

    # Check velocities parameter
    velocities = _structure.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Check groupings parameter
    if groupings is None:
        number_of_grouping_methods = 0
    else:
        number_of_grouping_methods = len(groupings)
        if number_of_grouping_methods > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            all_indices = [i for group in grouping for i in group]
            if len(all_indices) != len(_structure) or set(all_indices) != set(
                range(len(_structure))
            ):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASE's extxyz write functionality.
    #   pbc="pbc_a pbc_b pbc_c"
    #   lattice="ax ay az bx by bz cx cy cz"
    #   properties=property_name:data_type:number_of_columns
    #       species:S:1
    #       pos:R:3
    #       mass:R:1
    #       vel:R:3
    #       group:I:number_of_grouping_methods
    if _structure.has('mass'):
        # If the structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        _structure.new_array('mass', _structure.get_masses())

    if has_velocity:
        _structure.new_array('vel', _structure.get_velocities())
    if groupings is not None:
        group_indices = np.array(
            [
                [
                    [
                        group_index
                        for group_index, group in enumerate(grouping)
                        if structure_idx in group
                    ]
                    for grouping in groupings
                ]
                for structure_idx in range(len(_structure))
            ]
        ).squeeze()
        _structure.new_array('group', group_indices)

    write(filename=filename, images=_structure, write_info=True, format='extxyz')
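
# Usage sketch (illustration only): builds a small silicon supercell with
# ase.build.bulk and writes it with a single grouping method that splits the
# atoms into two halves; all names and sizes here are placeholders.
def _example_write_xyz():  # pragma: no cover
    from ase.build import bulk
    structure = bulk('Si', 'diamond', a=5.43).repeat(2)  # 16 atoms
    groupings = [[list(range(8)), list(range(8, 16))]]
    write_xyz('model.xyz', structure, groupings)
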

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True``, the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = []
    offset = 0
    step = 0
    accumulated_step = 0
    for line in lines:
        if line.startswith('# mc'):
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                ntypes = int(flds[7])
                species = [flds[8+2*k] for k in range(ntypes)]
                phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
                kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            continue
        else:
            flds = line.split()
            previous_step = step
            step = int(flds[0])
            if step <= previous_step and accumulate:
                offset += previous_step
            accumulated_step = step + offset
            record = dict(
                step=accumulated_step,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                concentrations = {f'conc_{s}': float(flds[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            data.append(record)
    df = DataFrame.from_dict(data)
    return df
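
# Usage sketch (illustration only): ``mcmd.out`` is a hypothetical output file;
# concentration columns such as ``conc_Ag`` appear only for (vc)sgc simulations.
def _example_read_mcmd():  # pragma: no cover
    df = read_mcmd('mcmd.out', accumulate=True)
    print(df.acceptance_ratio.mean())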