# Coverage for calorine/gpumd/io.py: 100% (157 statements; coverage.py v7.2.5)

from warnings import warn
from collections.abc import Iterable
from pathlib import Path
from typing import List, Tuple, Union

import numpy as np
from ase import Atoms
from ase.io import read, write
from pandas import DataFrame


def read_kappa(filename: str) -> DataFrame:
    """Parses a file in ``kappa.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/kappa_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in kappa.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'kx_in kx_out ky_in ky_out kz_tot'.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    return df
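
# Usage sketch (illustration only, not part of the original module): assuming a
# GPUMD run has produced a ``kappa.out`` file in the current directory, the
# running conductivity components can be loaded and averaged as follows.
def _example_read_kappa():  # pragma: no cover
    df = read_kappa('kappa.out')  # hypothetical file name
    print(df[['kx_tot', 'ky_tot', 'kz_tot']].mean())
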

def read_hac(filename: str) -> DataFrame:
    """Parses a file in ``hac.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/hac_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in hac.out, append a dimension
        data = data.reshape(1, -1)
    tags = 'time'
    tags += ' jin_jtot_x jout_jtot_x jin_jtot_y jout_jtot_y jtot_jtot_z'
    tags += ' kx_in kx_out ky_in ky_out kz_tot'
    tags = tags.split()
    if len(data[0]) != len(tags):
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            f' Expected {len(tags)} columns.'
        )
    df = DataFrame(data=data, columns=tags)
    df['kx_tot'] = df.kx_in + df.kx_out
    df['ky_tot'] = df.ky_in + df.ky_out
    # remove columns with less relevant data to save space
    for col in df:
        if 'jtot' in col or '_in' in col:
            del df[col]
    return df
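
# Usage sketch (illustration only): shows which columns remain after the column
# pruning above; ``hac.out`` is a hypothetical file produced by GPUMD.
def _example_read_hac():  # pragma: no cover
    df = read_hac('hac.out')
    # expected columns: time, kx_out, ky_out, kz_tot, kx_tot, ky_tot
    print(df.columns.tolist())
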

def read_thermo(filename: str, natoms: int = 1) -> DataFrame:
    """Parses a file in ``thermo.out`` format from GPUMD and returns the
    content as a data frame. More information concerning file format,
    content and units can be found `here
    <https://gpumd.org/gpumd/output_files/thermo_out.html>`__.

    Parameters
    ----------
    filename
        Input file name.
    natoms
        Number of atoms; used to normalize energies.
    """
    data = np.loadtxt(filename)
    if isinstance(data[0], np.float64):
        # If only a single row in thermo.out, append a dimension
        data = data.reshape(1, -1)
    if len(data[0]) == 9:
        # orthorhombic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 12:
        # orthorhombic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += ' cell_xx cell_yy cell_zz'
    elif len(data[0]) == 15:
        # triclinic box
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    elif len(data[0]) == 18:
        # triclinic box with stresses in Voigt notation (v3.3.1+)
        tags = 'temperature kinetic_energy potential_energy'
        tags += ' stress_xx stress_yy stress_zz stress_yz stress_xz stress_xy'
        tags += (
            ' cell_xx cell_xy cell_xz cell_yx cell_yy cell_yz cell_zx cell_zy cell_zz'
        )
    else:
        raise ValueError(
            f'Input file contains {len(data[0])} data columns.'
            ' Expected 9, 12, 15 or 18 columns.'
        )
    df = DataFrame(data=data, columns=tags.split())
    assert natoms > 0, 'natoms must be positive'
    df.kinetic_energy /= natoms
    df.potential_energy /= natoms
    return df
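
# Usage sketch (illustration only): ``thermo.out`` and the atom count are
# placeholders; ``natoms`` converts the energies to per-atom values.
def _example_read_thermo():  # pragma: no cover
    df = read_thermo('thermo.out', natoms=512)
    print(df.temperature.mean(), df.potential_energy.iloc[-1])
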

def read_xyz(filename: str) -> Atoms:
    """
    Reads the structure input file (``model.xyz``) for GPUMD and returns the
    structure.

    This is a wrapper function around :func:`ase.io.read` since the ASE
    implementation does not read velocities properly.

    Parameters
    ----------
    filename
        Name of file from which to read the structure.

    Returns
    -------
    Structure as ASE Atoms object with additional per-atom arrays
    representing atomic masses, velocities etc.
    """
    structure = read(filename, format='extxyz')
    if structure.has('vel'):
        structure.set_velocities(structure.get_array('vel'))
    return structure
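
# Usage sketch (illustration only): ``model.xyz`` is a hypothetical GPUMD
# structure file; velocities are picked up if the file contains a ``vel`` field.
def _example_read_xyz():  # pragma: no cover
    structure = read_xyz('model.xyz')
    print(len(structure), structure.has('vel'))
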

def read_runfile(filename: str) -> List[Tuple[str, list]]:
    """
    Parses a GPUMD input file in ``run.in`` format and returns the
    content in the form of a list of keyword-value pairs.

    Parameters
    ----------
    filename
        Input file name.

    Returns
    -------
    List of keyword-value pairs.
    """
    data = []
    with open(filename, 'r') as f:
        for k, line in enumerate(f.readlines()):
            flds = line.split()
            if len(flds) == 0:
                continue
            elif len(flds) == 1:
                raise ValueError(f'Line {k} contains only one field:\n{line}')
            keyword = flds[0]
            values = tuple(flds[1:])
            if keyword in ['time_step', 'velocity']:
                values = float(values[0])
            elif keyword in ['dump_thermo', 'dump_position', 'dump_restart', 'run']:
                values = int(values[0])
            elif len(values) == 1:
                values = values[0]
            data.append((keyword, values))
    return data
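
# Usage sketch (illustration only): echoes the parsed keyword-value pairs of a
# hypothetical ``run.in`` file.
def _example_read_runfile():  # pragma: no cover
    for keyword, values in read_runfile('run.in'):
        print(keyword, values)
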

def write_runfile(
    file: Path, parameters: List[Tuple[str, Union[int, float, Tuple[str, float]]]]
):
    """Write a file in ``run.in`` format to define the input parameters for an
    MD simulation.

    Parameters
    ----------
    file
        Path to file to be written.
    parameters
        Defines all keyword-value pairs used in the ``run.in`` file
        (see the GPUMD documentation for a complete list).
        Values can be either floats, integers, or lists/tuples.
    """

    with open(file, 'w') as f:
        # Write all keywords with parameter(s)
        for key, val in parameters:
            f.write(f'{key} ')
            if isinstance(val, Iterable) and not isinstance(val, str):
                for v in val:
                    f.write(f'{v} ')
            else:
                f.write(f'{val}')
            f.write('\n')
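
# Usage sketch (illustration only): the keywords below are common GPUMD input
# parameters, but they serve here merely to show the expected
# (keyword, value(s)) format accepted by write_runfile.
def _example_write_runfile():  # pragma: no cover
    parameters = [
        ('velocity', 300),
        ('time_step', 1.0),
        ('ensemble', ('nvt_ber', 300, 300, 100)),
        ('dump_thermo', 100),
        ('run', 10000),
    ]
    write_runfile(Path('run.in'), parameters)
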

def write_xyz(filename: str, structure: Atoms, groupings: List[List[List[int]]] = None):
    """
    Writes a structure into GPUMD input format (``model.xyz``).

    Parameters
    ----------
    filename
        Name of file to which the structure should be written.
    structure
        Input structure.
    groupings
        Groups into which the individual atoms should be divided in the form
        of a list of lists of lists. Specifically, the outer list corresponds
        to the grouping methods, of which there can be at most three. Each
        grouping method contains a list of groups in the form of lists of
        site indices. The sum of the lengths of the latter must equal the
        total number of atoms.

    Raises
    ------
    ValueError
        Raised if parameters are incompatible.
    """
    # Make a local copy of the atoms object
    _structure = structure.copy()

    # Check velocities parameter
    velocities = _structure.get_velocities()
    if velocities is None or np.max(np.abs(velocities)) < 1e-6:
        has_velocity = 0
    else:
        has_velocity = 1

    # Check groupings parameter
    if groupings is None:
        number_of_grouping_methods = 0
    else:
        number_of_grouping_methods = len(groupings)
        if number_of_grouping_methods > 3:
            raise ValueError('There can be no more than 3 grouping methods!')
        for g, grouping in enumerate(groupings):
            all_indices = [i for group in grouping for i in group]
            if len(all_indices) != len(_structure) or set(all_indices) != set(
                range(len(_structure))
            ):
                raise ValueError(
                    f'The indices listed in grouping method {g} are'
                    ' not compatible with the input structure!'
                )

    # Allowed keyword=value pairs. Use ASE's extxyz write functionality.
    #   pbc="pbc_a pbc_b pbc_c"
    #   lattice="ax ay az bx by bz cx cy cz"
    #   properties=property_name:data_type:number_of_columns
    #       species:S:1
    #       pos:R:3
    #       mass:R:1
    #       vel:R:3
    #       group:I:number_of_grouping_methods
    if _structure.has('mass'):
        # If the structure already has masses set, use those
        warn('Structure already has array "mass"; will use existing values.')
    else:
        _structure.new_array('mass', _structure.get_masses())

    if has_velocity:
        _structure.new_array('vel', _structure.get_velocities())
    if groupings is not None:
        group_indices = np.array(
            [
                [
                    [
                        group_index
                        for group_index, group in enumerate(grouping)
                        if structure_idx in group
                    ]
                    for grouping in groupings
                ]
                for structure_idx in range(len(_structure))
            ]
        ).squeeze()
        _structure.new_array('group', group_indices)

    write(filename=filename, images=_structure, write_info=True, format='extxyz')
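
# Usage sketch (illustration only): builds a small silicon supercell with
# ase.build.bulk and writes it with a single grouping method that splits the
# atoms into two halves; all names and sizes here are placeholders.
def _example_write_xyz():  # pragma: no cover
    from ase.build import bulk
    structure = bulk('Si', 'diamond', a=5.43).repeat(2)  # 16 atoms
    groupings = [[list(range(8)), list(range(8, 16))]]
    write_xyz('model.xyz', structure, groupings)
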

def read_mcmd(filename: str, accumulate: bool = True) -> DataFrame:
    """Parses a Monte Carlo output file in ``mcmd.out`` format
    and returns the content in the form of a DataFrame.

    Parameters
    ----------
    filename
        Path to file to be parsed.
    accumulate
        If ``True``, the MD steps between subsequent Monte Carlo
        runs in the same output file will be accumulated.

    Returns
    -------
    DataFrame containing acceptance ratios and concentrations (if available),
    as well as key Monte Carlo parameters.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()
    data = []
    offset = 0
    step = 0
    accumulated_step = 0
    for line in lines:
        if line.startswith('# mc'):
            flds = line.split()
            mc_type = flds[2]
            md_steps = int(flds[3])
            mc_trials = int(flds[4])
            temperature_initial = float(flds[5])
            temperature_final = float(flds[6])
            if mc_type.endswith('sgc'):
                ntypes = int(flds[7])
                species = [flds[8+2*k] for k in range(ntypes)]
                phis = {f'phi_{flds[8+2*k]}': float(flds[9+2*k]) for k in range(ntypes)}
                kappa = float(flds[8+2*ntypes]) if mc_type == 'vcsgc' else np.nan
        elif line.startswith('# num_MD_steps'):
            continue
        else:
            flds = line.split()
            previous_step = step
            step = int(flds[0])
            if step <= previous_step and accumulate:
                offset += previous_step
            accumulated_step = step + offset
            record = dict(
                step=accumulated_step,
                mc_type=mc_type,
                md_steps=md_steps,
                mc_trials=mc_trials,
                temperature_initial=temperature_initial,
                temperature_final=temperature_final,
                acceptance_ratio=float(flds[1]),
            )
            if mc_type.endswith('sgc'):
                record.update(phis)
                if mc_type == 'vcsgc':
                    record['kappa'] = kappa
                concentrations = {f'conc_{s}': float(flds[k])
                                  for k, s in enumerate(species, start=2)}
                record.update(concentrations)
            data.append(record)
    df = DataFrame.from_dict(data)
    return df
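
# Usage sketch (illustration only): ``mcmd.out`` is a hypothetical output file;
# concentration columns such as ``conc_Ag`` appear only for (vc)sgc simulations.
def _example_read_mcmd():  # pragma: no cover
    df = read_mcmd('mcmd.out', accumulate=True)
    print(df.acceptance_ratio.mean())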