Source code for calorine.nep.io

from os.path import exists
from os.path import join as join_path
from typing import Any, Dict, Iterable, List, NamedTuple, TextIO, Tuple
from warnings import warn

import numpy as np
from ase import Atoms
from ase.io import read, write
from ase.stress import voigt_6_to_full_3x3_stress
from ase.units import GPa
from pandas import DataFrame


[docs]def read_loss(filename: str) -> DataFrame: """Parses a file in ``loss.out`` format from GPUMD and returns the content as a data frame. More information concerning file format, content and units can be found `here <https://gpumd.org/nep/output_files/loss_out.html>`__. Parameters ---------- filename input file name """ data = np.loadtxt(filename) if isinstance(data[0], np.float64): # If only a single row in loss.out, append a dimension data = data.reshape(1, -1) if len(data[0]) == 6: tags = 'total_loss L1 L2' tags += ' RMSE_P_train' tags += ' RMSE_P_test' elif len(data[0]) == 10: tags = 'total_loss L1 L2' tags += ' RMSE_E_train RMSE_F_train RMSE_V_train' tags += ' RMSE_E_test RMSE_F_test RMSE_V_test' else: raise ValueError( f'Input file contains {len(data[0])} data columns. Expected 6 or 10 columns.' ) generations = range(100, len(data) * 100 + 1, 100) df = DataFrame(data=data[:, 1:], columns=tags.split(), index=generations) return df
def _write_structure_in_nep_format(structure: Atoms, f: TextIO) -> None: """Write structure block into a file-like object in format readable by nep executable. Parameters ---------- structure input structure; must hold information regarding energy and forces f file-like object to which to write """ # Allowed keyword=value pairs. Use ASEs extyz write functionality.: # lattice="ax ay az bx by bz cx cy cz" (mandatory) # energy=energy_value (mandatory) # virial="vxx vxy vxz vyx vyy vyz vzx vzy vzz" (optional) # weight=relative_weight (optional) # properties=property_name:data_type:number_of_columns # species:S:1 (mandatory) # pos:R:3 (mandatory) # force:R:3 or forces:R:3 (mandatory) try: energy = structure.get_potential_energy() structure.get_forces() # calculate forces to have them on the Atoms object structure.info['energy'] = energy except RuntimeError: raise RuntimeError('Failed to retrieve energy and/or forces for structure') if np.isclose(structure.get_volume(), 0): raise ValueError('Structure cell must have a non-zero volume!') try: stress = structure.get_stress(voigt=False).flatten() stress = ' '.join([f'{s:.6f}' for s in stress]) structure.info['stress'] = f'"{stress}"' except RuntimeError: warn('Failed to retrieve stresses for structure') write(filename=f, images=structure, write_info=True, format='extxyz')
[docs]def write_structures(outfile: str, structures: List[Atoms]) -> None: """Writes structures for training/testing in format readable by nep executable. Parameters ---------- outfile output filename structures list of structures with energy, forces, and (possibly) stresses """ with open(outfile, 'w') as f: for structure in structures: _write_structure_in_nep_format(structure, f)
[docs]def write_nepfile(parameters: NamedTuple, dirname: str) -> None: """Writes parameters file for NEP construction. Parameters ---------- parameters input parameters; see `here <https://gpumd.org/nep/input_parameters/index.html>`__ dirname directory in which to place input file and links """ with open(join_path(dirname, 'nep.in'), 'w') as f: for key, val in parameters.items(): f.write(f'{key} ') if isinstance(val, Iterable): f.write(' '.join([f'{v}' for v in val])) else: f.write(f'{val}') f.write('\n')
[docs]def read_nepfile(filename: str) -> Dict[str, Any]: """Returns the content of a configuration file (`nep.in`) as a dictionary. Parameters ---------- filename input file name """ settings = {} with open(filename) as f: for line in f.readlines(): flds = line.split() if len(flds) == 0: continue if flds[0].startswith('#'): continue settings[flds[0]] = ' '.join(flds[1:]) for key, val in settings.items(): if key in ['version', 'neuron', 'generation', 'batch', 'population', 'mode', 'model_type']: settings[key] = int(val) elif key in [ 'lambda_1', 'lambda_2', 'lambda_e', 'lambda_f', 'lambda_v', 'lambda_shear', 'force_delta', ]: settings[key] = float(val) elif key in ['cutoff', 'n_max', 'l_max', 'basis_size', 'zbl', 'type_weight']: settings[key] = [float(v) for v in val.split()] elif key == 'type': types = val.split() types[0] = int(types[0]) settings[key] = types return settings
[docs]def read_structures(dirname: str) -> Tuple[List[Atoms], List[Atoms]]: """Parses the ``energy_*.out``, ``force_*.out``, ``virial_*.out``, ``polarizability_*.out`` and ``dipole_*.out`` files from a nep run and returns their content as lists. The first and second list contain the structures from the training and test sets, respectively. Each list entry corresponds to an ASE Atoms object, which in turn contains predicted and target energies, forces and virials/stresses or polarizability/diople stored in the `info` property. Parameters ---------- dirname directory from which to read output files """ path = join_path(dirname) if not exists(path): raise ValueError(f'Directory {path} does not exist') nep_info = read_nepfile(f'{path}/nep.in') if 'mode' in nep_info or 'model_type' in nep_info: ftype = nep_info.get('mode', nep_info.get('model_type')) if ftype == 2 or ftype == 1: return _read_structures_tensors(dirname, ftype) return _read_structures_potential(dirname)
def _read_structures_tensors(dirname: str, ftype: int) \ -> Tuple[List[Atoms], List[Atoms]]: """Parses the ``polarizability_*.out`` and ``dipole_*.out`` files from a nep run and returns their content as lists. The first and second list contain the structures from the training and test sets, respectively. Each list entry corresponds to an ASE Atoms object, which in turn contains predicted and target dipole or polarizability stored in the `info` property. Parameters ---------- dirname directory from which to read output files """ path = join_path(dirname) structures = {} if ftype == 1: sname = 'dipole' else: sname = 'polarizability' for stype in ['train', 'test']: filename = join_path(dirname, f'{stype}.xyz') try: structures[stype] = read(filename, format='extxyz', index=':') except FileNotFoundError: warn(f'File {filename} not found.') structures[stype] = [] continue n_structures = len(structures[stype]) ts, ps = _read_data_file(path, f'{sname}_{stype}.out') if ftype == 1: ts = np.array(ts).reshape((-1, 3)) ps = np.array(ps).reshape((-1, 3)) else: ts = np.array(ts).reshape((-1, 6)) ps = np.array(ps).reshape((-1, 6)) assert len(ts) == n_structures, \ f'Number of structures in {sname}_{stype}.out ({len(ts)})' \ f' and {stype}.xyz ({n_structures}) inconsistent' for structure, t, p in zip(structures[stype], ts, ps): if ftype == 1: assert np.shape(t) == (3,) assert np.shape(p) == (3,) else: assert np.shape(t) == (6,) assert np.shape(p) == (6,) structure.info[f'{sname}_target'] = t structure.info[f'{sname}_predicted'] = p return structures['train'], structures['test'] def _read_structures_potential(dirname: str) -> Tuple[List[Atoms], List[Atoms]]: """Parses the ``energy_*.out``, ``force_*.out``, ``virial_*.out`` files from a nep run and returns their content as lists. The first and second list contain the structures from the training and test sets, respectively. Each list entry corresponds to an ASE Atoms object, which in turn contains predicted and target energies, forces and virials/stresses stored in the `info` property. Parameters ---------- dirname directory from which to read output files """ path = join_path(dirname) structures = {} for stype in ['train', 'test']: file_path = join_path(dirname, f'{stype}.xyz') if not exists(file_path): warn(f'File {file_path} not found.') structures[stype] = [] continue structures[stype] = read( file_path, format='extxyz', index=':' ) ts, ps = _read_data_file(path, f'energy_{stype}.out') n_structures = len(structures[stype]) assert len(ts) == n_structures, ( f'Number of structures in energy_{stype}.out ({len(ts)})' f' and {stype}.xyz ({n_structures}) inconsistent' ) for structure, t, p in zip(structures[stype], ts, ps): structure.info['energy_target'] = t structure.info['energy_predicted'] = p ts, ps = _read_data_file(path, f'force_{stype}.out') n_atoms_total = sum([len(s) for s in structures[stype]]) assert len(ts) == n_atoms_total, ( f'Number of structures in force_{stype}.out ({len(ts)})' f' and {stype}.xyz ({n_structures}) inconsistent' ) n = 0 for structure in structures[stype]: nat = len(structure) structure.info['force_target'] = np.array(ts[n: n + nat]).reshape(nat, 3) structure.info['force_predicted'] = np.array(ps[n: n + nat]).reshape( nat, 3 ) n += nat ts, ps = _read_data_file(path, f'virial_{stype}.out') ts, ps = np.array(ts), np.array(ps) N = len(structures[stype]) if ts.shape == (6*N,): # GPUMD <=v3.6 style virial_*.out # First column are NEP predictions, second are targets # Order: First N values are xx, second are yy etc. ts = np.array(ts).reshape((6, -1)).T # GPUMD 3.6 compatibility ps = np.array(ps).reshape((6, -1)).T elif ts.shape == (N, 6): # GPUMD >=v3.7 style virial_*.out # First 6 columns are NEP predictions, last 6 are targets # Order: xx, yy, zz, xy, yz, zx pass else: raise ValueError(f'virial_*.out has invalid shape, {ts.shape}') assert len(ts) == n_structures, \ f'Number of structures in virial_{stype}.out ({len(ts)})' \ f' and {stype}.xyz ({n_structures}) inconsistent' for structure, t, p in zip(structures[stype], ts, ps): assert np.shape(t) == (6,) structure.info['virial_target'] = t structure.info['virial_predicted'] = p conv = len(structure) / structure.get_volume() / GPa structure.info['stress_target'] = t * conv structure.info['stress_predicted'] = p * conv return structures['train'], structures['test'] def _read_data_file(dirname: str, fname: str): """Private function that parses energy/force/virial_*.out files and returns their content for further processing. """ path = join_path(dirname, fname) if not exists(path): raise ValueError(f'Directory {path} does not exist') with open(path, 'r') as f: lines = f.readlines() target, predicted = [], [] for line in lines: flds = line.split() if len(flds) == 12: # Virial after GPUMD 3.7 predicted.append([float(s) for s in flds[0:6]]) target.append([float(s) for s in flds[6:12]]) elif len(flds) == 6: # Force predicted.append([float(s) for s in flds[0:3]]) target.append([float(s) for s in flds[3:6]]) elif len(flds) == 2: # Energy, virial before GPUMD 3.7 predicted.append(float(flds[0])) target.append(float(flds[1])) else: raise ValueError(f'Malformed file: {path}') return target, predicted
[docs]def get_parity_data( structures: List[Atoms], property: str, selection: List[str] = None, flatten: bool = True, ) -> DataFrame: """Returns the predicted and target energies, forces, virials or stresses from a list of structures in a format suitable for generating parity plots. The structures should have been read using :func:`read_structures <calorine.nep.read_structures>`, such that the ``info``-object is populated with keys on the form ``<property>_<type>`` where ``<property>`` is one of ``energy``, ``force``, ``virial``, and stress, and ``<type>`` is one of ``predicted`` or ``target``. The resulting parity data is returned as a tuple of dicts, where each entry corresponds to a list. Parameters ---------- structures List of structures as read with :func:`read_structures <calorine.nep.read_structures>`. property One of ``energy``, ``force``, ``virial``, ``stress``, ``polarizability``, ``dipole``. selection A list containing which components to return, and/or the absolute value. Possible values are ``x``, ``y``, ``z``, ``xx``, ``yy``, ``zz``, ``yz``, ``xz``, ``xy``, ``abs``, ``pressure``. flatten if True return flattened lists; this is useful for flattening the components of force or virials into a simple list """ data = {'predicted': [], 'target': []} voigt_mapping = { 'x': 0, 'y': 1, 'z': 2, 'xx': 0, 'yy': 1, 'zz': 2, 'yz': 3, 'xz': 4, 'xy': 5, } if property not in ('energy', 'force', 'virial', 'stress', 'polarizability', 'dipole'): raise ValueError( "`property` must be one of 'energy', 'force', 'virial', 'stress'," " 'polarizability', 'dipole'." ) if property == 'energy' and selection: raise ValueError('Selection does nothing for scalar-valued `energy`.') if property != 'stress' and selection and 'pressure' in selection: raise ValueError(f'Cannot calculate pressure for `{property}`.') for structure in structures: for stype in data: property_with_stype = f'{property}_{stype}' if property_with_stype not in structure.info.keys(): raise KeyError(f'{property_with_stype} does not exist in info object!') extracted_property = np.array(structure.info[property_with_stype]) if selection is None or len(selection) == 0: data[stype].append(extracted_property) continue selected_values = [] for select in selection: if property == 'force': # flip to get (n_components, n_structures) extracted_property = extracted_property.T if select == 'abs': if property == 'force': selected_values.append(np.linalg.norm(extracted_property, axis=0)) else: # property can only be in ('virial', 'stress') full_tensor = voigt_6_to_full_3x3_stress(extracted_property) selected_values.append(np.linalg.norm(full_tensor)) continue if select == 'pressure' and property == 'stress': total_stress = extracted_property selected_values.append(-np.sum(total_stress[:3]) / 3) continue if select not in voigt_mapping: raise ValueError(f'Selection `{select}` is not allowed.') index = voigt_mapping[select] if index >= extracted_property.shape[0]: raise ValueError( f'Selection `{select}` is not compatible with property `{property}`.' ) selected_values.append(extracted_property[index]) data[stype].append(selected_values) if flatten: for key, value in data.items(): if len(np.shape(value[0])) > 0: data[key] = np.concatenate(value).ravel().tolist() df = DataFrame(data) # In case of flatten, cast to float64 for compatibility # with e.g. seaborn. # Casting in this way breaks tensorial properties though, # so skip it there. if flatten: df['target'] = df.target.astype('float64') df['predicted'] = df.predicted.astype('float64') return df