from dataclasses import dataclass
from itertools import product
from typing import Dict, List, Tuple
import numpy as np
def _get_model_type(first_row: List[str]) -> str:
"""Parses a the first row of a ``nep.txt`` file,
and returns the type of NEP model. Available types
are ``potential``, ``dipole`` and ``polarizability``.
Parameters
----------
first_row
first row of a NEP file, split by white space.
"""
model_type = first_row[0]
if 'dipole' in model_type:
return 'dipole'
elif 'polarizability' in model_type:
return 'polarizability'
return 'potential'
def _get_nep_contents(filename: str) -> Tuple[Dict, List]:
"""Parses a ``nep.txt`` file, and returns a dict describing the header
and an unformatted list of all model parameters.
Intended to be used by the :func:`read_model <calorine.nep.read_model>` function.
Parameters
----------
filename
input file name
"""
# parse file and split header and parameters
header = []
parameters = []
nheader = 5 # 5 rows for NEP2, 6-7 rows for NEP3 onwards
base_line = 3
with open(filename) as f:
for k, line in enumerate(f.readlines()):
flds = line.split()
assert len(flds) != 0, f'Empty line number {k}'
if k == 0 and 'zbl' in flds[0]:
base_line += 1
nheader += 1
if k == base_line and 'basis_size' in flds[0]:
# Introduced in nep.txt after GPUMD v3.2
nheader += 1
if k < nheader:
header.append(tuple(flds))
elif len(flds) == 1:
parameters.append(float(flds[0]))
else:
raise IOError(f'Failed to parse line {k} from {filename}')
# compile data from the header into a dict
data = {}
for flds in header:
if flds[0] in ['cutoff', 'zbl']:
data[flds[0]] = tuple(map(float, flds[1:]))
elif flds[0] in ['n_max', 'l_max', 'ANN', 'basis_size']:
data[flds[0]] = tuple(map(int, flds[1:]))
elif flds[0].startswith('nep'):
version = flds[0].replace('nep', '').split('_')[0]
if version == '':
# Case for NEP2
version = 2
else:
version = int(version)
data['version'] = version
data['types'] = flds[2:]
data['model_type'] = _get_model_type(flds)
else:
raise ValueError(f'Unknown field: {flds[0]}')
return data, parameters
[docs]@dataclass
class Model:
r"""Objects of this class represent a NEP model in a form suitable for
inspection and manipulation. Typically a :class:`Model` object is instantiated
by calling the :func:`read_model <calorine.nep.read_model>` function.
Attributes
----------
version : int
NEP version
model_type: str
One of ``potential``, ``dipole`` or ``polarizability``.
types : Tuple[str, ...]
chemical species that this model represents
radial_cutoff : float
the radial cutoff parameter in Å
angular_cutoff : float
the angular cutoff parameter in Å
max_neighbors_radial : int
maximum number of neighbors in neighbor list for radial terms
max_neighbors_angular : int
maximum number of neighbors in neighbor list for angular terms
zbl : Tuple[float, float]
inner and outer cutoff for transition to ZBL potential
n_basis_radial : int
number of radial basis functions :math:`n_\mathrm{basis}^\mathrm{R}`
n_basis_angular : int
number of angular basis functions :math:`n_\mathrm{basis}^\mathrm{A}`
n_max_radial : int
maximum order of Chebyshev polymonials included in
radial expansion :math:`n_\mathrm{max}^\mathrm{R}`
n_max_angular : int
maximum order of Chebyshev polymonials included in
angular expansion :math:`n_\mathrm{max}^\mathrm{A}`
l_max_3b : int
maximum expansion order for three-body terms :math:`l_\mathrm{max}^\mathrm{3b}`
l_max_4b : int
maximum expansion order for four-body terms :math:`l_\mathrm{max}^\mathrm{4b}`
l_max_5b : int
maximum expansion order for five-body terms :math:`l_\mathrm{max}^\mathrm{5b}`
n_descriptor_radial : int
dimension of radial part of descriptor
n_descriptor_angular : int
dimension of angular part of descriptor
n_neuron : int
number of neurons in hidden layer
n_parameters : int
total number of parameters including scalers (which are not fit parameters)
n_descriptor_parameters : int
number of parameters in descriptor
n_ann_parameters : int
number of neural network weights
ann_parameters : Dict[Tuple[str, Dict[str, np.darray]]]
neural network weights
q_scaler : List[float]
scaling parameters
radial_descriptor_weights : Dict[Tuple[str, str], np.ndarray]
radial descriptor weights by combination of species; the array for each combination
has dimensions of
:math:`(n_\mathrm{max}^\mathrm{R}+1) \times (n_\mathrm{basis}^\mathrm{R}+1)`
angular_descriptor_weights : Dict[Tuple[str, str], np.ndarray]
angular descriptor weights by combination of species; the array for each combination
has dimensions of
:math:`(n_\mathrm{max}^\mathrm{A}+1) \times (n_\mathrm{basis}^\mathrm{A}+1)`
"""
version: int
model_type: str
types: Tuple[str, ...]
radial_cutoff: float
angular_cutoff: float
n_basis_radial: int
n_basis_angular: int
n_max_radial: int
n_max_angular: int
l_max_3b: int
l_max_4b: int
l_max_5b: int
n_descriptor_radial: int
n_descriptor_angular: int
n_neuron: int
n_parameters: int
n_descriptor_parameters: int
n_ann_parameters: int
ann_parameters: Dict[str, Dict[str, np.ndarray]]
q_scaler: List[float]
radial_descriptor_weights: Dict[Tuple[str, str], np.ndarray]
angular_descriptor_weights: Dict[Tuple[str, str], np.ndarray]
zbl: Tuple[float, float] = None
max_neighbors_radial: int = None
max_neighbors_angular: int = None
_special_fields = [
'ann_parameters',
'q_scaler',
'radial_descriptor_weights',
'angular_descriptor_weights',
]
def __str__(self) -> str:
s = []
for fld in self.__dataclass_fields__:
if fld not in self._special_fields:
s += [f'{fld:22} : {getattr(self, fld)}']
return '\n'.join(s)
def _repr_html_(self) -> str:
s = []
s += ['<table border="1" class="dataframe"']
s += [
'<thead><tr><th style="text-align: left;">Field</th><th>Value</th></tr></thead>'
]
s += ['<tbody>']
for fld in self.__dataclass_fields__:
if fld not in self._special_fields:
s += [
f'<tr><td style="text-align: left;">{fld:22}</td>'
f'<td>{getattr(self, fld)}</td><tr>'
]
for fld in self._special_fields:
d = getattr(self, fld)
if fld.endswith('descriptor_weights'):
dim = list(d.values())[0].shape
if fld == 'ann_parameters' and self.version == 4:
dim = (len(self.types), len(list(d.values())[0]))
else:
dim = len(d)
s += [
f'<tr><td style="text-align: left;">Dimension of {fld:22}</td><td>{dim}</td><tr>'
]
s += ['</tbody>']
s += ['</table>']
return ''.join(s)
[docs] def write(self, filename: str) -> None:
"""Write NEP model to file in ``nep.txt`` format."""
with open(filename, 'w') as f:
# header
version_name = f'nep{self.version}'
if self.zbl is not None:
version_name += '_zbl'
elif self.model_type != 'potential':
version_name += f'_{self.model_type}'
f.write(f'{version_name} {len(self.types)} {" ".join(self.types)}\n')
if self.zbl is not None:
f.write(f'zbl {" ".join(map(str, self.zbl))}\n')
f.write(f'cutoff {self.radial_cutoff} {self.angular_cutoff}')
if (
self.max_neighbors_radial is not None
and self.max_neighbors_angular is not None
):
f.write(f' {self.max_neighbors_radial} {self.max_neighbors_angular}')
f.write('\n')
f.write(f'n_max {self.n_max_radial} {self.n_max_angular}\n')
f.write(f'basis_size {self.n_basis_radial} {self.n_basis_angular}\n')
f.write(f'l_max {self.l_max_3b} {self.l_max_4b} {self.l_max_5b}\n')
f.write(f'ANN {self.n_neuron} 0\n')
# neural network weights
keys = self.types if self.version == 4 else ['all_species']
suffixes = ['', '_polar'] if self.model_type == 'polarizability' else ['']
for suffix in suffixes:
for s in keys:
# Order: w0, b0, w1
# w0 indexed as: n*N_descriptor + nu
w0 = self.ann_parameters[s][f'w0{suffix}']
b0 = self.ann_parameters[s][f'b0{suffix}']
w1 = self.ann_parameters[s][f'w1{suffix}']
for n in range(self.n_neuron):
for nu in range(
self.n_descriptor_radial + self.n_descriptor_angular
):
f.write(f'{w0[n, nu]:15.7e}\n')
for b in b0[:, 0]:
f.write(f'{b:15.7e}\n')
for v in w1[0, :]:
f.write(f'{v:15.7e}\n')
b1 = self.ann_parameters[f'b1{suffix}']
f.write(f'{b1:15.7e}\n')
# descriptor weights
mat = []
for s1 in self.types:
for s2 in self.types:
mat = np.hstack(
[mat, self.radial_descriptor_weights[(s1, s2)].flatten()]
)
mat = np.hstack(
[mat, self.angular_descriptor_weights[(s1, s2)].flatten()]
)
n_types = len(self.types)
n = int(len(mat) / (n_types * n_types))
mat = mat.reshape((n_types * n_types, n)).T
for v in mat.flatten():
f.write(f'{v:15.7e}\n')
# scaler
for v in self.q_scaler:
f.write(f'{v:15.7e}\n')
[docs]def read_model(filename: str) -> Model:
"""Parses a file in ``nep.txt`` format and returns the
content in the form of a :class:`Model <calorine.nep.model.Model>`
object.
Parameters
----------
filename
input file name
"""
data, parameters = _get_nep_contents(filename)
# sanity checks
for fld in ['cutoff', 'basis_size', 'n_max', 'l_max', 'ANN']:
assert fld in data, f'Invalid model file; {fld} line is missing'
assert data['version'] in [
3,
4,
], 'Invalid model files; only NEP versions 3 and 4 are currently supported'
# split up cutoff tuple
assert len(data['cutoff']) in [2, 4]
data['radial_cutoff'] = data['cutoff'][0]
data['angular_cutoff'] = data['cutoff'][1]
if len(data['cutoff']) == 4:
data['max_neighbors_radial'] = int(data['cutoff'][2])
data['max_neighbors_angular'] = int(data['cutoff'][3])
del data['cutoff']
# split up basis_size tuple
assert len(data['basis_size']) == 2
data['n_basis_radial'] = data['basis_size'][0]
data['n_basis_angular'] = data['basis_size'][1]
del data['basis_size']
# split up n_max tuple
assert len(data['n_max']) == 2
data['n_max_radial'] = data['n_max'][0]
data['n_max_angular'] = data['n_max'][1]
del data['n_max']
# split up nl_max tuple
len_l = len(data['l_max'])
assert len_l in [1, 2, 3]
data['l_max_3b'] = data['l_max'][0]
data['l_max_4b'] = data['l_max'][1] if len_l > 1 else 0
data['l_max_5b'] = data['l_max'][2] if len_l > 2 else 0
del data['l_max']
# compute dimensions of descriptor components
data['n_descriptor_radial'] = data['n_max_radial'] + 1
l_max_enh = data['l_max_3b'] + (data['l_max_4b'] > 0) + (data['l_max_5b'] > 0)
data['n_descriptor_angular'] = (data['n_max_angular'] + 1) * l_max_enh
n_descriptor = data['n_descriptor_radial'] + data['n_descriptor_angular']
# compute number of parameters
data['n_neuron'] = data['ANN'][0]
del data['ANN']
n_types = len(data['types'])
if data['version'] == 3:
n = 1
else: # version 4
# one hidden layer per atomic species
n = n_types
n_ann_input_weights = (n_descriptor + 1) * data['n_neuron'] # weights + bias
n_ann_output_weights = data['n_neuron'] # only weights
n_ann_parameters = (
n_ann_input_weights + n_ann_output_weights
) * n + 1 # + single output bias
n_descriptor_weights = n_types**2 * (
(data['n_max_radial'] + 1) * (data['n_basis_radial'] + 1)
+ (data['n_max_angular'] + 1) * (data['n_basis_angular'] + 1)
)
data['n_parameters'] = n_ann_parameters + n_descriptor_weights + n_descriptor
is_polarizability_model = data['model_type'] == 'polarizability'
if data['n_parameters'] + n_ann_parameters == len(parameters):
data['n_parameters'] += n_ann_parameters
assert is_polarizability_model, (
'Model is not labelled as a polarizability model, but the number of '
'parameters matches a polarizability model.\n'
'If this is a polarizability model trained with GPUMD <=v3.8, please '
'modify the header in the nep.txt file to read '
f'`nep{data["version"]}_polarizability`.\n'
)
assert data['n_parameters'] == len(parameters), (
'Parsing of parameters inconsistent; please submit a bug report\n'
f'{data["n_parameters"]} != {len(parameters)}'
)
data['n_ann_parameters'] = n_ann_parameters
# split up parameters into the ANN weights, descriptor weights, and scaling parameters
n1 = n_ann_parameters
n1 *= 2 if is_polarizability_model else 1
n2 = n1 + n_descriptor_weights
data['ann_parameters'] = parameters[:n1]
descriptor_weights = np.array(parameters[n1:n2])
data['q_scaler'] = parameters[n2:]
# Group ANN parameters
pars = {}
n1 = 0
n_network_params = n_ann_input_weights + n_ann_output_weights # except last bias
n_neuron = data['n_neuron']
keys = data['types'] if data['version'] == 4 else ['all_species']
n_count = 2 if is_polarizability_model else 1
for count in range(n_count):
# if polarizability model, all parameters including bias are repeated
# need to offset n1 by +1 to handle bias
n1 += count
for s in keys:
# Get the parameters for the ANN; in the case of NEP4, there is effectively
# one network per atomic species.
ann_parameters = data['ann_parameters'][n1 : n1 + n_network_params]
ann_input_weights = ann_parameters[:n_ann_input_weights]
w0 = np.zeros((n_neuron, n_descriptor))
w0[...] = np.nan
b0 = np.zeros((n_neuron, 1))
b0[...] = np.nan
for n in range(n_neuron):
for nu in range(n_descriptor):
w0[n, nu] = ann_input_weights[n * n_descriptor + nu]
b0[:, 0] = ann_input_weights[n_neuron * n_descriptor :]
assert np.all(
w0.shape == (n_neuron, n_descriptor)
), f'w0 has invalid shape for key {s}; please submit a bug report'
assert np.all(
b0.shape == (n_neuron, 1)
), f'b0 has invalid shape for key {s}; please submit a bug report'
assert not np.any(
np.isnan(w0)
), f'some weights in w0 are nan for key {s}; please submit a bug report'
assert not np.any(
np.isnan(b0)
), f'some weights in b0 are nan for key {s}; please submit a bug report'
ann_output_weights = ann_parameters[
n_ann_input_weights : n_ann_input_weights + n_ann_output_weights
]
w1 = np.zeros((1, n_neuron))
w1[0, :] = ann_output_weights[:]
assert np.all(
w1.shape == (1, n_neuron)
), f'w1 has invalid shape for key {s}; please submit a bug report'
assert not np.any(
np.isnan(w1)
), f'some weights in w1 are nan for key {s}; please submit a bug report'
if count == 0:
pars[s] = dict(w0=w0, b0=b0, w1=w1)
else:
pars[s].update({'w0_polar': w0, 'b0_polar': b0, 'w1_polar': w1})
n1 += n_network_params
if count == 0:
pars['b1'] = data['ann_parameters'][n1]
else:
pars['b1_polar'] = data['ann_parameters'][n1]
sum = 0
for s in pars.keys():
if s.startswith('b1'):
sum += 1
else:
sum += np.sum([np.count_nonzero(p) for p in pars[s].values()])
assert sum == n_ann_parameters * n_count, (
'Inconsistent number of parameters accounted for; please submit a bug report\n'
f'{sum} != {n_ann_parameters}'
)
data['ann_parameters'] = pars
# split up descriptor by chemical species and radial/angular
data['n_descriptor_parameters'] = len(descriptor_weights)
n = int(len(descriptor_weights) / (n_types * n_types))
n_max_radial = data['n_max_radial']
n_max_angular = data['n_max_angular']
n_basis_radial = data['n_basis_radial']
n_basis_angular = data['n_basis_angular']
m = (n_max_radial + 1) * (n_basis_radial + 1)
descriptor_weights = descriptor_weights.reshape((n, n_types * n_types)).T
descriptor_weights_radial = descriptor_weights[:, :m]
descriptor_weights_angular = descriptor_weights[:, m:]
# add descriptors to data dict
data['radial_descriptor_weights'] = {}
data['angular_descriptor_weights'] = {}
m = -1
for i, j in product(range(n_types), repeat=2):
m += 1
s1, s2 = data['types'][i], data['types'][j]
subdata = descriptor_weights_radial[m, :].reshape(
(n_max_radial + 1, n_basis_radial + 1)
)
data['radial_descriptor_weights'][(s1, s2)] = subdata
subdata = descriptor_weights_angular[m, :].reshape(
(n_max_angular + 1, n_basis_angular + 1)
)
data['angular_descriptor_weights'][(s1, s2)] = subdata
return Model(**data)