from ase import units
from typing import Optional
import numpy as np
import pandas as pd
from scipy import stats
[docs]
def analyze_data(data: np.ndarray, max_lag: int = None) -> dict:
""" Carries out an extensive analysis of the data series.
Parameters
----------
data
data series to compute autocorrelation function for
max_lag
maximum lag between two data points, used for computing autocorrelation
Returns
-------
dict
calculated properties of the data including, mean, standard deviation,
correlation length and a 95% error estimate.
"""
summary = dict(mean=data.mean(),
std=data.std())
acf = get_autocorrelation_function(data, max_lag)
correlation_length = _estimate_correlation_length_from_acf(acf)
if correlation_length is not None:
error_estimate = _estimate_error(data, correlation_length, confidence=0.95)
summary['correlation_length'] = correlation_length
summary['error_estimate'] = error_estimate
else:
summary['correlation_length'] = np.nan
summary['error_estimate'] = np.nan
return summary
[docs]
def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray:
""" Returns autocorrelation function.
The autocorrelation function is computed using `pandas.Series.autocorr
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.autocorr.html>`_.
Parameters
----------
data
data series to compute autocorrelation function for
max_lag
maximum lag between two data points
Returns
-------
calculated autocorrelation function
"""
if max_lag is None:
max_lag = len(data) - 1
if max_lag < 1 or max_lag >= len(data):
raise ValueError('max_lag should be between 1 and len(data)-1.')
series = pd.Series(data)
acf = [series.autocorr(lag) for lag in range(0, max_lag)]
return np.array(acf)
[docs]
def get_correlation_length(data: np.ndarray) -> Optional[int]:
""" Returns estimate of the correlation length of data.
The correlation length is taken as the first point where the
autocorrelation functions is less than :math:`\\exp(-2)`. If the
correlation function never drops below :math:`\\exp(-2)` ``np.nan`` is
returned.
If the correlation length cannot be computed since the auto-correlation
function is unconverged the function returns `None`.
Parameters
----------
data
data series for which to the compute autocorrelation function
Returns
-------
correlation length
"""
acf = get_autocorrelation_function(data)
correlation_length = _estimate_correlation_length_from_acf(acf)
if correlation_length is None:
return None
return correlation_length
[docs]
def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> Optional[float]:
"""Returns estimate of standard error :math:`\\mathrm{error}`
with confidence interval.
.. math::
\\mathrm{error} = t_\\mathrm{factor} * \\mathrm{std}(\\mathrm{data}) / \\sqrt{N_s}
where :math:`t_{factor}` is the factor corresponding to the confidence
interval and :math:`N_s` is the number of independent measurements
(with correlation taken into account).
If the correlation length cannot be computed since the auto-correlation
function is unconverged the function returns `None`.
Parameters
----------
data
data series for which to estimate the error
Returns
-------
error estimate
"""
correlation_length = get_correlation_length(data)
if correlation_length is None:
return None
error_estimate = _estimate_error(data, correlation_length, confidence)
return error_estimate
def _estimate_correlation_length_from_acf(acf: np.ndarray) -> Optional[int]:
"""Estimates correlation length from acf. Returns None if the auto-correlation
function is uncoverged."""
for i, a in enumerate(acf):
if a < np.exp(-2):
return i
return None # np.nan
def _estimate_error(data: np.ndarray,
correlation_length: int,
confidence: float) -> float:
""" Estimates error using correlation length. """
t_factor = stats.t.ppf((1 + confidence) / 2, len(data)-1) # type: float
error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length) # type: float
return error
[docs]
def get_rtc_from_hac(hac: np.ndarray, V: float, T: float, dt: float) -> np.ndarray:
"""Returns the running thermal conductivity (RTC) in W/m/K using a
heat auto-correlation (HAC) as input.
Parameters
----------
hac
The HAC :math:`\\langle j(t)j(0)\\rangle` in units of eV\\ :sup:`3`/amu as given by GPUMD
V
Volume of cell in Å\\ :sup:`3`
T
Temperature in Kelvin
dt
Time step in the HAC in ps
running thermal conductivity in W/m/K
"""
hac = np.asarray(hac)
if hac.ndim > 1:
raise ValueError('hac must be 1D')
kB = units.kB # eV/K
J_per_eV = 1 / units.J
kg_per_u = 1 / units.kg
s_per_ps = 1e-12
m_per_Å = 1 / units.m
# The integration and normalization
rtc = np.cumsum(hac) * dt / (kB * T**2 * V)
# From eV/ps/Å/K to J/s/m/K
rtc *= (J_per_eV**3 / kg_per_u) * s_per_ps / (J_per_eV * m_per_Å**3)
return rtc