Coverage for calorine/tools/analysis.py: 100%
57 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-12-10 08:26 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-12-10 08:26 +0000
1from ase import units
2from typing import Optional
4import numpy as np
5import pandas as pd
6from scipy import stats
9def analyze_data(data: np.ndarray, max_lag: int = None) -> dict:
10 """ Carries out an extensive analysis of the data series.
12 Parameters
13 ----------
14 data
15 data series to compute autocorrelation function for
16 max_lag
17 maximum lag between two data points, used for computing autocorrelation
19 Returns
20 -------
21 dict
22 calculated properties of the data including, mean, standard deviation,
23 correlation length and a 95% error estimate.
24 """
25 summary = dict(mean=data.mean(),
26 std=data.std())
27 acf = get_autocorrelation_function(data, max_lag)
28 correlation_length = _estimate_correlation_length_from_acf(acf)
29 if correlation_length is not None:
30 error_estimate = _estimate_error(data, correlation_length, confidence=0.95)
31 summary['correlation_length'] = correlation_length
32 summary['error_estimate'] = error_estimate
33 else:
34 summary['correlation_length'] = np.nan
35 summary['error_estimate'] = np.nan
36 return summary
39def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray:
40 """ Returns autocorrelation function.
42 The autocorrelation function is computed using `pandas.Series.autocorr
43 <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.autocorr.html>`_.
45 Parameters
46 ----------
47 data
48 data series to compute autocorrelation function for
49 max_lag
50 maximum lag between two data points
52 Returns
53 -------
54 calculated autocorrelation function
55 """
56 if max_lag is None:
57 max_lag = len(data) - 1
58 if max_lag < 1 or max_lag >= len(data):
59 raise ValueError('max_lag should be between 1 and len(data)-1.')
60 series = pd.Series(data)
61 acf = [series.autocorr(lag) for lag in range(0, max_lag)]
62 return np.array(acf)
65def get_correlation_length(data: np.ndarray) -> Optional[int]:
66 """ Returns estimate of the correlation length of data.
68 The correlation length is taken as the first point where the
69 autocorrelation functions is less than :math:`\\exp(-2)`. If the
70 correlation function never drops below :math:`\\exp(-2)` ``np.nan`` is
71 returned.
73 If the correlation length cannot be computed since the auto-correlation
74 function is unconverged the function returns `None`.
76 Parameters
77 ----------
78 data
79 data series for which to the compute autocorrelation function
81 Returns
82 -------
83 correlation length
84 """
86 acf = get_autocorrelation_function(data)
87 correlation_length = _estimate_correlation_length_from_acf(acf)
88 if correlation_length is None:
89 return None
90 return correlation_length
93def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> Optional[float]:
94 """Returns estimate of standard error :math:`\\mathrm{error}`
95 with confidence interval.
97 .. math::
99 \\mathrm{error} = t_\\mathrm{factor} * \\mathrm{std}(\\mathrm{data}) / \\sqrt{N_s}
101 where :math:`t_{factor}` is the factor corresponding to the confidence
102 interval and :math:`N_s` is the number of independent measurements
103 (with correlation taken into account).
105 If the correlation length cannot be computed since the auto-correlation
106 function is unconverged the function returns `None`.
108 Parameters
109 ----------
110 data
111 data series for which to estimate the error
113 Returns
114 -------
115 error estimate
117 """
118 correlation_length = get_correlation_length(data)
119 if correlation_length is None:
120 return None
121 error_estimate = _estimate_error(data, correlation_length, confidence)
122 return error_estimate
125def _estimate_correlation_length_from_acf(acf: np.ndarray) -> Optional[int]:
126 """Estimates correlation length from acf. Returns None if the auto-correlation
127 function is uncoverged."""
128 for i, a in enumerate(acf):
129 if a < np.exp(-2):
130 return i
131 return None # np.nan
134def _estimate_error(data: np.ndarray,
135 correlation_length: int,
136 confidence: float) -> float:
137 """ Estimates error using correlation length. """
138 t_factor = stats.t.ppf((1 + confidence) / 2, len(data)-1) # type: float
139 error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length) # type: float
140 return error
143def get_rtc_from_hac(hac: np.ndarray, V: float, T: float, dt: float) -> np.ndarray:
144 """Returns the running thermal conductivity (RTC) in W/m/K using a
145 heat auto-correlation (HAC) as input.
147 Parameters
148 ----------
149 hac
150 The HAC :math:`\\langle j(t)j(0)\\rangle` in units of eV\\ :sup:`3`/amu as given by GPUMD
151 V
152 Volume of cell in Å\\ :sup:`3`
153 T
154 Temperature in Kelvin
155 dt
156 Time step in the HAC in ps
157 running thermal conductivity in W/m/K
158 """
160 hac = np.asarray(hac)
162 if hac.ndim > 1:
163 raise ValueError('hac must be 1D')
165 kB = units.kB # eV/K
167 J_per_eV = 1 / units.J
168 kg_per_u = 1 / units.kg
169 s_per_ps = 1e-12
170 m_per_Å = 1 / units.m
172 # The integration and normalization
173 rtc = np.cumsum(hac) * dt / (kB * T**2 * V)
175 # From eV/ps/Å/K to J/s/m/K
176 rtc *= (J_per_eV**3 / kg_per_u) * s_per_ps / (J_per_eV * m_per_Å**3)
178 return rtc