Coverage for calorine/tools/analysis.py: 100%

1from ase import units

2from typing import Optional

4import numpy as np

5import pandas as pd

6from scipy import stats

9def analyze_data(data: np.ndarray, max_lag: int = None) -> dict:

10 """ Carries out an extensive analysis of the data series.

12 Parameters

13 ----------

14 data

15 data series to compute autocorrelation function for

16 max_lag

17 maximum lag between two data points, used for computing autocorrelation

19 Returns

20 -------

21 dict

22 calculated properties of the data including, mean, standard deviation,

23 correlation length and a 95% error estimate.

24 """

25 summary = dict(mean=data.mean(),

26 std=data.std())

27 acf = get_autocorrelation_function(data, max_lag)

28 correlation_length = _estimate_correlation_length_from_acf(acf)

29 if correlation_length is not None:

30 error_estimate = _estimate_error(data, correlation_length, confidence=0.95)

31 summary['correlation_length'] = correlation_length

32 summary['error_estimate'] = error_estimate

33 else:

34 summary['correlation_length'] = np.nan

35 summary['error_estimate'] = np.nan

36 return summary

39def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray:

40 """ Returns autocorrelation function.

42 The autocorrelation function is computed using `pandas.Series.autocorr

43 <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.autocorr.html>`_.

45 Parameters

46 ----------

47 data

48 data series to compute autocorrelation function for

49 max_lag

50 maximum lag between two data points

52 Returns

53 -------

54 calculated autocorrelation function

55 """

56 if max_lag is None:

57 max_lag = len(data) - 1

58 if max_lag < 1 or max_lag >= len(data):

59 raise ValueError('max_lag should be between 1 and len(data)-1.')

60 series = pd.Series(data)

61 acf = [series.autocorr(lag) for lag in range(0, max_lag)]

62 return np.array(acf)

65def get_correlation_length(data: np.ndarray) -> Optional[int]:

66 """ Returns estimate of the correlation length of data.

68 The correlation length is taken as the first point where the

69 autocorrelation functions is less than :math:`\\exp(-2)`. If the

70 correlation function never drops below :math:`\\exp(-2)` ``np.nan`` is

71 returned.

73 If the correlation length cannot be computed since the auto-correlation

74 function is unconverged the function returns `None`.

76 Parameters

77 ----------

78 data

79 data series for which to the compute autocorrelation function

81 Returns

82 -------

83 correlation length

84 """

86 acf = get_autocorrelation_function(data)

87 correlation_length = _estimate_correlation_length_from_acf(acf)

88 if correlation_length is None:

89 return None

90 return correlation_length

93def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> Optional[float]:

94 """Returns estimate of standard error :math:`\\mathrm{error}`

95 with confidence interval.

97 .. math::

99 \\mathrm{error} = t_\\mathrm{factor} * \\mathrm{std}(\\mathrm{data}) / \\sqrt{N_s}

100

101 where :math:`t_{factor}` is the factor corresponding to the confidence

102 interval and :math:`N_s` is the number of independent measurements

103 (with correlation taken into account).

104

105 If the correlation length cannot be computed since the auto-correlation

106 function is unconverged the function returns `None`.

107

108 Parameters

109 ----------

110 data

111 data series for which to estimate the error

112

113 Returns

114 -------

115 error estimate

116

117 """

118 correlation_length = get_correlation_length(data)

119 if correlation_length is None:

120 return None

121 error_estimate = _estimate_error(data, correlation_length, confidence)

122 return error_estimate

123

124

125def _estimate_correlation_length_from_acf(acf: np.ndarray) -> Optional[int]:

126 """Estimates correlation length from acf. Returns None if the auto-correlation

127 function is uncoverged."""

128 for i, a in enumerate(acf):

129 if a < np.exp(-2):

130 return i

131 return None # np.nan

132

133

134def _estimate_error(data: np.ndarray,

135 correlation_length: int,

136 confidence: float) -> float:

137 """ Estimates error using correlation length. """

138 t_factor = stats.t.ppf((1 + confidence) / 2, len(data)-1) # type: float

139 error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length) # type: float

140 return error

141

142

143def get_rtc_from_hac(hac: np.ndarray, V: float, T: float, dt: float) -> np.ndarray:

144 """Returns the running thermal conductivity (RTC) in W/m/K using a

145 heat auto-correlation (HAC) as input.

146

147 Parameters

148 ----------

149 hac

150 The HAC :math:`\\langle j(t)j(0)\\rangle` in units of eV\\ :sup:`3`/amu as given by GPUMD

151 V

152 Volume of cell in Å\\ :sup:`3`

153 T

154 Temperature in Kelvin

155 dt

156 Time step in the HAC in ps

157 running thermal conductivity in W/m/K

158 """

159

160 hac = np.asarray(hac)

161

162 if hac.ndim > 1:

163 raise ValueError('hac must be 1D')

164

165 kB = units.kB # eV/K

166

167 J_per_eV = 1 / units.J

168 kg_per_u = 1 / units.kg

169 s_per_ps = 1e-12

170 m_per_Å = 1 / units.m

171

172 # The integration and normalization

173 rtc = np.cumsum(hac) * dt / (kB * T**2 * V)

174

175 # From eV/ps/Å/K to J/s/m/K

176 rtc *= (J_per_eV**3 / kg_per_u) * s_per_ps / (J_per_eV * m_per_Å**3)

177

178 return rtc