Coverage for calorine/tools/entropy.py: 100%

47 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-18 13:01 +0000

1from warnings import warn 

2import numpy as np 

3 

4try: 

5 import torch 

6 torch_available = True 

7except ImportError: # pragma: no cover 

8 torch_available = False 

9 # If pytorch is not available, we still need a torch object 

10 # to satisfy type hints etc. Otherwise the file will not run. 

11 

12 class _DummyTensor: 

13 """Fallback Tensor placeholder.""" 

14 pass 

15 

16 class _DummyDevice: 

17 """Fallback device placeholder.""" 

18 def __init__(self, *args, **kwargs): 

19 pass 

20 

21 class _DummyDType: 

22 """Fallback dtype placeholder.""" 

23 pass 

24 

25 class _DummyTorchModule: 

26 Tensor = _DummyTensor 

27 device = _DummyDevice 

28 dtype = _DummyDType 

29 

30 # Provide common dtype names you might reference 

31 float32 = _DummyDType() 

32 float64 = _DummyDType() 

33 

34 torch = _DummyTorchModule() 

35 

36try: 

37 import warnings as _warnings 

38 with _warnings.catch_warnings(): 

39 _warnings.filterwarnings('ignore', message='IProgress not found') 

40 from tqdm.auto import tqdm 

41except ImportError: # pragma: no cover 

42 def tqdm(x): 

43 return x 

44 

45 

46def get_entropy( 

47 descriptors: np.ndarray | torch.Tensor, 

48 width: float, 

49 use_tqdm: bool = True, 

50 device: str | torch.device = 'cuda', 

51 block: int = 1024, 

52 dtype: torch.dtype = torch.float32, 

53 eps: float = 1e-12, 

54) -> tuple[float, np.ndarray]: 

55 r""" 

56 Computes an estimate for the information entropy :math:`H(\mathbf{X})` of a 

57 set of descriptors :math:`\mathbf{X}`. The estimate is described in 

58 [Nat. Comm. **16**, 4014 (2025)](https://doi.org/10.1038/s41467-025-59232-0) 

59 and given by 

60 

61 .. math:: 

62 

63 \mathcal{H}(\{\mathbf{X}\}) = -\frac{1}{n} \sum_{i=1}^{n} p_i 

64 

65 where 

66 

67 .. math:: 

68 

69 p_i 

70 = \log \left[ 

71 \frac{1}{n} \sum_{j=1}^{n} 

72 K_h(\mathbf{X}_i, \mathbf{X}_j) 

73 \right] 

74 

75 with a Gaussian kernel 

76 

77 .. math:: 

78 

79 K_h(\mathbf{X}_i, \mathbf{X}_j) 

80 = \exp\!\left( 

81 -\frac{\lVert \mathbf{X}_i - \mathbf{X}_j \rVert^2}{2h^2}. 

82 \right) 

83 

84 The calculation is done via torch if the library has been installed, 

85 and numpy otherwise. When using torch the calculation is run via CUDA. 

86 The latter behavior can be controlled using the :attr:`device` argument. 

87 

88 Parameters 

89 ---------- 

90 descriptors 

91 The set of descriptors :math:`\mathbf{X}` for which to evaluate to 

92 the entropy. Typically each row corresponds to one atom and the 

93 columns correspond to the different descriptor components. 

94 width 

95 Width :math:`h` of the Gaussian kernel. 

96 use_tqdm 

97 Use `tqdm <https://tqdm.github.io/>`_ to show a progress bar. 

98 Note that this requires tqdm to be installed. 

99 block 

100 In order to limit the memory needs, the kernel density estimate 

101 matrix is handled in blocks. This parameter controls the size of 

102 each block. Smaller numbers imply a smaller memory footprint. 

103 eps 

104 Smallest (absolute) permissible value. 

105 device 

106 Device to use for calculation. The documentation of 

107 [`torch.device`](https://docs.pytorch.org/docs/stable/tensor_attributes.html#torch.device) 

108 provides more information. 

109 Only used when pytorch is available. 

110 dtype 

111 Floating point precision used for the computation. The documentation of 

112 [`torch.dtype`](https://docs.pytorch.org/docs/stable/tensor_attributes.html) 

113 provides an overview. 

114 Only used when pytorch is available. 

115 

116 Returns 

117 ------- 

118 A tuple comprising the total entropy :math:`H(\mathbf{X})` and the entropy contributions 

119 :math:`p_i` from each row in the input descriptor matrix. 

120 """ 

121 if torch_available: 

122 res = _get_entropy_torch(descriptors, width, use_tqdm, block, eps, device, dtype) 

123 return res 

124 else: # pragma: no cover 

125 warn('Using the numpy implementation.' 

126 ' Install torch in order to use GPUs and achieve a considerable speed-up.') 

127 res = _get_entropy_numpy(descriptors, width, use_tqdm, block, eps) 

128 return res 

129 

130 

131def _get_entropy_numpy( 

132 descriptors: np.ndarray, 

133 width: float, 

134 use_tqdm: bool, 

135 block: int, 

136 eps: float, 

137) -> float: 

138 """Compute the informational entropy using numpy. 

139 See get_entropy for documentation. 

140 """ 

141 X = np.asarray(descriptors, dtype=np.float64) 

142 N, d = X.shape 

143 s = np.sum(X * X, axis=1) # (N,) 

144 inv_two_sigma2 = 1.0 / (2.0 * width * width) 

145 

146 row_sums = np.zeros(N, dtype=np.float64) 

147 for i0 in tqdm(range(0, N, block), leave=False): 

148 i1 = min(i0 + block, N) 

149 # D2[i0:i1, :] = s[i0:i1,None] + s[None,:] - 2*X[i0:i1]@X.T 

150 G = X[i0:i1] @ X.T # (B,N) 

151 D2_blk = (s[i0:i1, None] + s[None, :] - 2.0 * G) 

152 row_sums[i0:i1] = np.sum(np.exp(-D2_blk * inv_two_sigma2), axis=1) 

153 

154 subvals = -np.log(np.clip(row_sums / N, min=eps)) 

155 subvals /= N 

156 entropy = np.sum(subvals) 

157 return -float(entropy), subvals 

158 

159 

160def _get_entropy_torch( 

161 descriptors: np.ndarray | torch.Tensor, 

162 width: float, 

163 use_tqdm: bool, 

164 block: int, 

165 eps: float, 

166 device: str | torch.device = 'cuda', 

167 dtype: torch.dtype = torch.float32, 

168) -> tuple[float, np.ndarray]: 

169 """Compute the informational entropy using torch. 

170 See get_entropy for documentation. 

171 """ 

172 with torch.no_grad(): 

173 # Move data to device 

174 if not torch.cuda.is_available() and str(device) == 'cuda': # pragma: no cover 

175 device = 'cpu' 

176 X = torch.as_tensor(descriptors, dtype=dtype, device=device) 

177 N, d = X.shape 

178 inv_two_sigma2 = 1.0 / (2.0 * width * width) 

179 

180 # Precompute norms once 

181 s = (X * X).sum(dim=1) # (N,) 

182 row_sums = torch.zeros(N, dtype=dtype, device=device) 

183 

184 # Block over rows i; each block computes K[i0:i1, :].sum(-1) 

185 # D2 = s[i] + s[j] - 2 * X[i] @ X[j]^T (formed in blocks to save memory) 

186 XT = X.T # reuse in matmuls 

187 for i0 in tqdm(range(0, N, block), leave=False): 

188 i1 = min(i0 + block, N) 

189 try: 

190 G = X[i0:i1] @ XT # (B, N) 

191 D2_blk = s[i0:i1, None] + s[None, :] - 2.0 * G 

192 except torch.cuda.OutOfMemoryError: # pragma: no cover 

193 torch.cuda.empty_cache() 

194 raise ValueError( 

195 'Tried to allocate too much GPU memory.' 

196 f' Try to reduce the value of block, e.g., to {block//2}.') 

197 row_sums[i0:i1] = torch.exp(-D2_blk * inv_two_sigma2).sum(dim=1) 

198 

199 subvals = -torch.log(torch.clamp(row_sums / N, min=eps)) 

200 subvals /= N 

201 entropy = torch.sum(subvals) 

202 return -float(entropy.detach().cpu().item()), subvals.detach().cpu().numpy()