Coverage for calorine/tools/entropy.py: 100%
47 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-18 13:01 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-18 13:01 +0000
1from warnings import warn
2import numpy as np
4try:
5 import torch
6 torch_available = True
7except ImportError: # pragma: no cover
8 torch_available = False
9 # If pytorch is not available, we still need a torch object
10 # to satisfy type hints etc. Otherwise the file will not run.
12 class _DummyTensor:
13 """Fallback Tensor placeholder."""
14 pass
16 class _DummyDevice:
17 """Fallback device placeholder."""
18 def __init__(self, *args, **kwargs):
19 pass
21 class _DummyDType:
22 """Fallback dtype placeholder."""
23 pass
25 class _DummyTorchModule:
26 Tensor = _DummyTensor
27 device = _DummyDevice
28 dtype = _DummyDType
30 # Provide common dtype names you might reference
31 float32 = _DummyDType()
32 float64 = _DummyDType()
34 torch = _DummyTorchModule()
36try:
37 import warnings as _warnings
38 with _warnings.catch_warnings():
39 _warnings.filterwarnings('ignore', message='IProgress not found')
40 from tqdm.auto import tqdm
41except ImportError: # pragma: no cover
42 def tqdm(x):
43 return x
46def get_entropy(
47 descriptors: np.ndarray | torch.Tensor,
48 width: float,
49 use_tqdm: bool = True,
50 device: str | torch.device = 'cuda',
51 block: int = 1024,
52 dtype: torch.dtype = torch.float32,
53 eps: float = 1e-12,
54) -> tuple[float, np.ndarray]:
55 r"""
56 Computes an estimate for the information entropy :math:`H(\mathbf{X})` of a
57 set of descriptors :math:`\mathbf{X}`. The estimate is described in
58 [Nat. Comm. **16**, 4014 (2025)](https://doi.org/10.1038/s41467-025-59232-0)
59 and given by
61 .. math::
63 \mathcal{H}(\{\mathbf{X}\}) = -\frac{1}{n} \sum_{i=1}^{n} p_i
65 where
67 .. math::
69 p_i
70 = \log \left[
71 \frac{1}{n} \sum_{j=1}^{n}
72 K_h(\mathbf{X}_i, \mathbf{X}_j)
73 \right]
75 with a Gaussian kernel
77 .. math::
79 K_h(\mathbf{X}_i, \mathbf{X}_j)
80 = \exp\!\left(
81 -\frac{\lVert \mathbf{X}_i - \mathbf{X}_j \rVert^2}{2h^2}.
82 \right)
84 The calculation is done via torch if the library has been installed,
85 and numpy otherwise. When using torch the calculation is run via CUDA.
86 The latter behavior can be controlled using the :attr:`device` argument.
88 Parameters
89 ----------
90 descriptors
91 The set of descriptors :math:`\mathbf{X}` for which to evaluate to
92 the entropy. Typically each row corresponds to one atom and the
93 columns correspond to the different descriptor components.
94 width
95 Width :math:`h` of the Gaussian kernel.
96 use_tqdm
97 Use `tqdm <https://tqdm.github.io/>`_ to show a progress bar.
98 Note that this requires tqdm to be installed.
99 block
100 In order to limit the memory needs, the kernel density estimate
101 matrix is handled in blocks. This parameter controls the size of
102 each block. Smaller numbers imply a smaller memory footprint.
103 eps
104 Smallest (absolute) permissible value.
105 device
106 Device to use for calculation. The documentation of
107 [`torch.device`](https://docs.pytorch.org/docs/stable/tensor_attributes.html#torch.device)
108 provides more information.
109 Only used when pytorch is available.
110 dtype
111 Floating point precision used for the computation. The documentation of
112 [`torch.dtype`](https://docs.pytorch.org/docs/stable/tensor_attributes.html)
113 provides an overview.
114 Only used when pytorch is available.
116 Returns
117 -------
118 A tuple comprising the total entropy :math:`H(\mathbf{X})` and the entropy contributions
119 :math:`p_i` from each row in the input descriptor matrix.
120 """
121 if torch_available:
122 res = _get_entropy_torch(descriptors, width, use_tqdm, block, eps, device, dtype)
123 return res
124 else: # pragma: no cover
125 warn('Using the numpy implementation.'
126 ' Install torch in order to use GPUs and achieve a considerable speed-up.')
127 res = _get_entropy_numpy(descriptors, width, use_tqdm, block, eps)
128 return res
131def _get_entropy_numpy(
132 descriptors: np.ndarray,
133 width: float,
134 use_tqdm: bool,
135 block: int,
136 eps: float,
137) -> float:
138 """Compute the informational entropy using numpy.
139 See get_entropy for documentation.
140 """
141 X = np.asarray(descriptors, dtype=np.float64)
142 N, d = X.shape
143 s = np.sum(X * X, axis=1) # (N,)
144 inv_two_sigma2 = 1.0 / (2.0 * width * width)
146 row_sums = np.zeros(N, dtype=np.float64)
147 for i0 in tqdm(range(0, N, block), leave=False):
148 i1 = min(i0 + block, N)
149 # D2[i0:i1, :] = s[i0:i1,None] + s[None,:] - 2*X[i0:i1]@X.T
150 G = X[i0:i1] @ X.T # (B,N)
151 D2_blk = (s[i0:i1, None] + s[None, :] - 2.0 * G)
152 row_sums[i0:i1] = np.sum(np.exp(-D2_blk * inv_two_sigma2), axis=1)
154 subvals = -np.log(np.clip(row_sums / N, min=eps))
155 subvals /= N
156 entropy = np.sum(subvals)
157 return -float(entropy), subvals
160def _get_entropy_torch(
161 descriptors: np.ndarray | torch.Tensor,
162 width: float,
163 use_tqdm: bool,
164 block: int,
165 eps: float,
166 device: str | torch.device = 'cuda',
167 dtype: torch.dtype = torch.float32,
168) -> tuple[float, np.ndarray]:
169 """Compute the informational entropy using torch.
170 See get_entropy for documentation.
171 """
172 with torch.no_grad():
173 # Move data to device
174 if not torch.cuda.is_available() and str(device) == 'cuda': # pragma: no cover
175 device = 'cpu'
176 X = torch.as_tensor(descriptors, dtype=dtype, device=device)
177 N, d = X.shape
178 inv_two_sigma2 = 1.0 / (2.0 * width * width)
180 # Precompute norms once
181 s = (X * X).sum(dim=1) # (N,)
182 row_sums = torch.zeros(N, dtype=dtype, device=device)
184 # Block over rows i; each block computes K[i0:i1, :].sum(-1)
185 # D2 = s[i] + s[j] - 2 * X[i] @ X[j]^T (formed in blocks to save memory)
186 XT = X.T # reuse in matmuls
187 for i0 in tqdm(range(0, N, block), leave=False):
188 i1 = min(i0 + block, N)
189 try:
190 G = X[i0:i1] @ XT # (B, N)
191 D2_blk = s[i0:i1, None] + s[None, :] - 2.0 * G
192 except torch.cuda.OutOfMemoryError: # pragma: no cover
193 torch.cuda.empty_cache()
194 raise ValueError(
195 'Tried to allocate too much GPU memory.'
196 f' Try to reduce the value of block, e.g., to {block//2}.')
197 row_sums[i0:i1] = torch.exp(-D2_blk * inv_two_sigma2).sum(dim=1)
199 subvals = -torch.log(torch.clamp(row_sums / N, min=eps))
200 subvals /= N
201 entropy = torch.sum(subvals)
202 return -float(entropy.detach().cpu().item()), subvals.detach().cpu().numpy()