Coverage for calorine/tools/entropy.py: 100%
44 statements

from warnings import warn
import numpy as np

try:
    import torch
    torch_available = True
except ImportError:  # pragma: no cover
    torch_available = False
    # If pytorch is not available, we still need a torch object
    # to satisfy type hints etc. Otherwise the file will not run.

    class _DummyTensor:
        """Fallback Tensor placeholder."""
        pass

    class _DummyDevice:
        """Fallback device placeholder."""
        def __init__(self, *args, **kwargs):
            pass

    class _DummyDType:
        """Fallback dtype placeholder."""
        pass

    class _DummyTorchModule:
        Tensor = _DummyTensor
        device = _DummyDevice
        dtype = _DummyDType

        # Provide common dtype names that may be referenced
        float32 = _DummyDType()
        float64 = _DummyDType()

    torch = _DummyTorchModule()

try:
    from tqdm.auto import tqdm
except ImportError:  # pragma: no cover
    def tqdm(x, *args, **kwargs):
        return x


def get_entropy(
    descriptors: np.ndarray | torch.Tensor,
    width: float,
    use_tqdm: bool = True,
    device: str | torch.device = 'cuda',
    block: int = 1024,
    dtype: torch.dtype = torch.float32,
    eps: float = 1e-12,
) -> tuple[float, np.ndarray]:
52 r"""
53 Computes an estimate for the information entropy :math:`H(\mathbf{X})` of a
54 set of descriptors :math:`\mathbf{X}`. The estimate is described in
55 [Nat. Comm. **16**, 4014 (2025)](https://doi.org/10.1038/s41467-025-59232-0)
56 and given by

    .. math::

        H(\mathbf{X}) = -\frac{1}{n} \sum_{i=1}^{n} p_i

    where

    .. math::

        p_i
        = \log \left[
            \frac{1}{n} \sum_{j=1}^{n}
            K_h(\mathbf{X}_i, \mathbf{X}_j)
        \right]

    with a Gaussian kernel

    .. math::

        K_h(\mathbf{X}_i, \mathbf{X}_j)
        = \exp\!\left(
            -\frac{\lVert \mathbf{X}_i - \mathbf{X}_j \rVert^2}{2h^2}
        \right)

    The calculation is done via torch if the library is installed, and via
    numpy otherwise. When using torch the calculation is by default run on
    CUDA; this behavior can be controlled via the :attr:`device` argument.

    Parameters
    ----------
    descriptors
        The set of descriptors :math:`\mathbf{X}` for which to evaluate
        the entropy. Typically each row corresponds to one atom and the
        columns correspond to the different descriptor components.
    width
        Width :math:`h` of the Gaussian kernel.
    use_tqdm
        Use `tqdm <https://tqdm.github.io/>`_ to show a progress bar.
        Note that this requires tqdm to be installed.
    device
        Device to use for the calculation. The documentation of
        `torch.device <https://docs.pytorch.org/docs/stable/tensor_attributes.html#torch.device>`_
        provides more information.
        Only used when pytorch is available.
    block
        In order to limit the memory needs, the kernel density estimate
        matrix is handled in blocks. This parameter controls the size of
        each block. Smaller numbers imply a smaller memory footprint.
    dtype
        Floating point precision used for the computation. The documentation of
        `torch.dtype <https://docs.pytorch.org/docs/stable/tensor_attributes.html>`_
        provides an overview.
        Only used when pytorch is available.
    eps
        Smallest permissible value for the average kernel density; used to
        avoid taking the logarithm of zero.

    Returns
    -------
    A tuple comprising the total entropy :math:`H(\mathbf{X})` and the per-row
    entropy contributions :math:`-p_i / n`, which sum to the total entropy.
    """
    if torch_available:
        res = _get_entropy_torch(descriptors, width, use_tqdm, block, eps, device, dtype)
        return res
    else:  # pragma: no cover
        warn('Using the numpy implementation.'
             ' Install torch in order to use GPUs and achieve a considerable speed-up.')
        res = _get_entropy_numpy(descriptors, width, use_tqdm, block, eps)
        return res


def _get_entropy_numpy(
    descriptors: np.ndarray,
    width: float,
    use_tqdm: bool,
    block: int,
    eps: float,
) -> tuple[float, np.ndarray]:
    """Compute the information entropy using numpy.
    See get_entropy for documentation.
    """
    X = np.asarray(descriptors, dtype=np.float64)
    N, d = X.shape
    s = np.sum(X * X, axis=1)  # (N,)
    inv_two_sigma2 = 1.0 / (2.0 * width * width)

    row_sums = np.zeros(N, dtype=np.float64)
    for i0 in tqdm(range(0, N, block), leave=False, disable=not use_tqdm):
        i1 = min(i0 + block, N)
        # D2[i0:i1, :] = s[i0:i1,None] + s[None,:] - 2*X[i0:i1]@X.T
        G = X[i0:i1] @ X.T  # (B, N)
        D2_blk = (s[i0:i1, None] + s[None, :] - 2.0 * G)
        row_sums[i0:i1] = np.sum(np.exp(-D2_blk * inv_two_sigma2), axis=1)

    subvals = -np.log(np.clip(row_sums / N, eps, None))
    subvals /= N
    entropy = np.sum(subvals)
    return float(entropy), subvals


def _get_entropy_torch(
    descriptors: np.ndarray | torch.Tensor,
    width: float,
    use_tqdm: bool,
    block: int,
    eps: float,
    device: str | torch.device = 'cuda',
    dtype: torch.dtype = torch.float32,
) -> tuple[float, np.ndarray]:
166 """Compute the informational entropy using torch.
167 See get_entropy for documentation.
168 """
    with torch.no_grad():
        # Move data to device
        if not torch.cuda.is_available() and str(device) == 'cuda':  # pragma: no cover
            device = 'cpu'
        X = torch.as_tensor(descriptors, dtype=dtype, device=device)
        N, d = X.shape
        inv_two_sigma2 = 1.0 / (2.0 * width * width)

        # Precompute norms once
        s = (X * X).sum(dim=1)  # (N,)
        row_sums = torch.zeros(N, dtype=dtype, device=device)

        # Block over rows i; each block computes K[i0:i1, :].sum(-1)
        # D2 = s[i] + s[j] - 2 * X[i] @ X[j]^T  (formed in blocks to save memory)
        XT = X.T  # reuse in matmuls
        for i0 in tqdm(range(0, N, block), leave=False, disable=not use_tqdm):
            i1 = min(i0 + block, N)
            try:
                G = X[i0:i1] @ XT  # (B, N)
                D2_blk = s[i0:i1, None] + s[None, :] - 2.0 * G
            except torch.cuda.OutOfMemoryError:  # pragma: no cover
                torch.cuda.empty_cache()
                raise ValueError(
                    'Tried to allocate too much GPU memory.'
                    f' Try to reduce the value of block, e.g., to {block // 2}.')
            row_sums[i0:i1] = torch.exp(-D2_blk * inv_two_sigma2).sum(dim=1)

        subvals = -torch.log(torch.clamp(row_sums / N, min=eps))
        subvals /= N
        entropy = torch.sum(subvals)
        return float(entropy.detach().cpu().item()), subvals.detach().cpu().numpy()
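

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only; the random descriptors, kernel
# width, and block size below are made-up values for demonstration and are
# not part of the module above). Running on 'cpu' keeps the example portable
# regardless of whether a GPU is available.
if __name__ == '__main__':
    rng = np.random.default_rng(seed=42)

    # Fake descriptor matrix: 200 "atoms" with 16 descriptor components each.
    X = rng.normal(size=(200, 16))

    # Sanity check of the blocked distance expansion used above:
    # ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j
    a = np.array([[0.0, 0.0], [3.0, 4.0]])
    sq = (a * a).sum(axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * a @ a.T
    assert np.isclose(d2[0, 1], 25.0)  # 3-4-5 triangle: squared distance is 25

    entropy, contributions = get_entropy(
        X, width=1.0, use_tqdm=False, device='cpu', block=64)
    print(f'total entropy: {entropy:.4f}')
    print(f'per-row contributions: {contributions.shape}')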