Coverage for calorine/tools/spectra.py: 100%

r"""
Tools for computing optical and vibrational spectra from MD trajectories.

Covers three routes depending on the model and GPUMD output:

* **Dielectric tensor** (:func:`get_dielectric_function`): full 3×3 tensor
  from ``dpdt.out`` (qNEP, ``column='dP'`` or ``column='P'``) or ``dipole.out``
  (TNEP, ``column='mu'``).
* **IR spectrum** (:func:`get_ir_spectrum`): from ``dpdt.out``
  (qNEP, ``column='dP'`` or ``column='P'``) or ``dipole.out``
  (TNEP, ``column='mu'``).
* **Raman spectrum** (:func:`get_raman_spectrum`): from ``polarizability.out``
  (TNEP polarizability/susceptibility model).

Nomenclature
------------
- **Molecules / nanoparticles (localized systems)**: dipole moment :math:`\mu`
  (e·Å) and polarizability :math:`\alpha` (bohr³ or training-data units).
- **Solids / liquids (extended systems)**: polarization :math:`P` (e·Å, total
  cell dipole) and susceptibility :math:`\chi` (same unit as training data per
  cell).

Both cases use the same GPUMD output files and formulas; the distinction is
whether :attr:`volume` is supplied for normalization.

Quantum corrections
-------------------
The spectra from classical MD can be corrected for quantum statistics via
:func:`apply_quantum_correction`. The correction factors differ by scattering
order (M. Cardona, *Topics in Applied Physics*, Vol. 50 (Springer, 1982);
Rosander *et al.*, PRB **111**, 064107 (2025)).
"""

34import warnings

36import numpy as np

37from pandas import DataFrame

38from ase.units import _k as kB_SI, _e as e_SI, _eps0 as eps0_SI, _c as c_SI, _hbar as hbar_SI

# Conversion factors applied to angular frequencies expressed in rad/s.
# Scaling by 1e-12 gives the value per picosecond, which the output tables label as THz.
_rad_s_to_thz = 1e-12
# Dividing by 2*pi*c yields a wavenumber per metre; the extra factor 100 converts it to 1/cm.
_rad_s_to_invcm = 1.0 / (2.0 * np.pi * c_SI * 100.0)

def _compute_correlation_function(Z1, Z2):
    """Return the cross-correlation of two equally long series for lags 0..N-1.

    Entry ``k`` of the result is ``sum_n Z1[n + k] * Z2[n]`` divided by the
    number of pairs ``N - k`` that contribute to that lag, where ``N`` is
    ``len(Z1)``.
    """
    n_samples = len(Z1)
    full_correlation = np.correlate(Z1, Z2, mode='full')
    # In 'full' mode the zero-lag term sits at index N - 1; keep lags >= 0 only.
    non_negative_lags = full_correlation[n_samples - 1:]
    # Lag k is built from N - k overlapping pairs: N, N - 1, ..., 1.
    pair_counts = np.arange(n_samples, 0, -1)
    return non_negative_lags / pair_counts

def _gaussian_decay(t, t_sigma):
    r"""Evaluate the Gaussian envelope :math:`\exp(-t^2 / 2\sigma^2)`.

    ``t`` and ``t_sigma`` must share the same time unit; the envelope is used
    as a damping window multiplied onto the ACF.
    """
    reduced_time = t / t_sigma
    return np.exp(-0.5 * reduced_time ** 2)

def _psd_from_acf(acf, dt):
    """Turn a one-sided ACF into a power spectral density.

    The ACF is mirrored (without repeating the zero-lag sample) into an even
    sequence of length ``2 * len(acf) - 1`` and transformed with a real FFT.

    Returns
    -------
    tuple
        ``(freqs, psd)`` where ``freqs`` are the angular frequencies
        ``2 * pi * rfftfreq`` for sampling interval ``dt`` and ``psd`` is the
        real part of ``dt * rfft``.
    """
    mirrored_tail = acf[:0:-1]
    even_sequence = np.hstack((acf, mirrored_tail))
    n_points = len(even_sequence)
    angular_freqs = 2 * np.pi * np.fft.rfftfreq(n_points, dt)
    transform = dt * np.fft.rfft(even_sequence)
    return angular_freqs, transform.real

def _parse_polarization_pair(polarization_in, polarization_out):
    """Convert an optional pair of polarization vectors to float arrays.

    Returns ``(None, None)`` when neither vector is supplied. Otherwise both
    vectors are converted to float arrays of shape ``(3,)`` and returned as
    ``(n_in, n_out)``. The vectors are not normalized here.

    Raises
    ------
    ValueError
        If only one of the two vectors is given, or if either vector does not
        have shape ``(3,)``.
    """
    in_missing = polarization_in is None
    out_missing = polarization_out is None
    if in_missing != out_missing:
        raise ValueError('polarization_in and polarization_out must be given together')
    if in_missing:
        return None, None

    vec_in = np.asarray(polarization_in, dtype=float)
    vec_out = np.asarray(polarization_out, dtype=float)
    if any(vec.shape != (3,) for vec in (vec_in, vec_out)):
        raise ValueError('polarization_in and polarization_out must have shape (3,)')
    return vec_in, vec_out

# ── Signal column mapping ──────────────────────────────────────────────────────

# Maps the user-facing ``column`` keyword to (a) the DataFrame columns holding the
# three Cartesian components and (b) a flag telling whether those columns already
# contain the time derivative of the signal.
_SIGNAL_COLS = {
    'dP': (['dPx', 'dPy', 'dPz'], True),
    'P': (['Px', 'Py', 'Pz'], False),
    'mu': (['mu_x', 'mu_y', 'mu_z'], False),
}


def _resolve_signal(signal, column, dt):
    """Select the signal columns of a DataFrame and determine the time step.

    Parameters
    ----------
    signal
        DataFrame holding the three Cartesian component columns selected by
        ``column`` and, optionally, a ``time`` column.
    column
        Key of :data:`_SIGNAL_COLS` (``'dP'``, ``'P'`` or ``'mu'``).
    dt
        Time between consecutive frames. When ``None`` it is taken from the
        first interval of the ``time`` column; later intervals are not
        inspected.

    Returns
    -------
    tuple
        ``(array, dt, derivative)``: the selected columns as a float array with
        one column per Cartesian component, the time step, and the derivative
        flag registered for ``column``.

    Raises
    ------
    ValueError
        If ``column`` is unknown, required columns are missing, ``dt`` cannot
        be inferred (no ``time`` column or fewer than two frames), or ``dt`` is
        not a positive, finite number.
    """
    if column not in _SIGNAL_COLS:
        raise ValueError(
            f'column must be one of {list(_SIGNAL_COLS)!r}, got {column!r}'
        )
    col_names, derivative = _SIGNAL_COLS[column]
    missing = [c for c in col_names if c not in signal.columns]
    if missing:
        raise ValueError(
            f'signal is missing columns {missing!r} for column={column!r}'
        )
    if dt is None:
        if 'time' not in signal.columns:
            raise ValueError(
                'dt must be provided when signal has no time column '
                f'(got columns: {list(signal.columns)!r})'
            )
        intervals = signal['time'].diff().dropna()
        # A single frame has no interval; fail with a clear message instead of the
        # IndexError that ``.iloc[0]`` would raise on an empty Series.
        if intervals.empty:
            raise ValueError(
                'cannot infer dt from the time column: signal must contain at '
                'least two frames'
            )
        dt = float(intervals.iloc[0])
    # A zero, negative or non-finite step would produce a meaningless frequency grid.
    if not np.isfinite(dt) or dt <= 0:
        raise ValueError(f'dt must be a positive, finite number, got {dt!r}')
    return signal[col_names].to_numpy(dtype=float), dt, derivative

105

106

107# ── IR spectrum ────────────────────────────────────────────────────────────────

108

def _acf_to_psd(acf, dt, t_sigma, window_size):
    """Truncate an ACF, optionally damp it with a Gaussian window, and compute its PSD.

    Parameters
    ----------
    acf
        One-sided autocorrelation function sampled every ``dt``.
    dt
        Sampling interval in ps.
    t_sigma
        Width of the Gaussian window in ps, or ``None`` for no windowing.
    window_size
        Length of the ACF to retain in ps. ``None`` defaults to
        ``5 * t_sigma`` when ``t_sigma`` is given and to the full ACF otherwise.

    Returns
    -------
    tuple
        The pair returned by :func:`_psd_from_acf`, which is called with the
        time step converted from ps to s.

    Warns
    -----
    UserWarning
        If ``window_size`` is smaller than ``3 * t_sigma``.

    Raises
    ------
    ValueError
        If ``t_sigma`` is not positive, if ``window_size`` exceeds the available
        ACF length, or if ``window_size`` is too short to retain a single sample.
    """
    if t_sigma is not None and not t_sigma > 0:
        raise ValueError(f't_sigma must be positive, got {t_sigma!r}')
    if window_size is None and t_sigma is not None:
        window_size = 5 * t_sigma
    if window_size is not None:
        if t_sigma is not None and window_size < 3 * t_sigma:
            warnings.warn(
                f'window_size ({window_size:.3g} ps) is less than 3 * t_sigma '
                f'({3 * t_sigma:.3g} ps); the Gaussian window is truncated before it has '
                'meaningfully decayed, which can introduce spectral artifacts. Consider '
                'increasing window_size or decreasing t_sigma.'
            )
        max_available = len(acf) * dt
        if window_size > max_available:
            raise ValueError(
                f'window_size ({window_size:.3g} ps) exceeds the {max_available:.3g} ps '
                'of autocorrelation data available from the input trajectory. Use a '
                'smaller window_size (or a larger t_sigma, since window_size defaults to '
                '5 * t_sigma), or provide a longer trajectory.'
            )
        # The small offset absorbs floating-point noise in the ratio: without it,
        # e.g. 0.3 / 0.1 evaluates to 2.9999999999999996 and would drop one sample.
        n_keep = int(window_size / dt + 1e-9)
        # Zero samples would only fail later inside the FFT, and a negative count
        # would silently slice samples off the *end* of the ACF.
        if n_keep < 1:
            raise ValueError(
                f'window_size ({window_size:.3g} ps) is shorter than one time step '
                f'(dt = {dt:.3g} ps), so no autocorrelation samples would be retained. '
                'Use a larger window_size or t_sigma.'
            )
        acf = acf[:n_keep]
    t_acf = dt * np.arange(len(acf))  # ps
    if t_sigma is not None:
        acf = acf * _gaussian_decay(t_acf, t_sigma=t_sigma)  # both ps
    return _psd_from_acf(acf, dt * 1e-12)  # _psd_from_acf uses seconds

135

136

def _ir_spectrum_1d(dt, signal, volume_SI, temperature, t_sigma, window_size, derivative=False):
    """Compute the IR spectrum of one dipole/polarization component in SI units.

    The mean of ``signal`` is removed and the result is scaled by
    ``e * 1e-10`` (additionally divided by ``1e-15`` when ``derivative`` is
    true) before its autocorrelation is transformed by :func:`_acf_to_psd`.

    Returns
    -------
    tuple
        ``(w, eps_imag, conductivity)`` when ``volume_SI`` is given, otherwise
        ``(w, S_dpdt)`` with the spectral density of the time derivative.

    NOTE(review): the conductivity here carries a factor ``1 / 3`` and
    :func:`get_ir_spectrum` additionally averages over the Cartesian
    components, whereas :func:`get_dielectric_function` uses ``beta / V`` per
    tensor component -- confirm that the differing prefactors are intended.
    """
    time_scale = 1e-15 if derivative else 1.0
    to_SI = e_SI * 1e-10 / time_scale
    fluctuation = (signal - np.mean(signal)) * to_SI

    autocorrelation = _compute_correlation_function(fluctuation, fluctuation)
    omega, psd = _acf_to_psd(autocorrelation, dt, t_sigma, window_size)
    if not derivative:
        # The input was the signal itself: PSD[dx/dt] = omega**2 * PSD[x].
        psd = omega ** 2 * psd

    if volume_SI is None:
        return omega, psd

    beta = 1.0 / (kB_SI * temperature)
    sigma = (beta / (3.0 * volume_SI)) * psd
    # omega contains zero; silence the resulting division warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        epsilon_2 = sigma / (eps0_SI * omega)
    return omega, epsilon_2, sigma

155

156

def get_ir_spectrum(
    signal: DataFrame,
    volume: float = None,
    temperature: float = None,
    column: str = 'dP',
    dt: float = None,
    polarization: np.ndarray = None,
    t_sigma: float = None,
    window_size: float = None,
) -> DataFrame:
    r"""Compute the IR spectrum from a dipole-moment or polarization time series.

    Parameters
    ----------
    signal
        ``DataFrame`` as returned by :func:`~calorine.gpumd.read_dpdt` or
        :func:`~calorine.gpumd.read_dipole`; :attr:`column` selects which of
        its columns are used.
    volume
        Volume of the simulation cell in Å³. Needed for extended systems; if
        given, the imaginary dielectric function and the conductivity are
        returned. With ``None`` (molecules) an unnormalized line shape in
        arbitrary units is returned instead.
    temperature
        Temperature in K. Mandatory whenever :attr:`volume` is not ``None``.
    column
        Chooses the columns of :attr:`signal` and thereby whether the input
        is already a time derivative:

        ``'dP'`` (default)
            ``dPx``, ``dPy``, ``dPz`` from :func:`~calorine.gpumd.read_dpdt`
            (qNEP), i.e. :math:`\mathrm{d}P/\mathrm{d}t` in e·Å/fs.
        ``'P'``
            ``Px``, ``Py``, ``Pz`` from :func:`~calorine.gpumd.read_dpdt`
            (qNEP), i.e. the polarization :math:`P(t)` in e·Å.
        ``'mu'``
            ``mu_x``, ``mu_y``, ``mu_z`` from
            :func:`~calorine.gpumd.read_dipole` (TNEP), i.e. the dipole
            moment :math:`\mu(t)` in e·Å.

    dt
        Time between consecutive frames in ps. Taken from the ``time`` column
        if one is present (as in :func:`~calorine.gpumd.read_dpdt` output);
        has to be passed explicitly for step-indexed input such as the output
        of :func:`~calorine.gpumd.read_dipole`.
    polarization
        Vector of shape ``(3,)`` giving the polarization direction of the
        electric field. If provided, the spectrum is evaluated for the
        projection :math:`s(t) = \hat{n} \cdot \mathrm{signal}(t)` rather
        than averaged over the three Cartesian components. The vector is used
        as given (it is not normalized).
    t_sigma
        Width in ps of the Gaussian window multiplied onto the ACF;
        ``None`` disables windowing.
    window_size
        Length in ps of the ACF that is kept. ``None`` (default) means
        :math:`5 \times` :attr:`t_sigma` if :attr:`t_sigma` is set and the
        full ACF otherwise. A warning is emitted when :attr:`window_size` is
        below :math:`3 \times` :attr:`t_sigma` because the Gaussian window is
        then cut off before it has meaningfully decayed. A
        :class:`ValueError` is raised when :attr:`window_size` exceeds the
        autocorrelation data available from :attr:`signal`.

    Returns
    -------
    DataFrame
        Always holds ``angular_frequency`` (THz) and ``wavenumber_invcm``
        (:math:`\mathrm{cm}^{-1}`).
        With :attr:`volume` and :attr:`temperature`: ``epsilon_imag``
        (dimensionless) and ``conductivity`` (S/m); rows with a non-finite
        ``epsilon_imag`` (such as zero frequency) are dropped.
        With :attr:`volume` equal to ``None``: ``ir_intensity`` (arbitrary
        units, proportional to :math:`\mathrm{PSD}[\dot{\mu}]`).

    Raises
    ------
    ValueError
        If :attr:`volume` is given without :attr:`temperature`, or if
        :attr:`polarization` does not have shape ``(3,)``.
    """
    if volume is not None and temperature is None:
        raise ValueError('temperature must be provided when volume is given')

    arr, dt, derivative = _resolve_signal(signal, column, dt)
    volume_SI = None if volume is None else volume * 1e-30

    if polarization is None:
        # No direction requested: treat x, y and z separately and average below.
        components = [arr[:, axis] for axis in range(3)]
    else:
        direction = np.asarray(polarization, dtype=float)
        if direction.shape != (3,):
            raise ValueError('polarization must have shape (3,)')
        components = [arr @ direction]

    results = [
        _ir_spectrum_1d(dt, series, volume_SI, temperature, t_sigma, window_size, derivative)
        for series in components
    ]
    # Every component shares the same frequency grid.
    w = results[0][0]

    if volume_SI is None:
        ir_intensity = np.mean([res[1] for res in results], axis=0)
        table = np.column_stack((w * _rad_s_to_thz, w * _rad_s_to_invcm, ir_intensity))
        return DataFrame(
            table, columns=['angular_frequency', 'wavenumber_invcm', 'ir_intensity'])

    eps_imag = np.mean([res[1] for res in results], axis=0)
    conductivity = np.mean([res[2] for res in results], axis=0)
    finite = np.isfinite(eps_imag)
    table = np.column_stack((
        w[finite] * _rad_s_to_thz,
        w[finite] * _rad_s_to_invcm,
        eps_imag[finite],
        conductivity[finite],
    ))
    return DataFrame(
        table,
        columns=['angular_frequency', 'wavenumber_invcm', 'epsilon_imag', 'conductivity'])

261

262

263# ── Raman spectrum ─────────────────────────────────────────────────────────────

264

# Column labels of the six independent components of the symmetric tensor.
_POL_COLS = ['xx', 'yy', 'zz', 'yz', 'xz', 'xy']


def get_raman_spectrum(
    dt: float,
    polarizability: DataFrame,
    polarization_in: np.ndarray = None,
    polarization_out: np.ndarray = None,
    t_sigma: float = None,
    window_size: float = None,
) -> DataFrame:
    r"""Compute the Raman spectrum from a polarizability-tensor time series.

    Parameters
    ----------
    dt
        Time between consecutive frames in ps.
    polarizability
        DataFrame with the columns ``xx``, ``yy``, ``zz``, ``yz``, ``xz``,
        ``xy`` holding the polarizability :math:`\alpha` (molecules) or the
        susceptibility :math:`\chi` (extended systems), as returned by
        :func:`~calorine.gpumd.read_polarizability`. The units are those of
        the TNEP training data (typically bohr³).
    polarization_in
        Vector of shape ``(3,)`` for the polarization of the incoming light.
        When :attr:`polarization_in` and :attr:`polarization_out` are both
        supplied, the polarization-resolved intensity
        :math:`I(\omega) \propto \mathrm{FT}[\langle s(0)s(t)\rangle]` with
        :math:`s(t) = \hat{n}^\mathrm{out} \cdot \alpha(t) \cdot \hat{n}^\mathrm{in}`
        is added as the column ``raman_polarized``.
    polarization_out
        Vector of shape ``(3,)`` for the polarization of the outgoing
        (scattered) light.
    t_sigma
        Width in ps of the Gaussian window multiplied onto the ACF;
        ``None`` disables windowing.
    window_size
        Length in ps of the ACF that is kept. ``None`` (default) means
        :math:`5 \times` :attr:`t_sigma` if :attr:`t_sigma` is set and the
        full ACF otherwise. A warning is emitted when :attr:`window_size` is
        below :math:`3 \times` :attr:`t_sigma` because the Gaussian window is
        then cut off before it has meaningfully decayed. A
        :class:`ValueError` is raised when :attr:`window_size` exceeds the
        autocorrelation data available from :attr:`polarizability`.

    Returns
    -------
    DataFrame
        Always holds ``angular_frequency`` (THz), ``wavenumber_invcm``
        (:math:`\mathrm{cm}^{-1}`), ``raman_isotropic`` (proportional to
        :math:`\mathrm{FT}[\langle\gamma(0)\gamma(t)\rangle]`) and
        ``raman_anisotropic`` (proportional to
        :math:`\mathrm{FT}[\langle\mathrm{Tr}[\beta(0)\beta(t)]\rangle]`).
        ``raman_polarized`` is included as well when both polarization
        vectors are given.

    Raises
    ------
    ValueError
        If :attr:`polarizability` lacks one of the required columns.
    """
    absent = [name for name in _POL_COLS if name not in polarizability.columns]
    if absent:
        raise ValueError(f'polarizability is missing columns: {absent}')

    # Subtract the time average (Rayleigh component) so that only fluctuations remain.
    tensor = polarizability[_POL_COLS]
    pol = tensor - tensor.mean()

    n_in, n_out = _parse_polarization_pair(polarization_in, polarization_out)

    comp = {name: pol[name].to_numpy() for name in _POL_COLS}

    # Isotropic part gamma(t) = Tr[alpha(t)] / 3.
    gamma = (comp['xx'] + comp['yy'] + comp['zz']) / 3.0
    # Traceless part beta = alpha - gamma * I; only the diagonal is modified,
    # the off-diagonal elements of beta equal those of alpha.
    traceless_diagonal = [comp[name] - gamma for name in ('xx', 'yy', 'zz')]

    with np.errstate(invalid='ignore', divide='ignore'):
        # Isotropic Raman: FT[<gamma(0) * gamma(t)>].
        w, L_iso = _acf_to_psd(
            _compute_correlation_function(gamma, gamma), dt, t_sigma, window_size)

        # Anisotropic Raman: FT[<Tr[beta(0) * beta(t)]>], i.e. the sum of the
        # per-element ACFs; off-diagonal elements count twice (beta_ij = beta_ji).
        first, *others = traceless_diagonal
        acf_aniso = _compute_correlation_function(first, first)
        for element in others:
            acf_aniso = acf_aniso + _compute_correlation_function(element, element)
        for name in ('yz', 'xz', 'xy'):
            acf_aniso = acf_aniso + 2.0 * _compute_correlation_function(comp[name], comp[name])
        _, L_aniso = _acf_to_psd(acf_aniso, dt, t_sigma, window_size)

    table = np.column_stack((w * _rad_s_to_thz, w * _rad_s_to_invcm, L_iso, L_aniso))
    df = DataFrame(
        table,
        columns=['angular_frequency', 'wavenumber_invcm', 'raman_isotropic', 'raman_anisotropic'])

    if n_in is None:
        return df

    # Polarization-resolved signal s(t) = n_out . alpha(t) . n_in, written out over
    # the six stored components using the symmetry alpha_ij = alpha_ji.
    weights = {
        'xx': n_in[0]*n_out[0],
        'yy': n_in[1]*n_out[1],
        'zz': n_in[2]*n_out[2],
        'yz': n_in[1]*n_out[2] + n_in[2]*n_out[1],
        'xz': n_in[0]*n_out[2] + n_in[2]*n_out[0],
        'xy': n_in[0]*n_out[1] + n_in[1]*n_out[0],
    }
    s = weights['xx'] * comp['xx']
    for name in _POL_COLS[1:]:
        s = s + weights[name] * comp[name]
    with np.errstate(invalid='ignore', divide='ignore'):
        _, L_pol = _acf_to_psd(
            _compute_correlation_function(s, s), dt, t_sigma, window_size)
    df['raman_polarized'] = L_pol

    return df

377

378

379# ── Quantum correction ─────────────────────────────────────────────────────────

380

def apply_quantum_correction(
    df: DataFrame,
    temperature: float,
    column: str,
    order: str = 'first',
    force: bool = False,
) -> DataFrame:
    r"""Apply a quantum correction to a classically computed spectrum.

    Classical MD underestimates spectral intensities because it samples the
    classical Boltzmann distribution rather than the Bose-Einstein distribution.
    The correction factor depends on the scattering order and mode type
    [Cardona1982]_ [Rosander2025]_.

    Parameters
    ----------
    df
        DataFrame with an ``angular_frequency`` column (THz) and the column
        to correct.
    temperature
        Temperature in K; must be positive and finite.
    column
        Name of the spectral column to correct (e.g. ``'raman_isotropic'``,
        ``'ir_intensity'``, ``'epsilon_imag'``).
    order
        Correction type:

        ``'first'``
            First-order scattering (IR absorption and first-order Raman):
            :math:`f = \beta\hbar\omega / (1 - e^{-\beta\hbar\omega})`.
        ``'overtone'``
            Second-order overtone (same mode, :math:`\omega_1 = \omega/2`):
            :math:`f = [y/(1 - e^{-y})]^2 (2 - e^{-y})` with
            :math:`y = \beta\hbar\omega/2`.
        ``'combination'``
            Second-order combination band upper bound
            (:math:`\omega_1 = \omega_2 = \omega/2`):
            :math:`f = [y/(1 - e^{-y})]^2` with :math:`y = \beta\hbar\omega/2`.
    force
        If ``True``, overwrite an existing ``column + '_qm'`` column.
        Default ``False`` raises :class:`ValueError` to prevent accidental
        double-correction.

    Returns
    -------
    DataFrame
        Copy of :attr:`df` with a new column ``column + '_qm'`` containing
        the quantum-corrected intensities. The input is not mutated.
        Rows with non-positive frequency are left unscaled (factor 1).

    Raises
    ------
    ValueError
        If :attr:`order` is not one of the supported values, if
        :attr:`temperature` is not a positive, finite number, or if the
        ``column + '_qm'`` column already exists and :attr:`force` is ``False``.

    References
    ----------
    .. [Cardona1982] M. Cardona, *Resonance phenomena*, in *Topics in Applied
       Physics*, Vol. 50, edited by M. Cardona and G. Güntherodt
       (Springer, Berlin, 1982).
    .. [Rosander2025] Rosander *et al.*, Phys. Rev. B **111**, 064107 (2025),
       Supp. Eqs. S5, S7, S10.
    """
    if order not in ('first', 'overtone', 'combination'):
        raise ValueError("order must be 'first', 'overtone', or 'combination'")

    # T = 0 would divide by zero and T < 0 would make every argument non-positive,
    # silently returning the uncorrected spectrum; reject both up front.
    if not np.isfinite(temperature) or temperature <= 0:
        raise ValueError(
            f'temperature must be a positive, finite number in K, got {temperature!r}'
        )

    if not force and column + '_qm' in df.columns:
        raise ValueError(
            f"column '{column}_qm' already exists; pass force=True to overwrite"
        )

    w = df['angular_frequency'].to_numpy() * 1e12  # THz → rad/s
    beta = 1.0 / (kB_SI * temperature)
    x = beta * hbar_SI * w  # beta*hbar*omega (dimensionless)

    def _bose_ratio(arg):
        # arg / (1 - exp(-arg)); the limit arg -> 0 is 1, which is also used for arg <= 0.
        return np.where(arg > 0, arg / (1.0 - np.exp(-arg)), 1.0)

    # np.where evaluates both branches, so the discarded arg <= 0 entries may
    # divide by zero or overflow; those warnings are irrelevant.
    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
        if order == 'first':
            # f = beta*hbar*omega / (1 - exp(-beta*hbar*omega))
            factor = _bose_ratio(x)
        else:
            # Both second-order variants are evaluated at y = beta*hbar*omega/2.
            y = x / 2.0
            factor = _bose_ratio(y) ** 2
            if order == 'overtone':
                # f = [y/(1-exp(-y))]**2 * (2-exp(-y))
                factor = factor * np.where(y > 0, 2.0 - np.exp(-y), 1.0)

    result = df.copy()
    result[column + '_qm'] = df[column].to_numpy() * factor
    return result

468

469

470# ── Dielectric tensor ─────────────────────────────────────────────────────────

471

# (row index, column index, label) of the six independent tensor components.
_TENSOR_COMPONENTS = [(0, 0, 'xx'), (1, 1, 'yy'), (2, 2, 'zz'),
                      (1, 2, 'yz'), (0, 2, 'xz'), (0, 1, 'xy')]


def get_dielectric_function(
    signal: DataFrame,
    volume: float,
    temperature: float,
    column: str = 'dP',
    dt: float = None,
    t_sigma: float = None,
    window_size: float = None,
    return_real_part: bool = True,
    kk_method: str = 'vectorized',
    kk_max_memory_gb: float = 4.0,
) -> DataFrame:
    r"""Compute the dielectric function from a three-component polarization time series.

    All six unique Voigt components of :math:`\epsilon_2^{\alpha\beta}(\omega)`
    (xx, yy, zz, yz, xz, xy) are evaluated from the three-component time series
    written by GPUMD. If :attr:`return_real_part` is ``True`` (the default), the
    real part :math:`\epsilon_1^{\alpha\beta}(\omega)` is obtained through the
    Kramers-Kronig relation.

    The imaginary part is related to the symmetrized cross-correlation via

    .. math::

        \epsilon_2^{\alpha\beta}(\omega) = \frac{\beta}{\epsilon_0 \omega V}
        \,\mathrm{Re}\!\left[\int_0^\infty
        \langle \dot{P}_\alpha(0)\,\dot{P}_\beta(t) \rangle
        e^{-i\omega t}\,dt\right],

    where :math:`\beta = 1/(k_\mathrm{B}T)` and :math:`V` is the cell volume.

    NOTE(review): the implementation multiplies ``beta / V`` with the transform
    of the evenly extended (two-sided) ACF produced by :func:`_psd_from_acf`,
    while the formula above is written as a one-sided integral -- confirm the
    intended prefactor convention.

    Parameters
    ----------
    signal
        ``DataFrame`` as returned by :func:`~calorine.gpumd.read_dpdt` or
        :func:`~calorine.gpumd.read_dipole`; :attr:`column` selects which of
        its columns are used.
    volume
        Volume of the simulation cell in Å³.
    temperature
        Temperature in K.
    column
        Chooses the columns of :attr:`signal` and thereby whether the input
        is already a time derivative:

        ``'dP'`` (default)
            ``dPx``, ``dPy``, ``dPz`` from :func:`~calorine.gpumd.read_dpdt`
            (qNEP), i.e. :math:`\dot{\mathbf{P}}(t)` in e·Å/fs.
        ``'P'``
            ``Px``, ``Py``, ``Pz`` from :func:`~calorine.gpumd.read_dpdt`
            (qNEP), i.e. the polarization :math:`\mathbf{P}(t)` in e·Å.
        ``'mu'``
            ``mu_x``, ``mu_y``, ``mu_z`` from
            :func:`~calorine.gpumd.read_dipole` (TNEP), i.e. the dipole
            moment :math:`\boldsymbol{\mu}(t)` in e·Å.

    dt
        Time between consecutive frames in ps. Taken from the ``time`` column
        if one is present (as in :func:`~calorine.gpumd.read_dpdt` output);
        has to be passed explicitly for step-indexed input such as the output
        of :func:`~calorine.gpumd.read_dipole`.
    t_sigma
        Width in ps of the Gaussian window multiplied onto the ACF;
        ``None`` disables windowing.
    window_size
        Length in ps of the ACF that is kept. ``None`` (default) means
        :math:`5 \times` :attr:`t_sigma` if :attr:`t_sigma` is set and the
        full ACF (i.e. the full trajectory length) otherwise. This directly
        fixes the size of the frequency grid (:math:`n \approx`
        :attr:`window_size` :math:`/\,dt`, or the number of frames in
        :attr:`signal` when unset) handed to the Kramers-Kronig step; see
        :attr:`kk_method` for how the cost of that step scales with :math:`n`.
        A warning is emitted when :attr:`window_size` is below
        :math:`3 \times` :attr:`t_sigma` because the Gaussian window is then
        cut off before it has meaningfully decayed. A :class:`ValueError` is
        raised when :attr:`window_size` exceeds the autocorrelation data
        available from :attr:`signal`.
    return_real_part
        If ``True`` (default), the real part
        :math:`\epsilon_1^{\alpha\beta}(\omega)` is computed via the
        Kramers-Kronig relation and appended as
        ``epsilon_real_{xx,yy,zz,yz,xz,xy}`` columns.
    kk_method
        Integration method forwarded to
        :func:`~calorine.tools.apply_kramers_kronig` when
        :attr:`return_real_part` is ``True``. ``'vectorized'`` (default) uses
        an exact trapezoid rule over an :math:`n \times n` matrix
        (:math:`\mathcal{O}(n^2)` time and memory, with :math:`n` the size of
        the frequency grid set by :attr:`window_size`); ``'fft'`` uses a faster
        (:math:`\mathcal{O}(n \log n)` time, :math:`\mathcal{O}(n)` memory)
        Hilbert transform approximation. Ignored when
        :attr:`return_real_part` is ``False``.
    kk_max_memory_gb
        Memory limit (in GB) forwarded to
        :func:`~calorine.tools.apply_kramers_kronig` as ``max_memory_gb`` when
        :attr:`kk_method` is ``'vectorized'``. If the estimated peak memory of
        the :math:`n \times n` matrix would exceed it, :class:`ValueError` is
        raised before the allocation is attempted, rather than letting the
        process hang or get OOM-killed; pass ``kk_method='fft'``, a smaller
        :attr:`window_size`, or a larger :attr:`kk_max_memory_gb` instead.
        ``None`` disables the check. Ignored when :attr:`return_real_part` is
        ``False`` or :attr:`kk_method` is ``'fft'``.

    Returns
    -------
    DataFrame
        Holds ``angular_frequency`` (THz), ``wavenumber_invcm``
        (:math:`\mathrm{cm}^{-1}`),
        ``epsilon_imag_{xx,yy,zz,yz,xz,xy}`` (dimensionless, imaginary
        dielectric tensor components) and
        ``conductivity_{xx,yy,zz,yz,xz,xy}`` (S/m); only rows with positive
        frequency are kept. With :attr:`return_real_part` equal to ``True``,
        ``epsilon_real_{xx,yy,zz,yz,xz,xy}`` columns are appended.
    """
    arr, dt, derivative = _resolve_signal(signal, column, dt)

    time_scale = 1e-15 if derivative else 1.0
    to_SI = e_SI * 1e-10 / time_scale
    volume_SI = volume * 1e-30
    beta = 1.0 / (kB_SI * temperature)

    # Fluctuations of each Cartesian component, converted to SI units.
    signal_SI = (arr - arr.mean(axis=0)) * to_SI

    labels = [lbl for _, _, lbl in _TENSOR_COMPONENTS]
    w = None
    conductivities = {}
    eps_imags = {}
    for i, j, lbl in _TENSOR_COMPONENTS:
        zi = signal_SI[:, i]
        zj = signal_SI[:, j]
        if i == j:
            acf = _compute_correlation_function(zi, zi)
        else:
            # Off-diagonal elements use the symmetrized cross-correlation.
            acf = (_compute_correlation_function(zi, zj)
                   + _compute_correlation_function(zj, zi)) / 2
        w, psd = _acf_to_psd(acf, dt, t_sigma, window_size)
        if not derivative:
            # PSD[dx/dt] = omega**2 * PSD[x] when the input is the signal itself.
            psd = w ** 2 * psd
        sigma = (beta / volume_SI) * psd
        # w contains zero; silence the resulting division warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            eps_imags[lbl] = sigma / (eps0_SI * w)
        conductivities[lbl] = sigma

    positive = w > 0
    cols = ['angular_frequency', 'wavenumber_invcm']
    cols += [f'epsilon_imag_{lbl}' for lbl in labels]
    cols += [f'conductivity_{lbl}' for lbl in labels]
    arrays = [w[positive] * _rad_s_to_thz, w[positive] * _rad_s_to_invcm]
    arrays += [eps_imags[lbl][positive] for lbl in labels]
    arrays += [conductivities[lbl][positive] for lbl in labels]
    result = DataFrame(np.column_stack(arrays), columns=cols)

    if not return_real_part:
        return result
    from .kramers_kronig import apply_kramers_kronig
    return apply_kramers_kronig(result, method=kk_method, max_memory_gb=kk_max_memory_gb)