Source code for bayesflow.distributions.diagonal_student_t

import math

import numpy as np

import keras
from keras import ops

from bayesflow.types import Shape, Tensor
from bayesflow.utils import expand_tile
from bayesflow.utils.decorators import allow_batch_size
from bayesflow.utils.serialization import serializable, serialize

from .distribution import Distribution


@serializable("bayesflow.distributions")
class DiagonalStudentT(Distribution):
    """Implements a backend-agnostic diagonal Student-t distribution."""

    def __init__(
        self,
        df: int | float,
        loc: int | float | np.ndarray | Tensor = 0.0,
        scale: int | float | np.ndarray | Tensor = 1.0,
        trainable_parameters: bool = False,
        seed_generator: keras.random.SeedGenerator = None,
        **kwargs,
    ):
        """
        Initializes a backend-agnostic Student's t-distribution with optional learnable parameters.

        This class represents a Student's t-distribution, which is useful for modeling
        heavy-tailed data. The distribution is parameterized by degrees of freedom (`df`),
        location (`loc`), and scale (`scale`). These parameters can either be fixed or
        learned during training. The class also supports random number generation with an
        optional seed for reproducibility.

        Parameters
        ----------
        df : int or float
            Degrees of freedom for the Student's t-distribution. Lower values result in
            heavier tails, making the distribution more robust to outliers.
        loc : int, float, np.ndarray, or Tensor, optional
            The location parameter (mean) of the distribution. Default is 0.0.
        scale : int, float, np.ndarray, or Tensor, optional
            The scale parameter of the distribution. Note that this is not the standard
            deviation: for df > 2 the standard deviation is scale * sqrt(df / (df - 2)).
            Default is 1.0.
        trainable_parameters : bool, optional
            Whether to treat `loc` and `scale` as trainable parameters. Default is False.
        seed_generator : keras.random.SeedGenerator, optional
            A Keras seed generator for reproducible random sampling. If None, a new seed
            generator is created. Default is None.
        **kwargs
            Additional keyword arguments passed to the base `Distribution` class.
        """
        super().__init__(**kwargs)

        self.df = df
        self.loc = loc
        self.scale = scale

        self.trainable_parameters = trainable_parameters
        self.seed_generator = seed_generator or keras.random.SeedGenerator()

        self.log_normalization_constant = None
        self.dim = None
        self._loc = None
        self._scale = None
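
    # Usage sketch (illustrative only; the shapes and parameter values below are
    # assumptions, not taken from the source):
    #
    #   dist = DiagonalStudentT(df=5.0, loc=0.0, scale=1.0)
    #   dist.build((4,))                 # event dimension of 4
    #   x = dist.sample((1024,))         # Tensor of shape (1024, 4)
    #   lp = dist.log_prob(x)            # Tensor of shape (1024,)
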
    def build(self, input_shape: Shape) -> None:
        if self.built:
            return

        self.dim = int(input_shape[-1])

        # convert to tensor and broadcast if necessary
        self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
        self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")

        self.log_normalization_constant = (
            -0.5 * self.dim * math.log(self.df)
            - 0.5 * self.dim * math.log(math.pi)
            - math.lgamma(0.5 * self.df)
            + math.lgamma(0.5 * (self.df + self.dim))
            - ops.sum(keras.ops.log(self.scale))
        )

        if self.trainable_parameters:
            # initialize the trainable weights at the provided loc/scale values
            self._loc = self.add_weight(
                shape=ops.shape(self.loc),
                initializer=keras.initializers.Constant(self.loc),
                dtype="float32",
                trainable=True,
            )
            self._scale = self.add_weight(
                shape=ops.shape(self.scale),
                initializer=keras.initializers.Constant(self.scale),
                dtype="float32",
                trainable=True,
            )
        else:
            self._loc = self.loc
            self._scale = self.scale
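
    # For reference, the closed-form log-density that `log_normalization_constant` and
    # `log_prob` jointly implement (with nu = df, d = dim, diagonal scales sigma_i) is:
    #
    #   log p(x) = lgamma((nu + d) / 2) - lgamma(nu / 2)
    #              - (d / 2) * log(nu * pi) - sum_i log(sigma_i)
    #              - ((nu + d) / 2) * log(1 + (1 / nu) * sum_i ((x_i - mu_i) / sigma_i)^2)
    #
    # The first two lines are the normalization constant computed above; the last line
    # is the unnormalized term computed in `log_prob` below.
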
    def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
        mahalanobis_term = ops.sum((samples - self._loc) ** 2 / self._scale**2, axis=-1)
        result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)

        if normalize:
            result += self.log_normalization_constant

        return result
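
    # Sanity-check sketch (assumes scipy is installed; not part of this module): in one
    # dimension the density above reduces to a univariate Student's t, so `log_prob`
    # can be compared against scipy.stats.t.logpdf:
    #
    #   import numpy as np
    #   from scipy.stats import t as student_t
    #
    #   dist = DiagonalStudentT(df=5.0, loc=1.0, scale=2.0)
    #   dist.build((1,))
    #   x = np.linspace(-5.0, 5.0, 11, dtype="float32")[:, None]
    #   ours = keras.ops.convert_to_numpy(dist.log_prob(x))
    #   ref = student_t.logpdf(x[:, 0], df=5.0, loc=1.0, scale=2.0)
    #   np.testing.assert_allclose(ours, ref, atol=1e-5)
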
    @allow_batch_size
    def sample(self, batch_shape: Shape) -> Tensor:
        # As of writing this code, keras does not support the chi-square distribution,
        # nor does it support a scale or rate parameter in Gamma. Hence, we use the relation:
        # chi-square(df) = Gamma(shape = 0.5 * df, scale = 2) = Gamma(shape = 0.5 * df, scale = 1) * 2
        chi2_samples = keras.random.gamma(batch_shape, alpha=0.5 * self.df, seed=self.seed_generator) * 2.0

        # The chi-square samples need to be repeated across self.dim,
        # since for each element of batch_shape only one sample is created.
        chi2_samples = expand_tile(chi2_samples, n=self.dim, axis=-1)

        normal_samples = keras.random.normal(batch_shape + (self.dim,), seed=self.seed_generator)

        return self._loc + self._scale * normal_samples * ops.sqrt(self.df / chi2_samples)
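
    # A quick empirical check of the Gamma-to-chi-square relation used above (a sketch
    # using NumPy, which this module already imports; a chi-square(df) variable has
    # mean df and variance 2 * df):
    #
    #   df = 5.0
    #   rng = np.random.default_rng(0)
    #   chi2 = rng.gamma(shape=0.5 * df, scale=1.0, size=1_000_000) * 2.0
    #   print(chi2.mean(), chi2.var())   # approximately 5.0 and 10.0
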
    def get_config(self):
        base_config = super().get_config()

        config = {
            "df": self.df,
            "loc": self.loc,
            "scale": self.scale,
            "trainable_parameters": self.trainable_parameters,
            "seed_generator": self.seed_generator,
        }

        return base_config | serialize(config)
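
# Round-trip sketch (an assumption about the surrounding serialization machinery, not a
# guarantee from this module): with the @serializable registration above, instances
# should survive generic Keras object serialization:
#
#   dist = DiagonalStudentT(df=5.0)
#   config = keras.saving.serialize_keras_object(dist)
#   clone = keras.saving.deserialize_keras_object(config)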