Source code for bayesflow.networks.mlp.mlp

from collections.abc import Sequence
from typing import Literal

import keras
from keras.saving import register_keras_serializable as serializable

from bayesflow.types import Tensor
from bayesflow.utils import keras_kwargs
from .hidden_block import ConfigurableHiddenBlock


@serializable(package="bayesflow.networks")
class MLP(keras.Layer):
    """
    Implements a simple configurable MLP with optional residual connections and dropout.

    If used in conjunction with a coupling net, a diffusion model, or a flow matching model,
    it assumes that the input and conditions are already concatenated (i.e., this is a
    single-input model).
    """

    def __init__(
        self,
        widths: Sequence[int] = (256, 256),
        *,
        activation: str = "mish",
        kernel_initializer: str = "he_normal",
        residual: bool = False,
        dropout: Literal[0, None] | float = 0.05,
        spectral_normalization: bool = False,
        **kwargs,
    ):
        """
        Implements a flexible multi-layer perceptron (MLP) with optional residual
        connections, dropout, and spectral normalization.

        This MLP can be used as a general-purpose feature extractor or function
        approximator, supporting configurable depth, width, activation functions, and
        weight initializations. If `residual` is enabled, each layer includes a skip
        connection for improved gradient flow. The model also supports dropout for
        regularization and spectral normalization for stability in learning smooth
        functions. The architecture is specified via an explicit sequence of layer
        widths (`widths`).

        Parameters
        ----------
        widths : Sequence[int], optional
            Defines the number of hidden units per layer, as well as the number of
            layers to be used.
        activation : str, optional
            Activation function applied in the hidden layers, such as "mish".
            Default is "mish".
        kernel_initializer : str, optional
            Initialization strategy for kernel weights, such as "he_normal".
            Default is "he_normal".
        residual : bool, optional
            Whether to use residual connections for improved training stability.
            Default is False.
        dropout : float or None, optional
            Dropout rate applied within the MLP layers for regularization.
            Default is 0.05.
        spectral_normalization : bool, optional
            Whether to apply spectral normalization to stabilize training.
            Default is False.
        **kwargs
            Additional keyword arguments passed to the Keras layer initialization.
        """
        super().__init__(**keras_kwargs(kwargs))

        self.res_blocks = []
        for width in widths:
            self.res_blocks.append(
                ConfigurableHiddenBlock(
                    units=width,
                    activation=activation,
                    kernel_initializer=kernel_initializer,
                    residual=residual,
                    dropout=dropout,
                    spectral_normalization=spectral_normalization,
                )
            )
    def build(self, input_shape):
        # Build each block on the running shape, then propagate the output
        # shape forward so the next block is built on the correct input shape.
        for layer in self.res_blocks:
            layer.build(input_shape)
            input_shape = layer.compute_output_shape(input_shape)
    def call(self, x: Tensor, training: bool = False, **kwargs) -> Tensor:
        for layer in self.res_blocks:
            x = layer(x, training=training)
        return x
    def compute_output_shape(self, input_shape):
        for layer in self.res_blocks:
            input_shape = layer.compute_output_shape(input_shape)
        return input_shape
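
A minimal usage sketch (not part of the module source), assuming Keras 3 is installed and that `MLP` is re-exported from `bayesflow.networks`:

import keras
from bayesflow.networks import MLP  # assumed public import path

# Two hidden blocks of 128 units each, with residual connections.
mlp = MLP(widths=(128, 128), residual=True, dropout=0.1)

x = keras.random.normal((32, 16))  # batch of 32 samples with 16 features
y = mlp(x, training=False)         # layers are built on first call
print(y.shape)  # (32, 128): the output width matches the last entry of `widths`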