# Copyright (c) 2022 The BayesFlow Developers
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import tensorflow as tf
from bayesflow import default_settings
from bayesflow.coupling_networks import CouplingLayer
from bayesflow.helper_functions import build_meta_dict
from bayesflow.helper_networks import MCDropout
class InvertibleNetwork(tf.keras.Model):
"""Implements a chain of conditional invertible coupling layers for conditional density estimation."""
available_designs = ("affine", "spline", "interleaved")
def __init__(
self,
num_params,
num_coupling_layers=6,
coupling_design="affine",
coupling_settings=None,
permutation="fixed",
use_act_norm=True,
act_norm_init=None,
use_soft_flow=False,
soft_flow_bounds=(1e-3, 5e-2),
**kwargs,
):
"""Creates a chain of coupling layers with optional `ActNorm` layers in-between. Implements ideas from:
[1] Radev, S. T., Mertens, U. K., Voss, A., Ardizzone, L., & Köthe, U. (2020).
BayesFlow: Learning complex stochastic models with invertible neural networks.
IEEE Transactions on Neural Networks and Learning Systems.
[2] Kim, H., Lee, H., Kang, W. H., Lee, J. Y., & Kim, N. S. (2020).
Softflow: Probabilistic framework for normalizing flow on manifolds.
Advances in Neural Information Processing Systems, 33, 16388-16397.
[3] Ardizzone, L., Kruse, J., Lüth, C., Bracher, N., Rother, C., & Köthe, U. (2020).
Conditional invertible neural networks for diverse image-to-image translation.
In DAGM German Conference on Pattern Recognition (pp. 373-387). Springer, Cham.
[4] Durkan, C., Bekasov, A., Murray, I., & Papamakarios, G. (2019).
Neural spline flows. Advances in Neural Information Processing Systems, 32.
[5] Kingma, D. P., & Dhariwal, P. (2018).
Glow: Generative flow with invertible 1x1 convolutions.
Advances in Neural Information Processing Systems, 31.
Parameters
----------
num_params : int
The number of parameters to perform inference on. Equivalently, the dimensionality of the
latent space.
num_coupling_layers : int, optional, default: 6
The number of coupling layers to use as defined in [1] and [2]. In general, more coupling layers
will give you more expressive power, but will be slower and may need more simulations to train.
Typically, between 4 and 10 coupling layers should suffice for most applications.
coupling_design : str or callable, optional, default: 'affine'
The type of internal coupling network to use. Must be in ['affine', 'spline', 'interleaved'].
The first corresponds to the architecture in [3, 5], the second corresponds to a modified
version of [4]. The third option will alternate between affine and spline layers, for example,
if num_coupling_layers == 3, the chain will consist of ["affine", "spline", "affine"] layers.
In general, spline couplings run slower than affine couplings, but require fewer coupling
layers. Spline couplings may work best with complex (e.g., multimodal) low-dimensional
problems. The difference will become less and less pronounced as we move to higher dimensions.
Note: This is the first setting you may want to change, if inference does not work as expected!
coupling_settings : dict or None, optional, default: None
The coupling network settings to pass to the internal coupling layers. See ``default_settings``
for possible settings. Below are two examples.
Examples:
            1. If using ``coupling_design='affine'``, you may want to turn on Monte Carlo Dropout and
            use an ELU activation function for the internal networks. You can do this by providing:
``
coupling_settings={
'mc_dropout' : True,
'dense_args' : dict(units=128, activation='elu')
}
``
2. If using ``coupling_design='spline'``, you may want to change the number of learnable bins
and increase the dropout probability (i.e., more regularization to guard against overfitting):
``
coupling_settings={
'dropout_prob': 0.2,
'bins' : 32,
}
``
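            3. If using ``coupling_design='interleaved'``, settings apply per design, so you would
            provide a dictionary with the keys ``'affine'`` and ``'spline'`` (a sketch combining the
            assumed settings from the two examples above):
            ``
            coupling_settings={
                'affine' : {'mc_dropout' : True},
                'spline' : {'bins' : 32}
            }
            ``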
permutation : str or None, optional, default: 'fixed'
            Whether to use permutations between coupling layers. Highly recommended if ``num_coupling_layers > 1``.
            Important: Must be in ['fixed', 'learnable', None].
use_act_norm : bool, optional, default: True
Whether to use activation normalization after each coupling layer, as used in [5].
Recommended to keep default.
act_norm_init : np.ndarray of shape (num_simulations, num_params) or None, optional, default: None
Optional data-dependent initialization for the internal ``ActNorm`` layers, as done in [5]. Could be helpful
for deep invertible networks.
use_soft_flow : bool, optional, default: False
            Whether to perturb the target distribution (i.e., parameters) with a small amount of independent
noise, as done in [2]. Could be helpful for degenerate distributions.
soft_flow_bounds : tuple(float, float), optional, default: (1e-3, 5e-2)
The bounds of the continuous uniform distribution from which the noise scale would be sampled
at each iteration. Only relevant when ``use_soft_flow=True``.
**kwargs : dict
Optional keyword arguments (e.g., name) passed to the tf.keras.Model __init__ method.
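        Examples
        --------
        A minimal usage sketch; the shapes, the summary dimension, and the import path are
        illustrative assumptions::

            import tensorflow as tf
            from bayesflow.inference_networks import InvertibleNetwork

            # Estimate 4 parameters from 64-dimensional summary statistics
            network = InvertibleNetwork(num_params=4, coupling_design="spline")
            targets = tf.random.normal((32, 4))      # parameters theta
            condition = tf.random.normal((32, 64))   # learned summary statistics
            z, log_det_J = network(targets, condition)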
"""
super().__init__(**kwargs)
layer_settings = dict(
latent_dim=num_params,
permutation=permutation,
use_act_norm=use_act_norm,
act_norm_init=act_norm_init,
)
self.coupling_layers = self._create_coupling_layers(
layer_settings, coupling_settings, coupling_design, num_coupling_layers
)
self.soft_flow = use_soft_flow
self.soft_low = soft_flow_bounds[0]
self.soft_high = soft_flow_bounds[1]
self.permutation = permutation
self.use_act_norm = use_act_norm
self.latent_dim = num_params
def call(self, targets, condition, inverse=False, **kwargs):
"""Performs one pass through an invertible chain (either inverse or forward).
Parameters
----------
targets : tf.Tensor
The estimation quantities of interest, shape (batch_size, ...)
condition : tf.Tensor
The conditional data x, shape (batch_size, summary_dim)
inverse : bool, default: False
Flag indicating whether to run the chain forward or backwards
Returns
-------
(z, log_det_J) : tuple(tf.Tensor, tf.Tensor)
            If inverse=False: The transformed input and the log-determinant of the Jacobian of the
            transformation, z shape: (batch_size, ...), log_det_J shape: (batch_size, ...)
        target : tf.Tensor
            If inverse=True: The transformed output, shape (batch_size, ...)
Notes
-----
If ``inverse=False``, the return is ``(z, log_det_J)``.\n
If ``inverse=True``, the return is ``target``.
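        Examples
        --------
        A sketch of the inverse (sampling) direction, assuming a built ``network`` and a
        ``condition`` tensor whose batch dimension matches ``num_draws``::

            z_draws = tf.random.normal((num_draws, network.latent_dim))
            theta_draws = network(z_draws, condition, inverse=True)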
"""
if inverse:
return self.inverse(targets, condition, **kwargs)
return self.forward(targets, condition, **kwargs)
def forward(self, targets, condition, **kwargs):
"""Performs a forward pass through the chain."""
        # Add noise to targets if using SoftFlow; done explicitly here rather
        # than in call(), since both methods are public
if self.soft_flow and condition is not None:
# Extract shapes of tensors
target_shape = tf.shape(targets)
condition_shape = tf.shape(condition)
            # Needs to be concatenable with condition
if len(condition_shape) == 2:
shape_scale = (condition_shape[0], 1)
else:
shape_scale = (condition_shape[0], condition_shape[1], 1)
# Case training mode
if kwargs.get("training"):
noise_scale = tf.random.uniform(shape=shape_scale, minval=self.soft_low, maxval=self.soft_high)
# Case inference mode
else:
noise_scale = tf.zeros(shape=shape_scale) + self.soft_low
# Perturb data with noise (will broadcast to all dimensions)
if len(shape_scale) == 2 and len(target_shape) == 3:
targets += tf.expand_dims(noise_scale, axis=1) * tf.random.normal(shape=target_shape)
else:
targets += noise_scale * tf.random.normal(shape=target_shape)
# Augment condition with noise scale variate
condition = tf.concat((condition, noise_scale), axis=-1)
z = targets
log_det_Js = []
for layer in self.coupling_layers:
z, log_det_J = layer(z, condition, **kwargs)
log_det_Js.append(log_det_J)
        # Sum log-Jacobian determinants over all layers (coupling blocks) to obtain the total log-determinant.
log_det_J = tf.add_n(log_det_Js)
return z, log_det_J
def inverse(self, z, condition, **kwargs):
"""Performs a reverse pass through the chain. Assumes that it is only used
in inference mode, so ``**kwargs`` contains ``training=False``."""
        # Add noise to targets if using SoftFlow; done explicitly here rather
        # than in call(), since both methods are public
if self.soft_flow and condition is not None:
            # Needs to be concatenable with condition
shape_scale = (
(condition.shape[0], 1) if len(condition.shape) == 2 else (condition.shape[0], condition.shape[1], 1)
)
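            # At inference time, use a fixed small noise scale instead of sampling one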
noise_scale = tf.zeros(shape=shape_scale) + 2.0 * self.soft_low
# Augment condition with noise scale variate
condition = tf.concat((condition, noise_scale), axis=-1)
target = z
for layer in reversed(self.coupling_layers):
target = layer(target, condition, inverse=True, **kwargs)
return target
@staticmethod
def _create_coupling_layers(settings, coupling_settings, coupling_design, num_coupling_layers):
"""Helper method to create a list of coupling layers. Takes care
of the different options for coupling design.
"""
if coupling_design not in InvertibleNetwork.available_designs:
            raise NotImplementedError(f"Coupling design should be one of {InvertibleNetwork.available_designs}.")
# Case affine or spline
if coupling_design != "interleaved":
design = coupling_design
_coupling_settings = coupling_settings
coupling_layers = [
CouplingLayer(coupling_design=design, coupling_settings=_coupling_settings, **settings)
for _ in range(num_coupling_layers)
]
# Case interleaved, starts with affine
else:
coupling_layers = []
designs = (["affine", "spline"] * int(np.ceil(num_coupling_layers / 2)))[:num_coupling_layers]
for design in designs:
                # Settings must be either None or a dictionary with the design names as keys ("affine", "spline")
_coupling_settings = None if coupling_settings is None else coupling_settings[design]
layer = CouplingLayer(coupling_design=design, coupling_settings=_coupling_settings, **settings)
coupling_layers.append(layer)
return coupling_layers
@classmethod
def create_config(cls, **kwargs):
""" "Used to create the settings dictionary for the internal networks of the invertible
network. Will fill in missing"""
settings = build_meta_dict(user_dict=kwargs, default_setting=default_settings.DEFAULT_SETTING_INVERTIBLE_NET)
return settings
class EvidentialNetwork(tf.keras.Model):
"""Implements a network whose outputs are the concentration parameters of a Dirichlet density.
Follows ideas from:
[1] Radev, S. T., D'Alessandro, M., Mertens, U. K., Voss, A., Köthe, U., & Bürkner, P. C. (2021).
Amortized Bayesian model comparison with evidential deep learning.
IEEE Transactions on Neural Networks and Learning Systems.
[2] Sensoy, M., Kaplan, L., & Kandemir, M. (2018).
Evidential deep learning to quantify classification uncertainty.
Advances in neural information processing systems, 31.
"""
def __init__(self, num_models, dense_args=None, num_dense=3, output_activation="softplus", **kwargs):
"""Creates an instance of an evidential network for amortized model comparison.
Parameters
----------
num_models : int
            The number of candidate (competing) models for the comparison scenario.
dense_args : dict or None, optional, default: None
The arguments for a tf.keras.layers.Dense layer. If None, defaults will be used.
num_dense : int, optional, default: 3
The number of dense layers for the main network part.
output_activation : str or callable, optional, default: 'softplus'
The activation function to use for the network outputs.
Important: needs to have positive outputs.
**kwargs : dict, optional, default: {}
Optional keyword arguments (e.g., name) passed to the tf.keras.Model __init__ method.
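        Examples
        --------
        A minimal sketch; the shapes and the summary dimension are illustrative assumptions::

            import tensorflow as tf

            net = EvidentialNetwork(num_models=3)
            summaries = tf.random.normal((16, 32))   # e.g., outputs of a summary network
            evidences = net(summaries)               # Dirichlet concentrations, shape (16, 3)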
"""
super().__init__(**kwargs)
if dense_args is None:
dense_args = default_settings.DEFAULT_SETTING_DENSE_EVIDENTIAL
# A network to increase representation power
self.dense = tf.keras.Sequential([tf.keras.layers.Dense(**dense_args) for _ in range(num_dense)])
# The layer to output model evidences
self.alpha_layer = tf.keras.layers.Dense(
num_models,
activation=output_activation,
**{k: v for k, v in dense_args.items() if k != "units" and k != "activation"},
)
self.num_models = num_models
def call(self, condition, **kwargs):
"""Computes evidences for model comparison given a batch of data and optional concatenated context,
        typically passed through a summary network.
Parameters
----------
condition : tf.Tensor of shape (batch_size, ...)
The input variables used for determining ``p(model | condition)``
Returns
-------
        evidence : tf.Tensor of shape (batch_size, num_models)
            The learned model evidences
"""
return self.evidence(condition, **kwargs)
@tf.function
    def evidence(self, condition, **kwargs):
        """Computes the learned model evidences (Dirichlet concentration parameters)
        for a batch of conditions. The positive network outputs are shifted by one,
        so the resulting concentration parameters are strictly greater than one."""

        # Compute a learned representation, then positive outputs (alphas)
        rep = self.dense(condition, **kwargs)
        alpha = self.alpha_layer(rep, **kwargs)
        # Shift by one to obtain evidences (concentration parameters > 1)
        evidence = alpha + 1.0
        return evidence
def sample(self, condition, n_samples, **kwargs):
"""Samples posterior model probabilities from the higher-order Dirichlet density.
Parameters
----------
condition : tf.Tensor
The summary of the observed (or simulated) data, shape (n_data_sets, ...)
n_samples : int
Number of samples to obtain from the approximate posterior
Returns
-------
        pm_samples : np.ndarray
            The posterior draws from the Dirichlet distribution, shape (n_samples, n_data_sets, num_models)
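        Examples
        --------
        A sketch, reusing the illustrative ``net`` and ``summaries`` from the constructor example::

            pm_samples = net.sample(summaries, n_samples=500)   # shape (500, 16, 3)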
"""
alpha = self.evidence(condition, **kwargs)
n_datasets = alpha.shape[0]
        rng = np.random.default_rng()
        pm_samples = np.stack([rng.dirichlet(alpha[n, :], size=n_samples) for n in range(n_datasets)], axis=1)
return pm_samples
@classmethod
def create_config(cls, **kwargs):
""" "Used to create the settings dictionary for the internal networks of the invertible
network. Will fill in missing"""
settings = build_meta_dict(user_dict=kwargs, default_setting=default_settings.DEFAULT_SETTING_EVIDENTIAL_NET)
return settings
class PMPNetwork(tf.keras.Model):
"""Implements a network that approximates posterior model probabilities (PMPs) as employed in [1].
[1] Elsemüller, L., Schnuerch, M., Bürkner, P. C., & Radev, S. T. (2023).
A Deep Learning Method for Comparing Bayesian Hierarchical Models.
arXiv preprint arXiv:2301.11873.
"""
def __init__(
self,
num_models,
dense_args=None,
num_dense=3,
dropout=True,
mc_dropout=False,
dropout_prob=0.05,
output_activation=tf.nn.softmax,
**kwargs,
):
"""Creates an instance of a PMP network for amortized model comparison.
Parameters
----------
num_models : int
            The number of candidate (competing) models for the comparison scenario.
dense_args : dict or None, optional, default: None
The arguments for a tf.keras.layers.Dense layer. If None, defaults will be used.
num_dense : int, optional, default: 3
The number of dense layers for the main network part.
dropout : bool, optional, default: True
Whether to use dropout in-between the hidden layers.
mc_dropout : bool, optional, default: False
            Whether to use Monte Carlo dropout (i.e., Bayesian approximation) during inference
dropout_prob : float in (0, 1), optional, default: 0.05
            The dropout probability. Only has an effect if ``dropout=True`` or ``mc_dropout=True``
output_activation : callable, optional, default: tf.nn.softmax
The activation function to apply to the network outputs.
Important: Needs to have positive outputs and be bounded between 0 and 1.
**kwargs : dict, optional, default: {}
Optional keyword arguments (e.g., name) passed to the ``tf.keras.Model`` __init__ method.
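        Examples
        --------
        A minimal sketch with Monte Carlo dropout enabled; the shapes and the summary dimension
        are illustrative assumptions::

            import tensorflow as tf

            net = PMPNetwork(num_models=3, mc_dropout=True)
            summaries = tf.random.normal((16, 32))       # e.g., outputs of a summary network
            probs = net(summaries)                       # shape (16, 3), rows sum to 1
            logits = net(summaries, return_probs=False)  # pre-activation outputs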
"""
super().__init__(**kwargs)
# Pick default settings, if None provided
if dense_args is None:
dense_args = default_settings.DEFAULT_SETTING_DENSE_PMP
# Sequential model with optional (MC) Dropout
self.net = tf.keras.Sequential()
for _ in range(num_dense):
self.net.add(tf.keras.layers.Dense(**dense_args))
if mc_dropout:
self.net.add(MCDropout(dropout_prob))
elif dropout:
self.net.add(tf.keras.layers.Dropout(dropout_prob))
self.output_layer = tf.keras.layers.Dense(num_models)
self.output_activation = output_activation
self.num_models = num_models
def call(self, condition, return_probs=True, **kwargs):
"""Forward pass through the network. Computes approximated PMPs given a batch of data
and optional concatenated context, typically passed through a summary network.
Parameters
----------
condition : tf.Tensor of shape (batch_size, ...)
The input variables used for determining ``p(model | condition)``
return_probs : bool, optional, default: True
Whether to return probabilities or logits (pre-activation, unnormalized)
Returns
-------
out : tf.Tensor of shape (batch_size, ..., num_models)
The approximated PMPs (post-activation) or logits (pre-activation)
"""
rep = self.net(condition, **kwargs)
logits = self.output_layer(rep, **kwargs)
if return_probs:
return self.output_activation(logits)
return logits
def posterior_probs(self, condition, **kwargs):
"""Shortcut function to obtain posterior probabilities given a
condition tensor (e.g., summary statistics of data sets).
Parameters
----------
condition : tf.Tensor of shape (batch_size, ...)
The input variables used for determining ``p(model | condition)``
Returns
-------
out : tf.Tensor of shape (batch_size, ..., num_models)
The approximated PMPs
"""
return self(condition, return_probs=True, **kwargs)
def logits(self, condition, **kwargs):
"""Shortcut function to obtain logits given a condition tensor
(e.g., summary statistics of data sets).
Parameters
----------
condition : tf.Tensor of shape (batch_size, ...)
The input variables used for determining ``p(model | condition)``
Returns
-------
out : tf.Tensor of shape (batch_size, ..., num_models)
            The approximated logits (pre-activation, unnormalized)
"""
return self(condition, return_probs=False, **kwargs)
@classmethod
def create_config(cls, **kwargs):
"""Used to create the settings dictionary for the internal networks of the
network. Will fill in missing."""
settings = build_meta_dict(user_dict=kwargs, default_setting=default_settings.DEFAULT_SETTING_PMP_NET)
return settings