Source code for bayesflow.diagnostics.plots.calibration_ecdf

from collections.abc import Callable, Mapping, Sequence

import numpy as np
import keras
import matplotlib.pyplot as plt

from ...utils.plot_utils import prepare_plot_data, add_titles_and_labels, prettify_subplots
from ...utils.ecdf import simultaneous_ecdf_bands
from ...utils.ecdf.ranks import fractional_ranks, distance_ranks


[docs] def calibration_ecdf( estimates: Mapping[str, np.ndarray] | np.ndarray, targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, test_quantities: dict[str, Callable] = None, difference: bool = False, stacked: bool = False, rank_type: str | np.ndarray = "fractional", figsize: Sequence[float] = None, label_fontsize: int = 16, legend_fontsize: int = 14, legend_location: str = "upper right", title_fontsize: int = 18, tick_fontsize: int = 12, rank_ecdf_color: str = "#132a70", fill_color: str = "grey", num_row: int = None, num_col: int = None, **kwargs, ) -> plt.Figure: """ Creates the empirical CDFs for each marginal rank distribution and plots it against a uniform ECDF. ECDF simultaneous bands are drawn using simulations from the uniform, as proposed by [1]. For models with many parameters, use `stacked=True` to obtain an idea of the overall calibration of a posterior approximator. To compute ranks based on the Euclidean distance to the origin or a reference, use `rank_type='distance'` (and pass a reference array, respectively). This can be used to check the joint calibration of the posterior approximator and might show potential biases in the posterior approximation which are not detected by the fractional ranks (e.g., when the prior equals the posterior). This is motivated by [2]. [1] Säilynoja, T., Bürkner, P. C., & Vehtari, A. (2022). Graphical test for discrete uniformity and its applications in goodness-of-fit evaluation and multiple sample comparison. Statistics and Computing, 32(2), 1-21. https://arxiv.org/abs/2103.10522 [2] Lemos, Pablo, et al. "Sampling-based accuracy testing of posterior estimators for general inference." International Conference on Machine Learning. PMLR, 2023. https://proceedings.mlr.press/v202/lemos23a.html Parameters ---------- estimates : np.ndarray of shape (n_data_sets, n_post_draws, n_params) The posterior draws obtained from n_data_sets targets : np.ndarray of shape (n_data_sets, n_params) The prior draws obtained for generating n_data_sets difference : bool, optional, default: False If `True`, plots the ECDF difference. Enables a more dynamic visualization range. stacked : bool, optional, default: False If `True`, all ECDFs will be plotted on the same plot. If `False`, each ECDF will have its own subplot, similar to the behavior of `calibration_histogram`. rank_type : str, optional, default: 'fractional' If `fractional` (default), the ranks are computed as the fraction of posterior samples that are smaller than the prior. If `distance`, the ranks are computed as the fraction of posterior samples that are closer to a reference points (default here is the origin). You can pass a reference array in the same shape as the `estimates` array by setting `targets` in the ``ranks_kwargs``. This is motivated by [2]. variable_keys : list or None, optional, default: None Select keys from the dictionaries provided in estimates and targets. By default, select all keys. variable_names : list or None, optional, default: None The parameter names for nice plot titles. Inferred if None. Only relevant if `stacked=False`. test_quantities : dict or None, optional, default: None A dict that maps plot titles to functions that compute test quantities based on estimate/target draws. The dict keys are automatically added to ``variable_keys`` and ``variable_names``. Test quantity functions are expected to accept a dict of draws with shape ``(batch_size, ...)`` as the first (typically only) positional argument and return an NumPy array of shape ``(batch_size,)``. The functions do not have to deal with an additional sample dimension, as appropriate reshaping is done internally. figsize : tuple or None, optional, default: None The figure size passed to the matplotlib constructor. Inferred if None. label_fontsize : int, optional, default: 16 The font size of the y-label and y-label texts legend_fontsize : int, optional, default: 14 The font size of the legend text title_fontsize : int, optional, default: 18 The font size of the title text. Only relevant if `stacked=False` tick_fontsize : int, optional, default: 12 The font size of the axis ticklabels rank_ecdf_color : str, optional, default: '#a34f4f' The color to use for the rank ECDFs fill_color : str, optional, default: 'grey' The color of the fill arguments. num_row : int, optional, default: None The number of rows for the subplots. Dynamically determined if None. num_col : int, optional, default: None The number of columns for the subplots. Dynamically determined if None. **kwargs : dict, optional, default: {} Keyword arguments can be passed to control the behavior of ECDF simultaneous band computation through the ``ecdf_bands_kwargs`` dictionary. See `simultaneous_ecdf_bands` for keyword arguments. Moreover, additional keyword arguments can be passed to control the behavior of the rank computation through the ``ranks_kwargs`` dictionary. Returns ------- f : plt.Figure - the figure instance for optional saving Raises ------ ShapeError If there is a deviation form the expected shapes of `estimates` and `targets`. ValueError If an unknown `rank_type` is passed. """ # Optionally, compute and prepend test quantities from draws if test_quantities is not None: test_quantities_estimates = {} test_quantities_targets = {} for key, test_quantity_fn in test_quantities.items(): # Apply test_quantity_func to ground-truths tq_targets = test_quantity_fn(data=targets) test_quantities_targets[key] = np.expand_dims(tq_targets, axis=1) # # Flatten estimates for batch processing in test_quantity_fn, apply function, and restore shape num_conditions, num_samples = next(iter(estimates.values())).shape[:2] flattened_estimates = keras.tree.map_structure(lambda t: np.reshape(t, (-1, *t.shape[2:])), estimates) flat_tq_estimates = test_quantity_fn(data=flattened_estimates) test_quantities_estimates[key] = np.reshape(flat_tq_estimates, (num_conditions, num_samples, 1)) # Add custom test quantities to variable keys and names for plotting # keys and names are set to the test_quantities dict keys test_quantities_names = list(test_quantities.keys()) if variable_keys is None: variable_keys = list(estimates.keys()) if isinstance(variable_names, list): variable_names = test_quantities_names + variable_names variable_keys = test_quantities_names + variable_keys estimates = test_quantities_estimates | estimates targets = test_quantities_targets | targets plot_data = prepare_plot_data( estimates=estimates, targets=targets, variable_keys=variable_keys, variable_names=variable_names, num_col=num_col, num_row=num_row, figsize=figsize, stacked=stacked, ) estimates = plot_data.pop("estimates") targets = plot_data.pop("targets") if rank_type == "fractional": # Compute fractional ranks ranks = fractional_ranks(estimates, targets) elif rank_type == "distance": # Compute ranks based on distance to the origin ranks = distance_ranks(estimates, targets, stacked=stacked, **kwargs.pop("ranks_kwargs", {})) else: raise ValueError(f"Unknown rank type: {rank_type}. Use 'fractional' or 'distance'.") # Plot individual ecdf of parameters for j in range(ranks.shape[-1]): xx = np.repeat(np.sort(ranks[:, j]), 2) xx = np.pad(xx, (1, 1), constant_values=(0, 1)) yy = np.linspace(0, 1, num=xx.shape[-1] // 2) yy = np.repeat(yy, 2) # Difference, if specified if difference: yy -= xx if stacked: if j == 0: if not isinstance(plot_data["axes"], np.ndarray): plot_data["axes"] = np.array([plot_data["axes"]]) # in case of single axis plot_data["axes"][0].plot(xx, yy, color=rank_ecdf_color, alpha=0.95, label="Rank ECDFs") else: plot_data["axes"][0].plot(xx, yy, color=rank_ecdf_color, alpha=0.95) else: plot_data["axes"].flat[j].plot(xx, yy, color=rank_ecdf_color, alpha=0.95, label="Rank ECDF") # Compute uniform ECDF and bands alpha, z, L, U = simultaneous_ecdf_bands(estimates.shape[0], **kwargs.pop("ecdf_bands_kwargs", {})) # Difference, if specified if difference: L -= z U -= z ylab = "ECDF Difference" else: ylab = "ECDF" # Add simultaneous bounds if not stacked: titles = plot_data["variable_names"] elif rank_type in ["distance", "random"]: titles = ["Joint ECDFs"] else: titles = ["Stacked ECDFs"] for ax, title in zip(plot_data["axes"].flat, titles): ax.fill_between(z, L, U, color=fill_color, alpha=0.2, label=rf"{int((1 - alpha) * 100)}$\%$ Confidence Bands") ax.legend(fontsize=legend_fontsize, loc=legend_location) ax.set_title(title, fontsize=title_fontsize) prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize) add_titles_and_labels( plot_data["axes"], plot_data["num_row"], plot_data["num_col"], xlabel=f"{rank_type.capitalize()} rank statistic", ylabel=ylab, label_fontsize=label_fontsize, ) plot_data["fig"].tight_layout() return plot_data["fig"]