Source code for bayesflow.diagnostics.plots.calibration_ecdf

from collections.abc import Mapping, Sequence

import numpy as np
import matplotlib.pyplot as plt

from ...utils.plot_utils import prepare_plot_data, add_titles_and_labels, prettify_subplots
from ...utils.ecdf import simultaneous_ecdf_bands
from ...utils.ecdf.ranks import fractional_ranks, distance_ranks


[docs] def calibration_ecdf( estimates: Mapping[str, np.ndarray] | np.ndarray, targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, difference: bool = False, stacked: bool = False, rank_type: str | np.ndarray = "fractional", figsize: Sequence[float] = None, label_fontsize: int = 16, legend_fontsize: int = 14, legend_location: str = "upper right", title_fontsize: int = 18, tick_fontsize: int = 12, rank_ecdf_color: str = "#132a70", fill_color: str = "grey", num_row: int = None, num_col: int = None, **kwargs, ) -> plt.Figure: """ Creates the empirical CDFs for each marginal rank distribution and plots it against a uniform ECDF. ECDF simultaneous bands are drawn using simulations from the uniform, as proposed by [1]. For models with many parameters, use `stacked=True` to obtain an idea of the overall calibration of a posterior approximator. To compute ranks based on the Euclidean distance to the origin or a reference, use `rank_type='distance'` (and pass a reference array, respectively). This can be used to check the joint calibration of the posterior approximator and might show potential biases in the posterior approximation which are not detected by the fractional ranks (e.g., when the prior equals the posterior). This is motivated by [2]. [1] Säilynoja, T., Bürkner, P. C., & Vehtari, A. (2022). Graphical test for discrete uniformity and its applications in goodness-of-fit evaluation and multiple sample comparison. Statistics and Computing, 32(2), 1-21. https://arxiv.org/abs/2103.10522 [2] Lemos, Pablo, et al. "Sampling-based accuracy testing of posterior estimators for general inference." International Conference on Machine Learning. PMLR, 2023. https://proceedings.mlr.press/v202/lemos23a.html Parameters ---------- estimates : np.ndarray of shape (n_data_sets, n_post_draws, n_params) The posterior draws obtained from n_data_sets targets : np.ndarray of shape (n_data_sets, n_params) The prior draws obtained for generating n_data_sets difference : bool, optional, default: False If `True`, plots the ECDF difference. Enables a more dynamic visualization range. stacked : bool, optional, default: False If `True`, all ECDFs will be plotted on the same plot. If `False`, each ECDF will have its own subplot, similar to the behavior of `calibration_histogram`. rank_type : str, optional, default: 'fractional' If `fractional` (default), the ranks are computed as the fraction of posterior samples that are smaller than the prior. If `distance`, the ranks are computed as the fraction of posterior samples that are closer to a reference points (default here is the origin). You can pass a reference array in the same shape as the `estimates` array by setting `targets` in the ``ranks_kwargs``. This is motivated by [2]. variable_keys : list or None, optional, default: None Select keys from the dictionaries provided in estimates and targets. By default, select all keys. variable_names : list or None, optional, default: None The parameter names for nice plot titles. Inferred if None. Only relevant if `stacked=False`. figsize : tuple or None, optional, default: None The figure size passed to the matplotlib constructor. Inferred if None. label_fontsize : int, optional, default: 16 The font size of the y-label and y-label texts legend_fontsize : int, optional, default: 14 The font size of the legend text title_fontsize : int, optional, default: 18 The font size of the title text. Only relevant if `stacked=False` tick_fontsize : int, optional, default: 12 The font size of the axis ticklabels rank_ecdf_color : str, optional, default: '#a34f4f' The color to use for the rank ECDFs fill_color : str, optional, default: 'grey' The color of the fill arguments. num_row : int, optional, default: None The number of rows for the subplots. Dynamically determined if None. num_col : int, optional, default: None The number of columns for the subplots. Dynamically determined if None. **kwargs : dict, optional, default: {} Keyword arguments can be passed to control the behavior of ECDF simultaneous band computation through the ``ecdf_bands_kwargs`` dictionary. See `simultaneous_ecdf_bands` for keyword arguments. Moreover, additional keyword arguments can be passed to control the behavior of the rank computation through the ``ranks_kwargs`` dictionary. Returns ------- f : plt.Figure - the figure instance for optional saving Raises ------ ShapeError If there is a deviation form the expected shapes of `estimates` and `targets`. ValueError If an unknown `rank_type` is passed. """ plot_data = prepare_plot_data( estimates=estimates, targets=targets, variable_keys=variable_keys, variable_names=variable_names, num_col=num_col, num_row=num_row, figsize=figsize, stacked=stacked, ) estimates = plot_data.pop("estimates") targets = plot_data.pop("targets") if rank_type == "fractional": # Compute fractional ranks ranks = fractional_ranks(estimates, targets) elif rank_type == "distance": # Compute ranks based on distance to the origin ranks = distance_ranks(estimates, targets, stacked=stacked, **kwargs.pop("ranks_kwargs", {})) else: raise ValueError(f"Unknown rank type: {rank_type}. Use 'fractional' or 'distance'.") # Plot individual ecdf of parameters for j in range(ranks.shape[-1]): xx = np.repeat(np.sort(ranks[:, j]), 2) xx = np.pad(xx, (1, 1), constant_values=(0, 1)) yy = np.linspace(0, 1, num=xx.shape[-1] // 2) yy = np.repeat(yy, 2) # Difference, if specified if difference: yy -= xx if stacked: if j == 0: if not isinstance(plot_data["axes"], np.ndarray): plot_data["axes"] = np.array([plot_data["axes"]]) # in case of single axis plot_data["axes"][0].plot(xx, yy, color=rank_ecdf_color, alpha=0.95, label="Rank ECDFs") else: plot_data["axes"][0].plot(xx, yy, color=rank_ecdf_color, alpha=0.95) else: plot_data["axes"].flat[j].plot(xx, yy, color=rank_ecdf_color, alpha=0.95, label="Rank ECDF") # Compute uniform ECDF and bands alpha, z, L, U = simultaneous_ecdf_bands(estimates.shape[0], **kwargs.pop("ecdf_bands_kwargs", {})) # Difference, if specified if difference: L -= z U -= z ylab = "ECDF Difference" else: ylab = "ECDF" # Add simultaneous bounds if not stacked: titles = plot_data["variable_names"] elif rank_type in ["distance", "random"]: titles = ["Joint ECDFs"] else: titles = ["Stacked ECDFs"] for ax, title in zip(plot_data["axes"].flat, titles): ax.fill_between(z, L, U, color=fill_color, alpha=0.2, label=rf"{int((1 - alpha) * 100)}$\%$ Confidence Bands") ax.legend(fontsize=legend_fontsize, loc=legend_location) ax.set_title(title, fontsize=title_fontsize) prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize) add_titles_and_labels( plot_data["axes"], plot_data["num_row"], plot_data["num_col"], xlabel=f"{rank_type.capitalize()} rank statistic", ylabel=ylab, label_fontsize=label_fontsize, ) plot_data["fig"].tight_layout() return plot_data["fig"]