Source code for bayesflow.utils.hparam_utils
import numpy as np
from .io import format_bytes, parse_bytes
from . import logging
from .tensor_utils import size_of
def find_batch_size(sample, memory_budget: str = "auto", min: int = 4, max: int = 1024) -> int:
    """Returns an estimate of the optimal batch size based on the memory budget and the memory footprint of one sample.

    :param sample: Any nested structure of tensors, representing a single sample.
    :param memory_budget: The maximum available memory for a single batch.
    :param min: The minimum batch size.
    :param max: The maximum batch size.
    """
    if memory_budget == "auto":
        memory_budget = find_memory_budget()
    elif isinstance(memory_budget, str):
        memory_budget = parse_bytes(memory_budget)

    # find the size of one sample
    sample_memory = size_of(sample)
    logging.info(f"Estimating memory footprint of one sample at {format_bytes(sample_memory, precision=1)}.")

    # use a conservative (low) estimate for the optimal batch size
    batch_size = memory_budget / (4 * sample_memory)

    if batch_size < 16:
        logging.warning(
            "Memory budget is very small compared to sample size. You may need to accumulate gradients over "
            "multiple batches using `gradient_accumulation_steps` in the optimizer. We recommend accumulating "
            f"at least {int(32 / batch_size)} steps."
        )

    # limit the estimate to a sensible range
    batch_size = int(np.clip(batch_size, min, max))

    return batch_size
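
# A minimal usage sketch (illustrative, not part of the module): the dummy sample structure and
# the "8 GiB" budget string are assumptions about what `size_of` and `parse_bytes` accept.
#
#     import keras
#     sample = {"parameters": keras.ops.zeros((2,)), "observables": keras.ops.zeros((100, 3))}
#     batch_size = find_batch_size(sample, memory_budget="8 GiB")  # conservative estimate, clipped to [4, 1024]
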
def find_memory_budget(device_type: str = None) -> int:
    """Returns an estimate of the available memory in bytes for the given device type."""
    # keras utilities for device information are not very mature yet
    raise NotImplementedError("Automatic memory budget is not yet supported. Please pass an explicit value.")
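
# One possible shape of a future implementation (purely a sketch, not BayesFlow's method; it
# assumes a CUDA-capable torch backend and the third-party `psutil` package for host memory):
#
#     import psutil
#     import torch
#
#     def _estimate_memory_budget(device_type: str = "gpu") -> int:
#         if device_type == "gpu" and torch.cuda.is_available():
#             # total memory of the first CUDA device, in bytes
#             return torch.cuda.get_device_properties(0).total_memory
#         # otherwise, fall back to the currently available host RAM, in bytes
#         return psutil.virtual_memory().available
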