diff --git a/src/earthkit/meteo/score/__init__.py b/src/earthkit/meteo/score/__init__.py
index d389998..a233d3b 100644
--- a/src/earthkit/meteo/score/__init__.py
+++ b/src/earthkit/meteo/score/__init__.py
@@ -15,5 +15,6 @@
 planned to work with objects like *earthkit.data FieldLists* or *xarray DataSets*.
 """
 
+from .bootstrap import *  # noqa
 from .correlation import *  # noqa
 from .crps import *  # noqa
diff --git a/src/earthkit/meteo/score/array/__init__.py b/src/earthkit/meteo/score/array/__init__.py
index e231eef..a1a1d60 100644
--- a/src/earthkit/meteo/score/array/__init__.py
+++ b/src/earthkit/meteo/score/array/__init__.py
@@ -11,5 +11,6 @@
 Verification functions operating on numpy arrays.
 """
 
+from .bootstrap import *  # noqa
 from .correlation import *  # noqa
 from .crps import *  # noqa
diff --git a/src/earthkit/meteo/score/array/bootstrap.py b/src/earthkit/meteo/score/array/bootstrap.py
new file mode 100644
index 0000000..b71d3d0
--- /dev/null
+++ b/src/earthkit/meteo/score/array/bootstrap.py
@@ -0,0 +1,137 @@
+import random
+
+from earthkit.utils.array import array_namespace
+
+
+def iter_samples(x, *args, sample_axis=0, n_iter=100, n_samples=None, randrange=random.randrange):
+    """Iterate over resampled arrays for bootstrapping
+
+    Parameters
+    ----------
+    x, *args: array-like
+        Inputs to the wrapped function, sampled for bootstrapping. Must have
+        the same size along ``sample_axis``
+    sample_axis: int or list of int
+        Sample along this axis (either same for all or one per argument)
+    n_iter: int
+        Number of bootstrapping iterations
+    n_samples: int or None
+        Number of samples for each iteration. If None, use the number of
+        inputs (size of ``x`` along the sampling axis)
+    randrange: function (int -> int)
+        Random generator for integers: `randrange(n)` should return an
+        integer in `range(n)`
+
+    Yields
+    ------
+    tuple
+        Resampled arrays (one for each iteration)
+    """
+    args = (x,) + args
+    n_arrays = len(args)
+    if isinstance(sample_axis, int):
+        sample_axis = [sample_axis for _ in range(n_arrays)]
+    else:
+        assert len(sample_axis) == n_arrays, "sample_axis must have one element per input array"
+    xp = array_namespace(*args)
+    arrays = tuple((xp.asarray(arr), axis) for arr, axis in zip(args, sample_axis))
+    n_inputs = x.shape[sample_axis[0]]
+    assert all(
+        y.shape[axis] == n_inputs for y, axis in arrays
+    ), "Input arrays must have the same size along the sampling axis"
+    if n_samples is None:
+        n_samples = n_inputs
+    for _ in range(n_iter):
+        indices = [randrange(n_inputs) for _ in range(n_samples)]
+        sampled = tuple(xp.take(y, indices=indices, axis=axis) for y, axis in arrays)
+        yield sampled
+
+
+def resample(x, *args, sample_axis=0, out_axis=0, n_iter=100, n_samples=None, randrange=random.randrange):
+    """Resample arrays for bootstrapping
+
+    Parameters
+    ----------
+    x, *args: array-like
+        Inputs to the wrapped function, sampled for bootstrapping. Must have
+        the same size along ``sample_axis``
+    sample_axis: int or list of int
+        Sample along this axis (either same for all or one per argument)
+    out_axis: int
+        Stack samples along this axis
+    n_iter: int
+        Number of bootstrapping iterations
+    n_samples: int or None
+        Number of samples for each iteration. If None, use the number of
+        inputs (size of ``x`` along the sampling axis)
+    randrange: function (int -> int)
+        Random generator for integers: `randrange(n)` should return an
+        integer in `range(n)`
+
+    Returns
+    ------
+    tuple
+        Resampled arrays (one for each iteration)
+    """
+    xp = array_namespace(x, *args)
+    n_arrays = len(args) + 1
+    samples = [[] for _ in range(n_arrays)]
+    samples_it = iter_samples(
+        x, *args, sample_axis=sample_axis, n_iter=n_iter, n_samples=n_samples, randrange=randrange
+    )
+    for sample in samples_it:
+        for i in range(n_arrays):
+            samples[i].append(sample[i])
+    return tuple(xp.stack(sampled_arr, axis=out_axis) for sampled_arr in samples)
+
+
+def bootstrap(
+    func,
+    x,
+    *args,
+    sample_axis=0,
+    out_axis=0,
+    n_iter=100,
+    n_samples=None,
+    randrange=random.randrange,
+    **kwargs,
+):
+    """Run bootstrapping
+
+    Parameters
+    ----------
+    func: function ((array, ..., **kwargs) -> array)
+        Function to bootstrap
+    x, *args: array-like
+        Inputs to ``function``, sampled for bootstrapping. Must have the same
+        size along ``sample_axis``
+    sample_axis: int or list of int
+        Sample along this axis (either same for all or one per argument)
+    out_axis: int
+        Stack samples along this axis
+    n_iter: int
+        Number of bootstrapping iterations
+    n_samples: int or None
+        Number of samples for each iteration. If None, use the number of
+        inputs (size of ``x`` along the sampling axis)
+    randrange: function (int -> int)
+        Random generator for integers: `randrange(n)` should return an
+        integer in `range(n)`
+    **kwargs
+        Additional keyword arguments to ``func``
+
+    Returns
+    -------
+    array-like
+        Aggregated results of the bootstrapping process
+    """
+    xp = array_namespace(x, *args)
+    samples = iter_samples(
+        x,
+        *args,
+        sample_axis=sample_axis,
+        n_iter=n_iter,
+        n_samples=n_samples,
+        randrange=randrange,
+    )
+    return xp.stack([func(*sampled, **kwargs) for sampled in samples], axis=out_axis)
diff --git a/src/earthkit/meteo/score/bootstrap.py b/src/earthkit/meteo/score/bootstrap.py
new file mode 100644
index 0000000..cd64c89
--- /dev/null
+++ b/src/earthkit/meteo/score/bootstrap.py
@@ -0,0 +1,179 @@
+import functools
+import random
+from dataclasses import dataclass
+
+from earthkit.utils.array import array_namespace
+
+try:
+    import xarray as xr
+except ModuleNotFoundError:
+    xr = None
+
+from . import array
+
+
+def resample(x, *args, **kwargs):
+    if xr is not None and isinstance(x, xr.DataArray):
+        n_arrays = len(args) + 1
+        dim = kwargs.get("dim", None)
+        if dim is None:
+            raise TypeError("resample with xarray arguments requires 'dim'")
+        in_dims = [[dim] for _ in range(n_arrays)]
+        sample_dim = kwargs.get("sample_dim", "sample")
+        out_dims = [[sample_dim] for _ in range(n_arrays)]
+        return xr.apply_ufunc(
+            functools.partial(array.resample, sample_axis=-1, out_axis=-1, **kwargs),
+            x,
+            *args,
+            input_core_dims=in_dims,
+            output_core_dims=out_dims,
+        )
+
+    return array.resample(x, *args, **kwargs)
+
+
+def _bootstrap_xarray(
+    func,
+    *args,
+    dim=None,
+    sample_dim="sample",
+    n_iter=100,
+    n_samples=None,
+    randrange=random.randrange,
+    **kwargs,
+):
+    assert xr is not None
+    if dim is None:
+        raise TypeError("bootstrap with xarray arguments requires 'dim'")
+    n_inputs = args[0].sizes[dim]
+    assert all(
+        arr.sizes[dim] == n_inputs for arr in args
+    ), "Input arrays must have the same size along the sampling axis"
+    if n_samples is None:
+        n_samples = n_inputs
+    results = []
+    for _ in range(n_iter):
+        indices = [randrange(n_inputs) for _ in range(n_samples)]
+        sampled = tuple(arr.isel({dim: indices}) for arr in args)
+        results.append(func(*sampled, **kwargs))
+    return xr.concat(results, sample_dim)
+
+
+def bootstrap(func, x, *args, **kwargs):
+    if xr is not None and isinstance(x, xr.DataArray):
+        return _bootstrap_xarray(func, x, *args, **kwargs)
+    return array.bootstrap(func, x, *args, **kwargs)
+
+
+@dataclass
+class BootstrapResult:
+    """Container for the result of a bootstrapping process"""
+
+    #: number of bootstrapping iterations
+    n_iter: int
+
+    #: number of samples for each iteration
+    n_samples: int
+
+    #: return values for each iteration, first dimension is the iteration
+    results: object
+
+    def quantiles(self, threshold: float):
+        """Compute quantiles associated with the given threshold
+
+        The first dimension of the returned array has length 2 corresponding to
+        the low (``threshold``) and high (``1 - threshold``) quantiles
+        """
+        return self._xp.quantile(self.results, [threshold, 1.0 - threshold], axis=0)
+
+    def sig_mask(self, threshold: float, sig_value: float):
+        """Compute the significance mask for the given threshold and significant value"""
+        low, high = self.quantiles(threshold)
+        return self._xp.logical_or(high < sig_value, low > sig_value)
+
+    @property
+    def _xp(self):
+        return array_namespace(self.results)
+
+
+class Bootstrappable:
+    def __init__(self, func):
+        self.func = func
+
+    def __call__(self, *args, **kwargs):
+        return self.func(*args, **kwargs)
+
+    def bootstrap(
+        self,
+        x,
+        *args,
+        sample_axis=0,
+        n_iter=100,
+        n_samples=None,
+        randrange=random.randrange,
+        **kwargs,
+    ):
+        """Run bootstrapping
+
+        Parameters
+        ----------
+        x, *args: array-like
+            Inputs to the wrapped function, sampled for bootstrapping. Must have
+            the same size along ``sample_axis``
+        sample_axis: int or list of int
+            Sample along this axis (either same for all or one per argument)
+        n_iter: int
+            Number of bootstrapping iterations
+        n_samples: int or None
+            Number of samples for each iteration. If None, use the number of
+            inputs (size of ``x`` along the sampling axis)
+        randrange: function (int -> int)
+            Random generator for integers: `randrange(n)` should return an
+            integer in `range(n)`
+        **kwargs
+            Additional keyword arguments to the wrapped function
+
+        Returns
+        -------
+        BootstrapResult
+            Aggregated results of the bootstrapping process
+        """
+        results = bootstrap(
+            self.func,
+            x,
+            *args,
+            sample_axis=sample_axis,
+            n_iter=n_iter,
+            n_samples=n_samples,
+            randrange=randrange,
+        )
+        return BootstrapResult(n_iter, n_samples, results)
+
+
+def enable_bootstrap(func):
+    """Enable bootstrapping on a binary scoring function
+
+    The function will be wrapped in a callable object (see
+    :class:`Bootstrappable`). Calling the object will call the function
+    directly. The object will have a :meth:`Bootstrappable.bootstrap` method to
+    run bootstrapping.
+
+    Examples
+    --------
+
+    ::
+        @enable_bootstrap
+        def mse(x, y, axis=-1):
+            return np.mean(np.square(y - x), axis=axis)
+
+        x = ...
+        y = ...
+        mse(x, y)  # normal call, return MSE
+        bresult = mse.bootstrap(x, y)  # bootstrapping
+        bresult.quantiles(0.2)
+        bresult.sig_mask(0.05, 0.0)
+    """
+
+    wrapper = Bootstrappable(func)
+    functools.update_wrapper(wrapper, func)
+    return wrapper