Merge pull request #18 from cpelley/ENH_CONSERVATIVE_APPROACH

pelson · web-flow · commit 44135af10426 · 2018-01-26T13:01:49.000Z
ENH: Added conservative interpolation approach
diff --git a/setup.py b/setup.py
@@ -13,6 +13,9 @@
 
 extensions = [Extension('{}._vinterp'.format(NAME),
                         [os.path.join(NAME, '_vinterp.pyx')],
+                        include_dirs=[np.get_include()]),
+              Extension('{}._conservative'.format(NAME),
+                        [os.path.join(NAME, '_conservative.pyx')],
                         include_dirs=[np.get_include()])]
 
 
diff --git a/stratify/__init__.py b/stratify/__init__.py
@@ -1,9 +1,11 @@
 from __future__ import absolute_import
 
-from ._vinterp import (interpolate, interp_schemes, extrap_schemes,
-                       INTERPOLATE_LINEAR, INTERPOLATE_NEAREST,
+from ._vinterp import (interpolate, interp_schemes,  # noqa: F401
+                       extrap_schemes, INTERPOLATE_LINEAR, INTERPOLATE_NEAREST,
                        EXTRAPOLATE_NAN, EXTRAPOLATE_NEAREST,
-                       EXTRAPOLATE_LINEAR, PyFuncExtrapolator, 
+                       EXTRAPOLATE_LINEAR, PyFuncExtrapolator,
                        PyFuncInterpolator)
+from ._bounded_vinterp import interpolate_conservative  # noqa: F401
+
 
 __version__ = '0.1a3.dev0'
diff --git a/stratify/_bounded_vinterp.py b/stratify/_bounded_vinterp.py
@@ -0,0 +1,135 @@
+from __future__ import absolute_import
+
+import numpy as np
+
+from ._conservative import conservative_interpolation
+
+
+def interpolate_conservative(z_target, z_src, fz_src, axis=-1):
+    """
+    1d conservative interpolation across multiple dimensions.
+
+    This function provides the ability to perform 1d interpolation on datasets
+    with more than one dimension. For instance, this function can be used to
+    interpolate a set of vertical levels, even if the interpolation coordinate
+    depends upon other dimensions.
+
+    A good use case might be when wanting to interpolate at a specific height
+    for height data which also depends on x and y - e.g. extract 1000hPa level
+    from a 3d dataset and associated pressure field. In the case of this
+    example, pressure would be the `z` coordinate, and the dataset
+    (e.g. geopotential height / temperature etc.) would be `f(z)`.
+
+    Parameters
+    ----------
+    z_target: :class:`np.ndarray`
+        Target coordinate.
+        This coordinate defines the levels to interpolate the source data
+        ``fz_src`` to.  ``z_target`` must have the same dimensionality as the
+        source coordinate ``z_src``, and the shape of ``z_target`` must match
+        the shape of ``z_src``, although the axis of interpolation may differ
+        in dimension size.
+    z_src: :class:`np.ndarray`
+        Source coordinate.
+        This coordinate defines the levels that the source data ``fz_src`` is
+        interpolated from.
+    fz_src: :class:`np.ndarray`
+        The source data; the phenomenon data values to be interpolated from
+        ``z_src`` to ``z_target``.
+        The data array must be at least ``z_src.ndim``, and its trailing
+        dimensions (i.e. those on its right hand side) must be exactly
+        the same as the shape of ``z_src``.
+    axis: int (default -1)
+        The ``fz_src`` axis to perform the interpolation over.
+
+    Returns
+    -------
+    : :class:`np.ndarray`
+        fz_src interpolated from z_src to z_target.
+
+    Note
+    ----
+    - Support for 1D z_target and corresponding ND z_src will be provided in
+    future as driven by user requirement.
+    - Those cells, where 'nan' values in the source data contribute, a 'nan'
+      value is returned.
+
+    """
+    if z_src.ndim != z_target.ndim:
+        msg = ('Expecting source and target levels dimensionality to be '
+               'identical.  {} != {}.')
+        raise ValueError(msg.format(z_src.ndim, z_target.ndim))
+
+    # Relative axis
+    axis = axis % fz_src.ndim
+    axis_relative = axis - (fz_src.ndim - (z_target.ndim-1))
+
+    src_shape = list(z_src.shape)
+    src_shape.pop(axis_relative)
+    tgt_shape = list(z_target.shape)
+    tgt_shape.pop(axis_relative)
+
+    if src_shape != tgt_shape:
+        src_shape = list(z_src.shape)
+        src_shape[axis_relative] = '-'
+        tgt_shape = list(z_target.shape)
+        src_shape[axis_relative] = '-'
+        msg = ('Expecting the shape of the source and target levels except '
+               'the axis of interpolation to be identical.  {} != {}')
+        raise ValueError(msg.format(tuple(src_shape), tuple(tgt_shape)))
+
+    dat_shape = list(fz_src.shape)
+    dat_shape = dat_shape[-(z_src.ndim-1):]
+    src_shape = list(z_src.shape[:-1])
+    if dat_shape != src_shape:
+        dat_shape = list(fz_src.shape)
+        dat_shape[:-(z_src.ndim-1)] = '-'
+        msg = ('The provided data is not of compatible shape with the '
+               'provided source bounds. {} != {}')
+        raise ValueError(msg.format(tuple(dat_shape), tuple(src_shape)))
+
+    if z_src.shape[-1] != 2:
+        msg = 'Unexpected source and target bounds shape. shape[-1] != 2'
+        raise ValueError(msg)
+
+    # Define our source in a consistent way.
+    # [broadcasting_dims, axis_interpolation, z_varying]
+
+    # src_data
+    bdims = list(range(fz_src.ndim - (z_src.ndim-1)))
+    data_vdims = [ind for ind in range(fz_src.ndim) if ind not in
+                  (bdims + [axis])]
+    data_transpose = bdims + [axis] + data_vdims
+    fz_src_reshaped = np.transpose(fz_src, data_transpose)
+    fz_src_orig = list(fz_src_reshaped.shape)
+    shape = (
+        int(np.product(fz_src_reshaped.shape[:len(bdims)])),
+        fz_src_reshaped.shape[len(bdims)],
+        int(np.product(fz_src_reshaped.shape[len(bdims)+1:])))
+    fz_src_reshaped = fz_src_reshaped.reshape(shape)
+
+    # Define our src and target bounds in a consistent way.
+    # [axis_interpolation, z_varying, 2]
+    vdims = list(set(range(z_src.ndim)) - set([axis_relative]))
+    z_src_reshaped = np.transpose(z_src, [axis_relative] + vdims)
+    z_target_reshaped = np.transpose(z_target, [axis_relative] + vdims)
+
+    shape = int(np.product(z_src_reshaped.shape[1:-1]))
+    z_src_reshaped = z_src_reshaped.reshape([z_src_reshaped.shape[0], shape,
+                                             z_src_reshaped.shape[-1]])
+    shape = int(np.product(z_target_reshaped.shape[1:-1]))
+    z_target_reshaped = z_target_reshaped.reshape(
+        [z_target_reshaped.shape[0], shape, z_target_reshaped.shape[-1]])
+
+    result = conservative_interpolation(
+        z_src_reshaped, z_target_reshaped, fz_src_reshaped)
+
+    # Turn the result into a shape consistent with the source.
+    # First reshape, then reverse transpose.
+    shape = fz_src_orig
+    shape[len(bdims)] = z_target.shape[axis_relative]
+    result = result.reshape(shape)
+    invert_transpose = [data_transpose.index(ind) for ind in
+                        list(range(result.ndim))]
+    result = result.transpose(invert_transpose)
+    return result
diff --git a/stratify/_conservative.pyx b/stratify/_conservative.pyx
@@ -0,0 +1,126 @@
+import numpy as np
+cimport numpy as np
+
+
+cdef calculate_weights(np.ndarray[np.float64_t, ndim=2] src_point,
+                       np.ndarray[np.float64_t, ndim=2] tgt_point):
+    """
+    Calculate weights for a given point.
+
+    The following visually illustrates the calculation::
+
+        src_min    src_max
+           |----------|                 : Source
+              tgt_min     tgt_max
+                 |------------|         : Target
+           |----------|                 : Delta (src_max - src_min)
+                 |----|                 : Overlap (between src & tgt)
+           weight = overlap / delta
+
+    Parameters
+    ----------
+    src_point (2d double array) - Source point (at a specific location).
+    tgt_point (2d double array) - Target point (at a specific location).
+
+    Returns
+    -------
+    2d double array - Weights corresponding to shape [src_point.shape[0],
+        tgt_point.shape[0]].
+
+    """
+    cdef Py_ssize_t src_ind, tgt_ind
+    cdef np.float64_t delta, weight
+    cdef np.ndarray[np.float64_t, ndim=2] weights
+    cdef np.ndarray[np.float64_t, ndim=1] src_cell, tgt_cell
+
+    weights = np.zeros([src_point.shape[0], tgt_point.shape[0]],
+                       dtype=np.float64)
+    for src_ind, src_cell in enumerate(src_point):
+        delta = src_cell.max() - src_cell.min()
+        for tgt_ind, tgt_cell in enumerate(tgt_point):
+            weight = (min(src_cell.max(), tgt_cell.max()) -
+                      max(src_cell.min(), tgt_cell.min())) / float(delta)
+            if weight > 0:
+                weights[src_ind, tgt_ind] = weight
+    return weights
+
+
+cdef apply_weights(np.ndarray[np.float64_t, ndim=3] src_point,
+                   np.ndarray[np.float64_t, ndim=3] tgt_point,
+                   np.ndarray[np.float64_t, ndim=3] src_data):
+    """
+    Perform conservative interpolation.
+
+    Conservation interpolation of a dataset between a provided source
+    coordinate and a target coordinate.  Where no source cells contribute to a
+    target cell, a np.nan value is returned.
+
+    Parameters
+    ----------
+    src_points (3d double array) - Source coordinate, taking the form
+        [axis_interpolation, z_varying, 2].
+    tgt_points (3d double array) - Target coordinate, taking the form
+        [axis_interpolation, z_varying, 2].
+    src_data (3d double array) - The source data, the phenomenon data to be
+        interpolated from ``src_points`` to ``tgt_points``.  Taking the form
+        [broadcasting_dims, axis_interpolation, z_varying].
+
+    Returns
+    -------
+    3d double array - Interpolated result over target levels (``tgt_points``).
+        Taking the form [broadcasting_dims, axis_interpolation, z_varying].
+
+    """
+    cdef Py_ssize_t ind
+    cdef np.ndarray[np.float64_t, ndim=3] results, weighted_contrib
+    cdef np.ndarray[np.float64_t, ndim=2] weights
+    results = np.zeros(
+        [src_data.shape[0], tgt_point.shape[0], src_data.shape[2]],
+        dtype='float64')
+    # Calculate and apply weights
+    for ind in range(src_data.shape[2]):
+        weights = calculate_weights(src_point[:, ind], tgt_point[:, ind])
+        if not (weights.sum(axis=1) == 1).all():
+            msg = ('Weights calculation yields a less than conservative '
+                   'result.  Aborting.')
+            raise ValueError(msg)
+        weighted_contrib = weights * src_data[..., ind][..., None]
+        results[..., ind] = (
+            np.nansum(weighted_contrib, axis=1))
+        # Return nan values for those target cells, where there is any
+        # contribution of nan data from the source data.
+        results[..., ind][
+            ((weights > 0) * np.isnan(weighted_contrib)).any(axis=1)] = np.nan
+
+        # Return np.nan for those target cells where no source contributes.
+        results[:, weights.sum(axis=0) == 0, ind] = np.nan
+    return results
+
+
+def conservative_interpolation(src_points, tgt_points, src_data):
+    """
+    Perform conservative interpolation.
+
+    Conservation interpolation of a dataset between a provided source
+    coordinate and a target coordinate.  All inputs are recast to 64-bit float
+    arrays before calculation.
+
+    Parameters
+    ----------
+    src_points (3d array) - Source coordinate, taking the form
+        [axis_interpolation, z_varying, 2].
+    tgt_points (3d array) - Target coordinate, taking the form
+        [axis_interpolation, z_varying, 2].
+    src_data (3d array) - The source data, the phenonenon data to be
+        interpolated from ``src_points`` to ``tgt_points``.  Taking the form
+        [broadcasting_dims, axis_interpolation, z_varying].
+
+    Returns
+    -------
+    3d double array - Interpolated result over target levels (``tgt_points``).
+        Taking the form [broadcasting_dims, axis_interpolation, z_varying].
+
+    """
+    return apply_weights(src_points.astype('float64'),
+                         tgt_points.astype('float64'),
+                         src_data.astype('float64'))
diff --git a/stratify/tests/test_bounded_vinterp.py b/stratify/tests/test_bounded_vinterp.py