From d568fda88f961ffc938773588f45830d39ae0120 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Thu, 21 Aug 2025 17:00:29 -0400
Subject: [PATCH 1/9] First draft of morton-spatial-hashing

---
 parcels/spatialhash.py | 427 ++++++++++++++++++++++++-----------------
 1 file changed, 246 insertions(+), 181 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 57942610e..760ed8a5d 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -33,122 +33,115 @@ def __init__(
         # Hash grid size
         self._dh = self._hash_cell_size()
 
-        # Lower left corner of the hash grid
-        lon_min = self._source_grid.lon.min()
-        lat_min = self._source_grid.lat.min()
-        lon_max = self._source_grid.lon.max()
-        lat_max = self._source_grid.lat.max()
-
-        # Get corner vertices of each face
-        self._xbound = np.stack(
-            (
-                self._source_grid.lon[:-1, :-1],
-                self._source_grid.lon[:-1, 1:],
-                self._source_grid.lon[1:, 1:],
-                self._source_grid.lon[1:, :-1],
-            ),
-            axis=-1,
-        )
-        self._ybound = np.stack(
-            (
-                self._source_grid.lat[:-1, :-1],
-                self._source_grid.lat[:-1, 1:],
-                self._source_grid.lat[1:, 1:],
-                self._source_grid.lat[1:, :-1],
-            ),
-            axis=-1,
-        )
-
-        self._xmin = lon_min - self._dh
-        self._ymin = lat_min - self._dh
-        self._xmax = lon_max + self._dh
-        self._ymax = lat_max + self._dh
-
-        # Number of x points in the hash grid; used for
-        # array flattening
-        Lx = self._xmax - self._xmin
-        Ly = self._ymax - self._ymin
-        self._nx = int(np.ceil(Lx / self._dh))
-        self._ny = int(np.ceil(Ly / self._dh))
+        # # Lower left corner of the hash grid
+        # lon_min = self._source_grid.lon.min()
+        # lat_min = self._source_grid.lat.min()
+        # lon_max = self._source_grid.lon.max()
+        # lat_max = self._source_grid.lat.max()
 
-        # Generate the mapping from the hash indices to unstructured grid elements
-        self._face_hash_table = None
-        self._face_hash_table = self._initialize_face_hash_table()
-
-    def _hash_cell_size(self):
-        """Computes the size of the hash cells from the source grid.
-        The hash cell size is set to 1/2 of the square root of the median cell area
-        """
-        return np.sqrt(np.median(_planar_quad_area(self._source_grid.lat, self._source_grid.lon))) * 0.5
-
-    def _hash_index2d(self, coords):
-        """Computes the 2-d hash index (i,j) for the location (x,y), where x and y is the same units
-        as the source grid coordinates
-        """
-        # Wrap longitude to [-180, 180]
         if self._source_grid.mesh == "spherical":
-            lon = (coords[:, 1] + 180.0) % (360.0) - 180.0
-        else:
-            lon = coords[:, 1]
-        i = ((lon - self._xmin) / self._dh).astype(np.int32)
-        j = ((coords[:, 0] - self._ymin) / self._dh).astype(np.int32)
-        return j, i
-
-    def _hash_index(self, coords):
-        """Computes the flattened hash index for the location (x,y), where x and y are given in spherical
-        coordinates (in degrees). The single dimensioned hash index orders the flat index with all of the
-        i-points first and then all the j-points.
-        """
-        j, i = self._hash_index2d(coords)
-        return i + self._nx * j
-
-    def _grid_ji_for_eid(self, eid):
-        """Returns the (i,j) grid coordinates for the given element id (eid)"""
-        j = eid // (self._source_grid.xdim)
-        i = eid - j * (self._source_grid.xdim)
-        return j, i
-
-    def _initialize_face_hash_table(self):
-        """Create a mapping that relates unstructured grid faces to hash indices by determining
-        which faces overlap with which hash cells
-        """
-        if self._face_hash_table is None or self.reconstruct:
-            index_to_face = [[] for i in range(self._nx * self._ny)]
-            # Get the bounds of each curvilinear faces
-            lat_bounds, lon_bounds = _curvilinear_grid_facebounds(
-                self._source_grid.lat,
-                self._source_grid.lon,
+            # Boundaries of the hash grid are the unit cube
+            self._xmin = -1.0
+            self._ymin = -1.0
+            self._zmin = -1.0
+            self._xmax = 1.0
+            self._ymax = 1.0
+            self._zmax = 1.0  # Compute the cell centers of the source grid (for now, assuming Xgrid)
+            lon = np.deg2rad(self._source_grid.lon)
+            lat = np.deg2rad(self._source_grid.lat)
+            x, y, z = _latlon_rad_to_xyz(lat, lon)
+            _xbound = np.stack(
+                (
+                    x[:-1, :-1],
+                    x[:-1, 1:],
+                    x[1:, 1:],
+                    x[1:, :-1],
+                ),
+                axis=-1,
             )
-            coords = np.stack(
+            _ybound = np.stack(
                 (
-                    lat_bounds[:, :, 0].flatten(),
-                    lon_bounds[:, :, 0].flatten(),
+                    y[:-1, :-1],
+                    y[:-1, 1:],
+                    y[1:, 1:],
+                    y[1:, :-1],
                 ),
                 axis=-1,
             )
-            yi1, xi1 = self._hash_index2d(coords)
-            coords = np.stack(
+            _zbound = np.stack(
                 (
-                    lat_bounds[:, :, 1].flatten(),
-                    lon_bounds[:, :, 1].flatten(),
+                    z[:-1, :-1],
+                    z[:-1, 1:],
+                    z[1:, 1:],
+                    z[1:, :-1],
                 ),
                 axis=-1,
             )
-            yi2, xi2 = self._hash_index2d(coords)
-            nface = (self._source_grid.xdim) * (self._source_grid.ydim)
-            for eid in range(nface):
-                for j in range(yi1[eid], yi2[eid] + 1):
-                    if xi1[eid] <= xi2[eid]:
-                        # Normal case, no wrap
-                        for i in range(xi1[eid], xi2[eid] + 1):
-                            index_to_face[(i % self._nx) + self._nx * j].append(eid)
-                    else:
-                        # Wrap-around case
-                        for i in range(xi1[eid], self._nx):
-                            index_to_face[(i % self._nx) + self._nx * j].append(eid)
-                        for i in range(0, xi2[eid] + 1):
-                            index_to_face[(i % self._nx) + self._nx * j].append(eid)
-            return index_to_face
+            # Compute centroid locations of each cells
+            self._xc = np.mean(_xbound, axis=-1)
+            self._yc = np.mean(_ybound, axis=-1)
+            self._zc = np.mean(_zbound, axis=-1)
+
+        else:
+            # Boundaries of the hash grid are the bounding box of the source grid
+            self._xmin = self._source_grid.lon.min()
+            self._ymin = self._source_grid.lat.min()
+            self._zmin = 0.0
+            x = self._source_grid.lon
+            y = self._source_grid.lat
+
+            _xbound = np.stack(
+                (
+                    x[:-1, :-1],
+                    x[:-1, 1:],
+                    x[1:, 1:],
+                    x[1:, :-1],
+                ),
+                axis=-1,
+            )
+            _ybound = np.stack(
+                (
+                    y[:-1, :-1],
+                    y[:-1, 1:],
+                    y[1:, 1:],
+                    y[1:, :-1],
+                ),
+                axis=-1,
+            )
+            # Compute centroid locations of each cells
+            self._xc = np.mean(_xbound, axis=-1)
+            self._yc = np.mean(_ybound, axis=-1)
+            self._zc = np.zeros_like(self._xc)
+
+        # Generate the mapping from the hash indices to unstructured grid elements
+        self._hash_table = None
+        self._hash_table = self._initialize_hash_table()
+
+    def _initialize_hash_table(self):
+        """Create a mapping that relates unstructured grid faces to hash indices by determining
+        which faces overlap with which hash cells
+        """
+        if self._hash_table is None or self.reconstruct:
+            j, i = np.indices(self._xc.shape)  # Get the indices of the curvilinear grid
+
+            morton_codes = _encode_morton3d(self._xc, self._yc, self._zc)
+            ## Prepare quick lookup (hash) table for relating i,j indices to morton codes
+            # Sort i,j indices by morton code
+            order = np.argsort(morton_codes.ravel())
+            morton_codes_sorted = morton_codes.ravel()[order]
+            i_sorted = i.ravel()[order]
+            j_sorted = j.ravel()[order]
+
+            # Get a list of unique morton codes and their corresponding starts and counts (CSR format)
+            keys, starts, counts = np.unique(morton_codes_sorted, return_index=True, return_counts=True)
+            hash_table = {
+                "keys": keys,
+                "starts": starts,
+                "counts": counts,
+                "i": i_sorted,
+                "j": j_sorted,
+            }
+            return hash_table
 
     def query(
         self,
@@ -169,32 +162,66 @@ def query(
             Face id's in the self._source_grid where each coords element is found. When a coords element is not found, the
             corresponding array entry in faces is set to -1.
         """
-        num_coords = coords.shape[0]
+        keys = self._hash_table["keys"], starts = self._hash_table["starts"], counts = self._hash_table["counts"]
+        i = self._hash_table["i"], j = self._hash_table["j"]
+
+        if self._source_grid.mesh == "spherical":
+            # Convert coords to Cartesian coordinates (x, y, z)
+            lat = np.deg2rad(coords[:, 0])
+            lon = np.deg2rad(coords[:, 1])
+            x, y, z = _latlon_rad_to_xyz(lat, lon)
+        else:
+            # For Cartesian grids, use the coordinates directly
+            x = coords[:, 0]
+            y = coords[:, 1]
+            z = np.zeros_like(x)
+
+        query_codes = _encode_morton3d(x, y, z)
+
+        # Locate each query in the unique key array
+        pos = np.searchsorted(keys, query_codes)
 
-        # Preallocate results
-        faces = np.full((num_coords, 2), -1, dtype=np.int32)
+        # Valid hits: inside range and exact match
+        valid = (pos < len(keys)) & (keys[pos] == query_codes)
 
-        # Get the list of candidate faces for each coordinate
-        candidate_faces = [self._face_hash_table[pid] for pid in self._hash_index(coords)]
+        # How many matches each query has
+        hit_counts = np.where(valid, counts[pos], 0).astype(np.int64)
 
-        for i, (coord, candidates) in enumerate(zip(coords, candidate_faces, strict=False)):
-            for face_id in candidates:
-                yi, xi = self._grid_ji_for_eid(face_id)
-                nodes = np.stack(
-                    (
-                        self._ybound[yi, xi, :],
-                        self._xbound[yi, xi, :],
-                    ),
-                    axis=-1,
-                )
+        # CSR-style offsets (prefix sum), total number of hits
+        offsets = np.empty(hit_counts.size + 1, dtype=np.int64)
+        offsets[0] = 0
+        np.cumsum(hit_counts, out=offsets[1:])
+        total = int(offsets[-1])
 
-                bcoord = np.asarray(_barycentric_coordinates(nodes, coord))
-                err = abs(np.dot(bcoord, nodes[:, 0]) - coord[0]) + abs(np.dot(bcoord, nodes[:, 1]) - coord[1])
-                if (bcoord >= 0).all() and err < tol:
-                    faces[i, :] = [yi, xi]
-                    break
+        # Preallocate output (total,2); early exit if no hits at all
+        matches = np.empty((total, 2), dtype=i.dtype)
+        # if total == 0: # TODO : Do we want to error out here ?
+        #     return matches, offsets
 
-        return faces
+        # For each *element* in the flattened output, figure out which query it belongs to.
+        # Repeat each query index i by its hit_count to build a per-element mapping.
+        q_index_for_elem = np.repeat(np.arange(query_codes.size, dtype=np.int64), hit_counts)
+
+        # For each element, compute its "intra-group" offset (0..hits_i-1).
+        # This is a vectorized trick: a global ramp minus the repeated group starts.
+        intra = np.arange(total, dtype=np.int64) - np.repeat(offsets[:-1], hit_counts)
+
+        # Map each element to a source index in I/J:
+        #   source_start = starts[pos[i]] for the query i
+        #   source_idx   = source_start + intra
+        source_idx = starts[pos[q_index_for_elem]].astype(np.int64) + intra
+
+        # Gather all (I,J) pairs in one shot
+        matches[:, 0] = j[source_idx]
+        matches[:, 1] = i[source_idx]
+
+        # Split into per-query blocks (views, no copies)
+        blocks = np.split(matches, offsets[1:-1])
+        # Wrap into an object array with the same leading shape as query_codes
+        faces = np.empty(np.prod(query_codes.shape, dtype=int), dtype=object)
+        faces[:] = blocks  # no append, just vectorized assignment
+
+        return faces.reshape(query_codes.shape)
 
 
 def _triangle_area(A, B, C):
@@ -242,69 +269,107 @@ def _barycentric_coordinates(nodes, point, min_area=1e-8):
     return barycentric_coords
 
 
-def _planar_quad_area(lat, lon):
-    """Computes the area of each quadrilateral face in a curvilinear grid.
-    The lon and lat arrays are assumed to be 2D arrays of points with dimensions (n_y, n_x).
-    The area is computed using the Shoelace formula.
-    This method is only used during hashgrid construction to determine the size of the hash cells.
+def _latlon_rad_to_xyz(lat, lon):
+    """Converts Spherical latitude and longitude coordinates into Cartesian x,
+    y, z coordinates.
+    """
+    x = np.cos(lon) * np.cos(lat)
+    y = np.sin(lon) * np.cos(lat)
+    z = np.sin(lat)
 
-    Parameters
-    ----------
-    lon : np.ndarray
-        2D array of shape (n_y, n_x) containing the longitude of each corner node of the curvilinear grid.
-    lat : np.ndarray
-        2D array of shape (n_y, n_x) containing the latitude of each corner node of the curvilinear grid.
+    return x, y, z
 
-    Returns
-    -------
-    area : np.ndarray
-        2D array of shape (n_y-1, n_x-1) containing the area of each quadrilateral face in the curvilinear grid.
+
+def _dilate_bits(n):
     """
-    x0 = lon[:-1, :-1]
-    x1 = lon[:-1, 1:]
-    x2 = lon[1:, 1:]
-    x3 = lon[1:, :-1]
+    Takes a 10-bit integer n, in range [0,1023], and "dilates" its bits so that
+    there are two zeros between each bit of n in the result.
 
-    y0 = lat[:-1, :-1]
-    y1 = lat[:-1, 1:]
-    y2 = lat[1:, 1:]
-    y3 = lat[1:, :-1]
+    This is a preparation step for building a 3D Morton code:
+    - One axis (x, y, or z) is dilated like this.
+    - Then the three dilated coordinates are bitwise interleaved
+      to produce the full 30-bit Morton code.
 
-    # Shoelace formula: 0.5 * |sum(x_i*y_{i+1} - x_{i+1}*y_i)|
-    area = 0.5 * np.abs(x0 * y1 + x1 * y2 + x2 * y3 + x3 * y0 - y0 * x1 - y1 * x2 - y2 * x3 - y3 * x0)
-    return area
+    Example:
+        Input n:  b9 b8 b7 b6 b5 b4 b3 b2 b1 b0
+        Output:   b9 0 0 b8 0 0 b7 0 0 ... b0 0 0
+    """
+    n = np.asarray(n, dtype=np.uint32)
 
+    # Step 1: Keep only the lowest 10 bits of n
+    # Mask = 0x3FF = binary 11 1111 1111
+    n &= np.uint32(0x000003FF)
 
-def _curvilinear_grid_facebounds(lat, lon):
-    """Computes the bounds of each curvilinear face in the grid.
-    The lon and lat arrays are assumed to be 2D arrays of points with dimensions (n_y, n_x).
-    The bounds are for faces whose corner node vertices are defined by lat,lon.
-    Face(yi,xi) is surrounding by points (yi,xi), (yi,xi+1), (yi+1,xi+1), (yi+1,xi).
-    This method is only used during hashgrid construction to determine which curvilinear
-    faces overlap with which hash cells.
+    # Step 2: First spreading stage
+    # Shift left by 16 and OR with original.
+    # This spreads the bits apart, but introduces overlaps.
+    # Mask 0xff0000ff clears out the unwanted overlaps.
+    n = (n | (n << np.uint32(16))) & np.uint32(0xFF0000FF)
 
-    Parameters
-    ----------
-    lon : np.ndarray
-        2D array of shape (n_y, n_x) containing the longitude of each corner node of the curvilinear grid.
-    lat : np.ndarray
-        2D array of shape (n_y, n_x) containing the latitude of each corner node of the curvilinear grid.
+    # Step 3: Second spreading stage
+    # Similar idea: shift left by 8, OR, then mask.
+    # Now the bits are further separated.
+    n = (n | (n << np.uint32(8))) & np.uint32(0x0300F00F)
 
-    Returns
-    -------
-    xbounds : np.ndarray
-        Array of shape (n_y-1, n_x-1, 2) containing the bounds of each face in the x-direction.
-    ybounds : np.ndarray
-        Array of shape (n_y-1, n_x-1, 2) containing the bounds of each face in the y-direction.
-    """
-    xf = np.stack((lon[:-1, :-1], lon[:-1, 1:], lon[1:, 1:], lon[1:, :-1]), axis=-1)
-    xf_low = xf.min(axis=-1)
-    xf_high = xf.max(axis=-1)
-    xbounds = np.stack([xf_low, xf_high], axis=-1)
+    # Step 4: Third spreading stage
+    # Shift by 4, OR, mask again.
+    # At this point, there are 1 or 2 zeros between many of the bits.
+    n = (n | (n << np.uint32(4))) & np.uint32(0x030C30C3)
+
+    # Step 5: Final spreading stage
+    # Shift by 2, OR, mask.
+    # After this, each original bit is isolated with exactly two zeros
+    # between it and the next bit, ready for 3D Morton interleaving.
+    n = (n | (n << np.uint32(2))) & np.uint32(0x09249249)
 
-    yf = np.stack((lat[:-1, :-1], lat[:-1, 1:], lat[1:, 1:], lat[1:, :-1]), axis=-1)
-    yf_low = yf.min(axis=-1)
-    yf_high = yf.max(axis=-1)
-    ybounds = np.stack([yf_low, yf_high], axis=-1)
+    # Return the dilated value.
+    return n
 
-    return ybounds, xbounds
+
+def _encode_morton3d(x, y, z, xmin, xmax, ymin, ymax, zmin, zmax):
+    """
+    Quantize (x, y, z) to 10 bits each (0..1023), dilate the bits so there are
+    two zeros between successive bits, and interleave them into a 3D Morton code.
+
+    Notes
+    -----
+    - Works with scalars or NumPy arrays (broadcasting applies).
+    - Output is up to 30 bits; we return np.uint32 (or np.uint64 if you prefer).
+    - Requires `part1by2` defined as in your previous snippet.
+    """
+    # Convert inputs to ndarray for consistent dtype/ufunc behavior.
+    x = np.asarray(x)
+    y = np.asarray(y)
+    z = np.asarray(z)
+
+    # --- 1) Normalize each coordinate to [0, 1] over its bounding box. ---
+    # Compute denominators once (avoid division by zero if bounds equal).
+    dx = xmax - xmin
+    dy = ymax - ymin
+    dz = zmax - zmin
+
+    # Normalize to [0,1]; if a range is degenerate, map to 0 to avoid NaN/inf.
+    xn = np.where(dx != 0, (x - xmin) / dx, 0.0)
+    yn = np.where(dy != 0, (y - ymin) / dy, 0.0)
+    zn = np.where(dz != 0, (z - zmin) / dz, 0.0)
+
+    # --- 2) Quantize to 10 bits (0..1023). ---
+    # Multiply by 1023, round down, and clip to be safe against slight overshoot.
+    xq = np.clip((xn * 1023.0).astype(np.uint32), 0, 1023)
+    yq = np.clip((yn * 1023.0).astype(np.uint32), 0, 1023)
+    zq = np.clip((zn * 1023.0).astype(np.uint32), 0, 1023)
+
+    # --- 3) Bit-dilate each 10-bit number so each bit is separated by two zeros. ---
+    # _dilate_bits maps:  b9..b0  ->  b9 0 0 b8 0 0 ... b0 0 0
+    dx3 = _dilate_bits(xq).astype(np.uint64)
+    dy3 = _dilate_bits(yq).astype(np.uint64)
+    dz3 = _dilate_bits(zq).astype(np.uint64)
+
+    # --- 4) Interleave the dilated bits into a single Morton code. ---
+    # Bit layout (from LSB upward): x0,y0,z0, x1,y1,z1, ..., x9,y9,z9
+    # We shift z's bits by 2, y's by 1, x stays at 0, then OR them together.
+    # Cast to a wide type before shifting/OR to be safe when arrays are used.
+    code = (dz3 << 2) | (dy3 << 1) | dx3
+
+    # If you want a compact type, it fits in 30 bits; uint32 is enough.
+    return code.astype(np.uint32)

From e0d3c36519208c3b79651d6dfa07e7cd6902bbc9 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Fri, 22 Aug 2025 17:17:18 -0400
Subject: [PATCH 2/9] Add search for minimum centroid distance for j,i search

---
 parcels/spatialhash.py | 133 +++++++++++++++++++++++++----------------
 1 file changed, 82 insertions(+), 51 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 760ed8a5d..0567ffabf 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -30,15 +30,6 @@ def __init__(
         self._source_grid = grid
         self.reconstruct = reconstruct
 
-        # Hash grid size
-        self._dh = self._hash_cell_size()
-
-        # # Lower left corner of the hash grid
-        # lon_min = self._source_grid.lon.min()
-        # lat_min = self._source_grid.lat.min()
-        # lon_max = self._source_grid.lon.max()
-        # lat_max = self._source_grid.lat.max()
-
         if self._source_grid.mesh == "spherical":
             # Boundaries of the hash grid are the unit cube
             self._xmin = -1.0
@@ -85,8 +76,11 @@ def __init__(
         else:
             # Boundaries of the hash grid are the bounding box of the source grid
             self._xmin = self._source_grid.lon.min()
+            self._xmax = self._source_grid.lon.max()
             self._ymin = self._source_grid.lat.min()
+            self._ymax = self._source_grid.lat.max()
             self._zmin = 0.0
+            self._zmax = 0.0
             x = self._source_grid.lon
             y = self._source_grid.lat
 
@@ -124,7 +118,9 @@ def _initialize_hash_table(self):
         if self._hash_table is None or self.reconstruct:
             j, i = np.indices(self._xc.shape)  # Get the indices of the curvilinear grid
 
-            morton_codes = _encode_morton3d(self._xc, self._yc, self._zc)
+            morton_codes = _encode_morton3d(
+                self._xc, self._yc, self._zc, self._xmin, self._xmax, self._ymin, self._ymax, self._zmin, self._zmax
+            )
             ## Prepare quick lookup (hash) table for relating i,j indices to morton codes
             # Sort i,j indices by morton code
             order = np.argsort(morton_codes.ravel())
@@ -146,46 +142,55 @@ def _initialize_hash_table(self):
     def query(
         self,
         coords,
-        tol=1e-6,
     ):
-        """Queries the hash table.
+        """
+        Queries the hash table and finds the closes face in the source grid for each coordinate pair.
 
         Parameters
         ----------
         coords : array_like
-            coordinate pairs in degrees (lat, lon) to query.
-
+            coordinate pairs in degrees (lat, lon) to query of shape (N,2) where N is the number of queries.
 
         Returns
         -------
-        faces : ndarray of shape (coords.shape[0]), dtype=np.int32
-            Face id's in the self._source_grid where each coords element is found. When a coords element is not found, the
-            corresponding array entry in faces is set to -1.
+        faces : ndarray of shape (N,2), dtype=np.int32
+            For each coordinate pair, returns the (j,i) indices of the closest face in the hash grid.
+            If no face is found, returns (-1,-1) for that query.
         """
-        keys = self._hash_table["keys"], starts = self._hash_table["starts"], counts = self._hash_table["counts"]
-        i = self._hash_table["i"], j = self._hash_table["j"]
+        keys = self._hash_table["keys"]
+        starts = self._hash_table["starts"]
+        counts = self._hash_table["counts"]
+        i = self._hash_table["i"]
+        j = self._hash_table["j"]
+
+        xc = self._xc
+        yc = self._yc
+        zc = self._zc
 
         if self._source_grid.mesh == "spherical":
             # Convert coords to Cartesian coordinates (x, y, z)
             lat = np.deg2rad(coords[:, 0])
             lon = np.deg2rad(coords[:, 1])
-            x, y, z = _latlon_rad_to_xyz(lat, lon)
+            qx, qy, qz = _latlon_rad_to_xyz(lat, lon)
         else:
             # For Cartesian grids, use the coordinates directly
-            x = coords[:, 0]
-            y = coords[:, 1]
-            z = np.zeros_like(x)
+            qx = coords[:, 0]
+            qy = coords[:, 1]
+            qz = np.zeros_like(qx)
 
-        query_codes = _encode_morton3d(x, y, z)
+        query_codes = _encode_morton3d(
+            qx, qy, qz, self._xmin, self._xmax, self._ymin, self._ymax, self._zmin, self._zmax
+        ).ravel()
+        num_queries = query_codes.size
 
         # Locate each query in the unique key array
-        pos = np.searchsorted(keys, query_codes)
+        pos = np.searchsorted(keys, query_codes)  # pos is shape (N,)
 
         # Valid hits: inside range and exact match
-        valid = (pos < len(keys)) & (keys[pos] == query_codes)
+        valid = (pos < len(keys)) & (keys[pos] == query_codes)  # has shape (N,)
 
-        # How many matches each query has
-        hit_counts = np.where(valid, counts[pos], 0).astype(np.int64)
+        # How many matches each query has; hit_counts[i] is the number of hits for query i
+        hit_counts = np.where(valid, counts[pos], 0).astype(np.int64)  # has shape (N,)
 
         # CSR-style offsets (prefix sum), total number of hits
         offsets = np.empty(hit_counts.size + 1, dtype=np.int64)
@@ -193,35 +198,61 @@ def query(
         np.cumsum(hit_counts, out=offsets[1:])
         total = int(offsets[-1])
 
-        # Preallocate output (total,2); early exit if no hits at all
-        matches = np.empty((total, 2), dtype=i.dtype)
-        # if total == 0: # TODO : Do we want to error out here ?
-        #     return matches, offsets
-
-        # For each *element* in the flattened output, figure out which query it belongs to.
-        # Repeat each query index i by its hit_count to build a per-element mapping.
-        q_index_for_elem = np.repeat(np.arange(query_codes.size, dtype=np.int64), hit_counts)
+        # Now, we need to create some quick lookup arrays that give us the list of positions in the hash table
+        # that correspond to each query.
+        # Create a quick lookup array that maps each element of all the valid queries (with repeats) to its query index
+        q_index_for_elem = np.repeat(np.arange(num_queries, dtype=np.int64), hit_counts)  # This has shape (total,)
 
         # For each element, compute its "intra-group" offset (0..hits_i-1).
-        # This is a vectorized trick: a global ramp minus the repeated group starts.
         intra = np.arange(total, dtype=np.int64) - np.repeat(offsets[:-1], hit_counts)
 
-        # Map each element to a source index in I/J:
-        #   source_start = starts[pos[i]] for the query i
-        #   source_idx   = source_start + intra
+        # starts[pos[q_index_for_elem]] + intra gives a list of positions in the hash table that we can
+        # use to quickly gather the (i,j) pairs for each query
         source_idx = starts[pos[q_index_for_elem]].astype(np.int64) + intra
 
-        # Gather all (I,J) pairs in one shot
-        matches[:, 0] = j[source_idx]
-        matches[:, 1] = i[source_idx]
-
-        # Split into per-query blocks (views, no copies)
-        blocks = np.split(matches, offsets[1:-1])
-        # Wrap into an object array with the same leading shape as query_codes
-        faces = np.empty(np.prod(query_codes.shape, dtype=int), dtype=object)
-        faces[:] = blocks  # no append, just vectorized assignment
-
-        return faces.reshape(query_codes.shape)
+        # Gather all (j,i) pairs in one shot
+        j_all = j[source_idx]
+        i_all = i[source_idx]
+
+        # Gather centroid coordinates at those (j,i)
+        xc_all = xc[j_all, i_all]
+        yc_all = yc[j_all, i_all]
+        zc_all = zc[j_all, i_all]
+
+        # Broadcast to flat (same as q_flat), then repeat per candidate
+        # This makes it easy to compute distances from the query points
+        # to the candidate points for minimization.
+        qx_all = np.repeat(qx.ravel(), hit_counts)
+        qy_all = np.repeat(qy.ravel(), hit_counts)
+        qz_all = np.repeat(qz.ravel(), hit_counts)
+
+        # Squared distances for all candidates
+        dist_all = (xc_all - qx_all) ** 2 + (yc_all - qy_all) ** 2 + (zc_all - qz_all) ** 2
+
+        # Segment-wise minima per query using reduceat
+        # For each query, we need to find the minimum distance.
+        dmin_per_q = np.minimum.reduceat(dist_all, offsets[:-1])
+
+        # To get argmin indices per query (without loops):
+        # Build a masked "within-index" array that is large unless it equals the segment-min.
+        big = np.iinfo(np.int64).max
+        within_masked = np.where(dist_all == np.repeat(dmin_per_q, hit_counts), intra, big)
+        argmin_within = np.minimum.reduceat(within_masked, offsets[:-1])  # first occurrence in ties
+
+        # Build absolute source index for the winning candidate in each query
+        start_for_q = np.where(valid, starts[pos], 0)  # 0 is dummy for invalid queries
+        src_best = (start_for_q + argmin_within).astype(np.int64)
+
+        # Write outputs only for queries that had candidates
+        # Pre-allocate i and j indices of the best match for each query
+        # Default values to -1 (no match case)
+        j_best = np.full(num_queries, -1, dtype=np.int64)
+        i_best = np.full(num_queries, -1, dtype=np.int64)
+        has_hits = hit_counts > 0
+        j_best[has_hits] = i_all[src_best[has_hits]]
+        i_best[has_hits] = j_all[src_best[has_hits]]
+
+        return (j_best.reshape(query_codes.shape), i_best.reshape(query_codes.shape))
 
 
 def _triangle_area(A, B, C):

From 59df08ad6a8c98eb3c03781655804a5e5fe554e1 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Fri, 22 Aug 2025 21:46:25 -0400
Subject: [PATCH 3/9] Relax exact match on morton code

---
 parcels/spatialhash.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 0567ffabf..5b4734198 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -141,15 +141,18 @@ def _initialize_hash_table(self):
 
     def query(
         self,
-        coords,
+        y,
+        x,
     ):
         """
         Queries the hash table and finds the closes face in the source grid for each coordinate pair.
 
         Parameters
         ----------
-        coords : array_like
-            coordinate pairs in degrees (lat, lon) to query of shape (N,2) where N is the number of queries.
+        y : array_like
+            y-coordinates in degrees (lat) to query of shape (N,) where N is the number of queries.
+        x : array_like
+            x-coordinates in degrees (lon) to query of shape (N,) where N is the number of queries.
 
         Returns
         -------
@@ -167,15 +170,17 @@ def query(
         yc = self._yc
         zc = self._zc
 
+        y = np.asarray(y)
+        x = np.asarray(x)
         if self._source_grid.mesh == "spherical":
             # Convert coords to Cartesian coordinates (x, y, z)
-            lat = np.deg2rad(coords[:, 0])
-            lon = np.deg2rad(coords[:, 1])
+            lat = np.deg2rad(y)
+            lon = np.deg2rad(x)
             qx, qy, qz = _latlon_rad_to_xyz(lat, lon)
         else:
             # For Cartesian grids, use the coordinates directly
-            qx = coords[:, 0]
-            qy = coords[:, 1]
+            qx = x
+            qy = y
             qz = np.zeros_like(qx)
 
         query_codes = _encode_morton3d(
@@ -186,8 +191,8 @@ def query(
         # Locate each query in the unique key array
         pos = np.searchsorted(keys, query_codes)  # pos is shape (N,)
 
-        # Valid hits: inside range and exact match
-        valid = (pos < len(keys)) & (keys[pos] == query_codes)  # has shape (N,)
+        # Valid hits: inside range
+        valid = pos < len(keys)
 
         # How many matches each query has; hit_counts[i] is the number of hits for query i
         hit_counts = np.where(valid, counts[pos], 0).astype(np.int64)  # has shape (N,)

From 6838ae9f68b672e11af5c6b7468c96de720503fb Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Sat, 23 Aug 2025 14:28:45 -0400
Subject: [PATCH 4/9] Fix bug in vectorized spatialhash search

---
 parcels/_index_search.py | 3 +--
 parcels/spatialhash.py   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/parcels/_index_search.py b/parcels/_index_search.py
index f95c215b0..0a00f3c07 100644
--- a/parcels/_index_search.py
+++ b/parcels/_index_search.py
@@ -254,8 +254,7 @@ def _search_indices_curvilinear_2d(
 ):  # TODO fix typing instructions to make clear that y, x etc need to be ndarrays
     yi, xi = yi_guess, xi_guess
     if yi is None or xi is None:
-        faces = grid.get_spatial_hash().query(np.column_stack((y, x)))
-        yi, xi = faces[0]
+        yi, xi = grid.get_spatial_hash().query(y, x)
 
     xsi = eta = -1.0 * np.ones(len(x), dtype=float)
     invA = np.array(
diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 5b4734198..1d74862de 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -254,8 +254,8 @@ def query(
         j_best = np.full(num_queries, -1, dtype=np.int64)
         i_best = np.full(num_queries, -1, dtype=np.int64)
         has_hits = hit_counts > 0
-        j_best[has_hits] = i_all[src_best[has_hits]]
-        i_best[has_hits] = j_all[src_best[has_hits]]
+        j_best[has_hits] = j[src_best[has_hits]]
+        i_best[has_hits] = i[src_best[has_hits]]
 
         return (j_best.reshape(query_codes.shape), i_best.reshape(query_codes.shape))
 

From f53003a38ad0acda5aadea5fd8622d992a7b5d2f Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Sat, 23 Aug 2025 17:18:24 -0400
Subject: [PATCH 5/9] Fix reference to _source_grid._mesh attribute

---
 parcels/spatialhash.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 1d74862de..40b39cfda 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -30,7 +30,7 @@ def __init__(
         self._source_grid = grid
         self.reconstruct = reconstruct
 
-        if self._source_grid.mesh == "spherical":
+        if self._source_grid._mesh == "spherical":
             # Boundaries of the hash grid are the unit cube
             self._xmin = -1.0
             self._ymin = -1.0
@@ -172,7 +172,7 @@ def query(
 
         y = np.asarray(y)
         x = np.asarray(x)
-        if self._source_grid.mesh == "spherical":
+        if self._source_grid._mesh == "spherical":
             # Convert coords to Cartesian coordinates (x, y, z)
             lat = np.deg2rad(y)
             lon = np.deg2rad(x)

From e604f6f701139c5ddf15c572541f3966765a8d3d Mon Sep 17 00:00:00 2001
From: Joe Schoonover <11430768+fluidnumerics-joe@users.noreply.github.com>
Date: Mon, 25 Aug 2025 07:14:28 -0400
Subject: [PATCH 6/9] Update parcels/spatialhash.py

Co-authored-by: Erik van Sebille <e.vansebille@uu.nl>
---
 parcels/spatialhash.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 40b39cfda..80ce3f93f 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -385,9 +385,10 @@ def _encode_morton3d(x, y, z, xmin, xmax, ymin, ymax, zmin, zmax):
     dz = zmax - zmin
 
     # Normalize to [0,1]; if a range is degenerate, map to 0 to avoid NaN/inf.
-    xn = np.where(dx != 0, (x - xmin) / dx, 0.0)
-    yn = np.where(dy != 0, (y - ymin) / dy, 0.0)
-    zn = np.where(dz != 0, (z - zmin) / dz, 0.0)
+    with np.errstate(invalid="ignore"):
+        xn = np.where(dx != 0, (x - xmin) / dx, 0.0)
+        yn = np.where(dy != 0, (y - ymin) / dy, 0.0)
+        zn = np.where(dz != 0, (z - zmin) / dz, 0.0)
 
     # --- 2) Quantize to 10 bits (0..1023). ---
     # Multiply by 1023, round down, and clip to be safe against slight overshoot.

From 190c5e13943782e415802fbef96bf3a8bb9bfcd1 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <11430768+fluidnumerics-joe@users.noreply.github.com>
Date: Mon, 25 Aug 2025 07:14:53 -0400
Subject: [PATCH 7/9] Update parcels/spatialhash.py

Co-authored-by: Erik van Sebille <e.vansebille@uu.nl>
---
 parcels/spatialhash.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 80ce3f93f..bfa0042fc 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -408,5 +408,5 @@ def _encode_morton3d(x, y, z, xmin, xmax, ymin, ymax, zmin, zmax):
     # Cast to a wide type before shifting/OR to be safe when arrays are used.
     code = (dz3 << 2) | (dy3 << 1) | dx3
 
-    # If you want a compact type, it fits in 30 bits; uint32 is enough.
+    # Since our compact type fits in 30 bits, uint32 is enough.
     return code.astype(np.uint32)

From 06211ab1b67132cd5ea7fb1e0002dd3f8dbf3d15 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <11430768+fluidnumerics-joe@users.noreply.github.com>
Date: Mon, 25 Aug 2025 10:08:03 -0400
Subject: [PATCH 8/9] Add comment for zmin/zmax setting for flat mesh

Co-authored-by: Erik van Sebille <e.vansebille@uu.nl>
---
 parcels/spatialhash.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index bfa0042fc..2045d5fab 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -79,6 +79,7 @@ def __init__(
             self._xmax = self._source_grid.lon.max()
             self._ymin = self._source_grid.lat.min()
             self._ymax = self._source_grid.lat.max()
+            # setting min and max below is needed for mesh="flat"
             self._zmin = 0.0
             self._zmax = 0.0
             x = self._source_grid.lon

From 46ce99eefdf234f0a118066f094a115c9878ec26 Mon Sep 17 00:00:00 2001
From: Joe Schoonover <joe@fluidnumerics.com>
Date: Mon, 25 Aug 2025 10:11:55 -0400
Subject: [PATCH 9/9] Fix docstrings for morton code and dilate_bits

---
 parcels/spatialhash.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/parcels/spatialhash.py b/parcels/spatialhash.py
index 2045d5fab..dddba7432 100644
--- a/parcels/spatialhash.py
+++ b/parcels/spatialhash.py
@@ -368,11 +368,26 @@ def _encode_morton3d(x, y, z, xmin, xmax, ymin, ymax, zmin, zmax):
     Quantize (x, y, z) to 10 bits each (0..1023), dilate the bits so there are
     two zeros between successive bits, and interleave them into a 3D Morton code.
 
+    Parameters
+    ----------
+    x, y, z : array_like
+        Input coordinates to encode. Can be scalars or arrays (broadcasting applies).
+    xmin, xmax : float
+        Minimum and maximum bounds for x coordinate.
+    ymin, ymax : float
+        Minimum and maximum bounds for y coordinate.
+    zmin, zmax : float
+        Minimum and maximum bounds for z coordinate.
+
+    Returns
+    -------
+    code : ndarray, dtype=uint32
+        The resulting Morton codes, same shape as the broadcasted input coordinates.
+
     Notes
     -----
     - Works with scalars or NumPy arrays (broadcasting applies).
-    - Output is up to 30 bits; we return np.uint32 (or np.uint64 if you prefer).
-    - Requires `part1by2` defined as in your previous snippet.
+    - Output is up to 30 bits returned as uint32.
     """
     # Convert inputs to ndarray for consistent dtype/ufunc behavior.
     x = np.asarray(x)