From b35e0abe3f546e115172eb3d5cc2c94159cefd94 Mon Sep 17 00:00:00 2001
From: Daniel Lusk
Date: Thu, 25 May 2023 12:05:53 +0200
Subject: [PATCH 01/14] fix deprecated shapely syntax

---
 spacv/grid_builder.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/spacv/grid_builder.py b/spacv/grid_builder.py
index bb75782..984663a 100644
--- a/spacv/grid_builder.py
+++ b/spacv/grid_builder.py
@@ -1,9 +1,10 @@
-import numpy as np
 import geopandas as gpd
-from shapely.geometry import Polygon, asPolygon
+import numpy as np
 from matplotlib.collections import PolyCollection
+from shapely.geometry import Polygon
 from sklearn.neighbors import BallTree
-from .utils import convert_geodataframe, geometry_to_2d, convert_numpy
+
+from .utils import convert_geodataframe, convert_numpy, geometry_to_2d
 
 __all__ = [
     "construct_blocks",
@@ -109,7 +110,7 @@ def construct_square_grid(XYs, tiles_x, tiles_y):
     top_left = np.add(minx, np.multiply(rows, dx)), np.add(miny, np.multiply(columns+1, dy))
 
     polys = np.vstack([bottom_left, bottom_right, top_right, top_left]).reshape(4,2,-1)
-    polys = [asPolygon(polys[:,:,i]) for i in range(tiles_x*tiles_y)]
+    polys = [Polygon(polys[:,:,i]) for i in range(tiles_x*tiles_y)]
 
     grid = gpd.GeoDataFrame({'geometry':polys})
 

From a8aeb9f066c1bd76761e83d8b9f6864c6b1b9620 Mon Sep 17 00:00:00 2001
From: Daniel Lusk
Date: Thu, 25 May 2023 12:58:24 +0200
Subject: [PATCH 02/14] fix deprecated np type

---
 spacv/base_classes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacv/base_classes.py b/spacv/base_classes.py
index 45609f0..6354cdd 100644
--- a/spacv/base_classes.py
+++ b/spacv/base_classes.py
@@ -71,7 +71,7 @@ def _remove_buffered_indices(self, XYs, test_indices, buffer_radius, geometry_bu
             return test_indices, train_exclude
         else:
             # Yield empty array because no training data removed in dead zone when buffer is zero
-            _ = np.empty([], dtype=np.int)
+            _ = np.empty([], dtype=int)
             return test_indices, _
 
     @abstractmethod

From b29761224ef795e9861428829956033c07031e28 Mon Sep 17 00:00:00 2001
From: Daniel Lusk
Date: Thu, 25 May 2023 12:59:02 +0200
Subject: [PATCH 03/14] fix error when no deadzones (zero buffer)

---
 spacv/base_classes.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/spacv/base_classes.py b/spacv/base_classes.py
index 6354cdd..eb76c2a 100644
--- a/spacv/base_classes.py
+++ b/spacv/base_classes.py
@@ -44,8 +44,11 @@ def split(self, XYs):
         indices = XYs.index.values
 
         for test_indices, train_excluded in self._iter_test_indices(XYs):
-            # Exclude the training indices within buffer
-            train_excluded = np.concatenate([test_indices, train_excluded])
+            if train_excluded.ndim and train_excluded.size:
+                # Exclude the training indices within buffer
+                train_excluded = np.concatenate([test_indices, train_excluded])
+            else:
+                train_excluded = test_indices
             train_index = np.setdiff1d(
                 np.union1d(
                     indices,

From 5571e8b485857aa17fc65484795537287a983b2c Mon Sep 17 00:00:00 2001
From: Daniel Lusk
Date: Thu, 25 May 2023 13:00:00 +0200
Subject: [PATCH 04/14] formatting

---
 spacv/base_classes.py | 9 ++++++---
 spacv/spacv.py        | 10 ++++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/spacv/base_classes.py b/spacv/base_classes.py
index eb76c2a..e0b23e1 100644
--- a/spacv/base_classes.py
+++ b/spacv/base_classes.py
@@ -1,8 +1,11 @@
-from abc import ABC, abstractmethod, ABCMeta
-import numpy as np
+from abc import ABC, ABCMeta, abstractmethod
+
 import geopandas as gpd
+import numpy as np
 from sklearn.model_selection import
BaseCrossValidator -from .utils import convert_geoseries, convert_geodataframe + +from .utils import convert_geodataframe, convert_geoseries + class BaseSpatialCV(BaseCrossValidator, metaclass=ABCMeta): """ diff --git a/spacv/spacv.py b/spacv/spacv.py index 6f27581..6bc5df4 100644 --- a/spacv/spacv.py +++ b/spacv/spacv.py @@ -1,11 +1,13 @@ -import warnings import numbers -import numpy as np +import warnings + import geopandas as gpd +import numpy as np from sklearn.cluster import MiniBatchKMeans + from .base_classes import BaseSpatialCV -from .grid_builder import construct_blocks, assign_pt_to_grid -from .utils import geometry_to_2d, convert_geodataframe, load_custom_polygon +from .grid_builder import assign_pt_to_grid, construct_blocks +from .utils import convert_geodataframe, geometry_to_2d, load_custom_polygon __all__ = [ "HBLOCK", From 04bf503bc98942792586fa170041af126f47f354 Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sat, 27 May 2023 12:37:14 +0200 Subject: [PATCH 05/14] formatting updates for consistencty (black) --- spacv/base_classes.py | 65 +++++---- spacv/grid_builder.py | 231 +++++++++++++++++-------------- spacv/spacv.py | 284 +++++++++++++++++++------------------- spacv/tests/test_cv.py | 151 +++++++++++--------- spacv/tests/test_grids.py | 156 ++++++++++----------- spacv/tests/test_viz.py | 2 +- spacv/utils.py | 26 ++-- spacv/visualisation.py | 222 ++++++++++++++++------------- 8 files changed, 605 insertions(+), 532 deletions(-) diff --git a/spacv/base_classes.py b/spacv/base_classes.py index e0b23e1..cc296f4 100644 --- a/spacv/base_classes.py +++ b/spacv/base_classes.py @@ -1,4 +1,4 @@ -from abc import ABC, ABCMeta, abstractmethod +from abc import ABCMeta, abstractmethod import geopandas as gpd import numpy as np @@ -11,21 +11,20 @@ class BaseSpatialCV(BaseCrossValidator, metaclass=ABCMeta): """ Base class for partitioning-based spatial cross-validation approaches. """ - def __init__( - self - ): + + def __init__(self): self.buffer_radius = buffer_radius - + def split(self, XYs): """ Generate indices to split data into training and test set. - + Parameters ---------- XYs : GeoSeries GeoSeries containing shapely Points that identify Easting and Northing coordinates of data points. - + Yields ------ train : ndarray @@ -35,80 +34,80 @@ def split(self, XYs): """ XYs = convert_geoseries(XYs).reset_index(drop=True) minx, miny, maxx, maxy = XYs.total_bounds - + buffer_radius = self.buffer_radius - if buffer_radius > maxx-minx or buffer_radius > maxy-miny: + if buffer_radius > maxx - minx or buffer_radius > maxy - miny: raise ValueError( "buffer_radius too large and excludes all points. Given {}.".format( self.buffer_radius ) ) - num_samples = XYs.shape[0] + # num_samples = XYs.shape[0] indices = XYs.index.values - - for test_indices, train_excluded in self._iter_test_indices(XYs): + + for test_indices, train_excluded in self._iter_test_indices(XYs): if train_excluded.ndim and train_excluded.size: # Exclude the training indices within buffer train_excluded = np.concatenate([test_indices, train_excluded]) else: train_excluded = test_indices train_index = np.setdiff1d( - np.union1d( - indices, - train_excluded - ), np.intersect1d(indices, train_excluded) - ) + np.union1d(indices, train_excluded), + np.intersect1d(indices, train_excluded), + ) if len(train_index) < 1: raise ValueError( "Training set is empty. Try lowering buffer_radius to include more training instances." 
) - test_index = indices[test_indices] + test_index = indices[test_indices] yield train_index, test_index - - def _remove_buffered_indices(self, XYs, test_indices, buffer_radius, geometry_buffer): + + def _remove_buffered_indices( + self, XYs, test_indices, buffer_radius, geometry_buffer + ): # Remove training points from dead zone buffer - if buffer_radius > 0: - # Buffer grid and clip training instances - candidate_deadzone = XYs.loc[~XYs.index.isin( test_indices )] + if buffer_radius > 0: + # Buffer grid and clip training instances + candidate_deadzone = XYs.loc[~XYs.index.isin(test_indices)] candidate_deadzone = convert_geodataframe(candidate_deadzone) geometry_buffer = convert_geodataframe(geometry_buffer) deadzone_points = gpd.sjoin(candidate_deadzone, geometry_buffer) - train_exclude = deadzone_points.loc[~deadzone_points.index.isin(test_indices)].index.values + train_exclude = deadzone_points.loc[ + ~deadzone_points.index.isin(test_indices) + ].index.values return test_indices, train_exclude else: # Yield empty array because no training data removed in dead zone when buffer is zero _ = np.empty([], dtype=int) return test_indices, _ - + @abstractmethod def _iter_test_indices(self, XYs): """ - Generates integer indices corresponding to test sets and + Generates integer indices corresponding to test sets and training indices to be excluded from model training. - + Parameters ---------- X : GeoSeries GeoSeries containing shapely Points that identify Easting and Northing coordinates of data points. - + Yields ------ test_indices : array The testing set indices for that fold. train_exclude : array The training set indices to exclude for that fold. - """ - + """ + def get_n_splits(self): """ Returns the number of folds used in the cross-validation. - + Returns ------- n_splits : int Returns the number of folds in the cross-validator. """ return self.n_splits - - \ No newline at end of file diff --git a/spacv/grid_builder.py b/spacv/grid_builder.py index 984663a..81e4bb5 100644 --- a/spacv/grid_builder.py +++ b/spacv/grid_builder.py @@ -6,20 +6,27 @@ from .utils import convert_geodataframe, convert_numpy, geometry_to_2d -__all__ = [ - "construct_blocks", - "construct_square_grid", - "construct_hex_grid" -] - -def construct_blocks(XYs, tiles_x, tiles_y, method='unique', shape='square', - direction='diagonal', data=None, n_groups=5, n_sims=10, - distance_metric='euclidean', random_state=None): +__all__ = ["construct_blocks", "construct_square_grid", "construct_hex_grid"] + + +def construct_blocks( + XYs, + tiles_x, + tiles_y, + method="unique", + shape="square", + direction="diagonal", + data=None, + n_groups=5, + n_sims=10, + distance_metric="euclidean", + random_state=None, +): """ Build a grid over study area with user-defined number of tiles. This function exposes a number of configurable parameters to design the shape - of grid, but also the fold assignment technique. - + of grid, but also the fold assignment technique. + Parameters ---------- XYs : Geoseries series @@ -39,7 +46,7 @@ def construct_blocks(XYs, tiles_x, tiles_y, method='unique', shape='square', If int, random_state is the seed used by the random number generator. If None, the random number generator is the RandomState instance used by `np.random`. 
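# Editor's note: a minimal, hypothetical usage sketch for construct_blocks
# (not part of the patch; the random points are illustration data only).
import geopandas as gpd
import numpy as np

from spacv.grid_builder import construct_blocks

xs = np.random.randint(0, 1000, 50)
ys = np.random.randint(0, 1000, 50)
pts = gpd.GeoSeries(gpd.points_from_xy(xs, ys))

# 5 x 5 square grid where every tile is its own fold
grid = construct_blocks(pts, tiles_x=5, tiles_y=5, method="unique", shape="square")
print(grid[["geometry", "grid_id"]].head())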
- + Returns ------- grid : GeoDataFrame Dataframe @@ -48,39 +55,43 @@ def construct_blocks(XYs, tiles_x, tiles_y, method='unique', shape='square', """ # Construct grid of polygons of defined size and shape grid = construct_grid(XYs, tiles_x, tiles_y, shape) - + # Set grid assignment method - if method == 'unique': - grid['grid_id'] = grid.index - elif method == 'systematic': - if shape != 'square': - raise Exception("systematic grid assignment method does not work for irregular grids.") - grid['grid_id'] = assign_systematic(grid, tiles_x, tiles_y, direction) - elif method == 'random': - grid['grid_id'] = assign_randomized(grid, n_groups, random_state) - elif method == 'optimized_random': - grid['grid_id'] = assign_optimized_random(grid, XYs, data, - n_groups, - n_sims, - distance_metric) + if method == "unique": + grid["grid_id"] = grid.index + elif method == "systematic": + if shape != "square": + raise Exception( + "systematic grid assignment method does not work for irregular grids." + ) + grid["grid_id"] = assign_systematic(grid, tiles_x, tiles_y, direction) + elif method == "random": + grid["grid_id"] = assign_randomized(grid, n_groups, random_state) + elif method == "optimized_random": + grid["grid_id"] = assign_optimized_random( + grid, XYs, data, n_groups, n_sims, distance_metric + ) else: - raise ValueError("Method not recognised. Choose between: unique, systematic, random or optimized_random.") + raise ValueError( + "Method not recognised. Choose between: unique, systematic, random or optimized_random." + ) return grid - + + def construct_grid(XYs, tiles_x, tiles_y, shape): """ Choose grid shape to build across bounds of study area. """ - if shape == 'square': + if shape == "square": return construct_square_grid(XYs, tiles_x, tiles_y) - if shape == 'hex': + if shape == "hex": return construct_hex_grid(XYs, tiles_x, tiles_y) - + def construct_square_grid(XYs, tiles_x, tiles_y): """ Build square grid over study area with user-defined number of tiles. - + Parameters ---------- XYs : Geoseries series @@ -89,37 +100,48 @@ def construct_square_grid(XYs, tiles_x, tiles_y): Integer declaring number of tiles along X axis. tiles_y : integer Integer declaring number of tiles along Y axis. - + Returns ------- grid : GeoDataFrame Dataframe GeoDataFrame with square grids as shapely polygons. 
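# Editor's note: background for the asPolygon -> Polygon change in PATCH 01.
# The shapely array adapters (asPolygon and friends) were deprecated in 1.8
# and removed in 2.0; Polygon() accepts the same (N, 2) coordinate array
# directly. A minimal sketch:
import numpy as np
from shapely.geometry import Polygon

coords = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
square = Polygon(coords)  # replaces the deprecated asPolygon(coords)
print(square.area)        # 1.0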
- + """ minx, miny, maxx, maxy = XYs.total_bounds - rows = np.array(list(np.arange(0, tiles_x))*tiles_y) + rows = np.array(list(np.arange(0, tiles_x)) * tiles_y) columns = np.repeat(np.arange(0, tiles_y), tiles_x) - + dx = (maxx - minx) / tiles_x dy = (maxy - miny) / tiles_y - bottom_left = np.add(minx, np.multiply(rows, dx)), np.add(miny, np.multiply(columns, dy)) - bottom_right = np.add(minx, np.multiply(rows+1, dx)), np.add(miny, np.multiply(columns, dy)) - top_right = np.add(minx, np.multiply(rows+1, dx)), np.add(miny, np.multiply(columns+1, dy)) - top_left = np.add(minx, np.multiply(rows, dx)), np.add(miny, np.multiply(columns+1, dy)) + bottom_left = np.add(minx, np.multiply(rows, dx)), np.add( + miny, np.multiply(columns, dy) + ) + bottom_right = np.add(minx, np.multiply(rows + 1, dx)), np.add( + miny, np.multiply(columns, dy) + ) + top_right = np.add(minx, np.multiply(rows + 1, dx)), np.add( + miny, np.multiply(columns + 1, dy) + ) + top_left = np.add(minx, np.multiply(rows, dx)), np.add( + miny, np.multiply(columns + 1, dy) + ) - polys = np.vstack([bottom_left, bottom_right, top_right, top_left]).reshape(4,2,-1) - polys = [Polygon(polys[:,:,i]) for i in range(tiles_x*tiles_y)] + polys = np.vstack([bottom_left, bottom_right, top_right, top_left]).reshape( + 4, 2, -1 + ) + polys = [Polygon(polys[:, :, i]) for i in range(tiles_x * tiles_y)] - grid = gpd.GeoDataFrame({'geometry':polys}) + grid = gpd.GeoDataFrame({"geometry": polys}) return grid + def construct_hex_grid(XYs, tiles_x, tiles_y): """ Build hexagon grid over study area with user-defined number of tiles. - + Parameters ---------- XYs : Geoseries series @@ -128,15 +150,15 @@ def construct_hex_grid(XYs, tiles_x, tiles_y): Integer declaring number of tiles along X axis. tiles_y : integer Integer declaring number of tiles along Y axis. - + Returns ------- grid : GeoDataFrame Dataframe GeoDataFrame with hexagon grids as shapely polygons. 
- + """ minx, miny, maxx, maxy = XYs.total_bounds - padding = 1.e-9 * (maxx - minx) + padding = 1.0e-9 * (maxx - minx) minx -= padding maxx += padding dx = (maxx - minx) / tiles_x @@ -147,56 +169,55 @@ def construct_hex_grid(XYs, tiles_x, tiles_y): n = tiles_x1 * tiles_y1 + tiles_x * tiles_y offsets = np.zeros((n, 2), float) - offsets[:tiles_x1 * tiles_y1, 0] = np.repeat(np.arange(tiles_x1), tiles_y1) - offsets[:tiles_x1 * tiles_y1, 1] = np.tile(np.arange(tiles_y1), tiles_x1) - offsets[tiles_x1 * tiles_y1:, 0] = np.repeat(np.arange(tiles_x) + 0.5, tiles_y) - offsets[tiles_x1 * tiles_y1:, 1] = np.tile(np.arange(tiles_y), tiles_x) + 0.5 - + offsets[: tiles_x1 * tiles_y1, 0] = np.repeat(np.arange(tiles_x1), tiles_y1) + offsets[: tiles_x1 * tiles_y1, 1] = np.tile(np.arange(tiles_y1), tiles_x1) + offsets[tiles_x1 * tiles_y1 :, 0] = np.repeat(np.arange(tiles_x) + 0.5, tiles_y) + offsets[tiles_x1 * tiles_y1 :, 1] = np.tile(np.arange(tiles_y), tiles_x) + 0.5 + offsets[:, 0] *= dx offsets[:, 1] *= dy offsets[:, 0] += minx offsets[:, 1] += miny polygon = [dx, dy / 3] * np.array( - [[.5, -.5], [.5, .5], [0., 1.], [-.5, .5], [-.5, -.5], [0., -1.]]) + [[0.5, -0.5], [0.5, 0.5], [0.0, 1.0], [-0.5, 0.5], [-0.5, -0.5], [0.0, -1.0]] + ) - collection = PolyCollection([polygon],offsets=offsets) + collection = PolyCollection([polygon], offsets=offsets) hex_polys = collection.get_paths()[0].vertices hex_array = [] - for xs,ys in collection.get_offsets(): - hex_x = np.add(hex_polys[:,0], xs) - hex_y = np.add(hex_polys[:,1], ys) + for xs, ys in collection.get_offsets(): + hex_x = np.add(hex_polys[:, 0], xs) + hex_y = np.add(hex_polys[:, 1], ys) hex_array.append(Polygon(np.vstack([hex_x, hex_y]).T)) - hex_grid = gpd.GeoDataFrame({'geometry':hex_array}) - + hex_grid = gpd.GeoDataFrame({"geometry": hex_array}) + return hex_grid -def assign_systematic(grid, tiles_x, tiles_y, direction='diagonal'): + +def assign_systematic(grid, tiles_x, tiles_y, direction="diagonal"): """ Set grid pattern as systematic by assigning grid IDs along diagonals, normal and anti-diagonal. """ # Reshape length of grid to matrix - sys_matrix = np.arange(0, tiles_x * tiles_y) \ - .reshape(tiles_y, tiles_x) + sys_matrix = np.arange(0, tiles_x * tiles_y).reshape(tiles_y, tiles_x) length, width = sys_matrix.shape # Set systematic pattern, diagonal or anti-diagonal - if direction == 'diagonal': - diags = [sys_matrix.diagonal(i) - for i in range(width-1, -length,-1)] - elif direction == 'anti': - diags = [sys_matrix[::-1,:].diagonal(i) - for i in range(-length+1, width)] + if direction == "diagonal": + diags = [sys_matrix.diagonal(i) for i in range(width - 1, -length, -1)] + elif direction == "anti": + diags = [sys_matrix[::-1, :].diagonal(i) for i in range(-length + 1, width)] else: - raise ValueError("Direction of systematic pattern not recognised. Choose between: diagonal or anti.") + raise ValueError( + "Direction of systematic pattern not recognised. Choose between: diagonal or anti." + ) # Construct lookup between diagonal element indices and grid dataframe - systematic_lookup = dict([ - tuple([element, key]) - for key, diag in enumerate(diags) - for element in diag - ]) + systematic_lookup = dict( + [tuple([element, key]) for key, diag in enumerate(diags) for element in diag] + ) grid_id = grid.index.map(systematic_lookup) return grid_id @@ -207,17 +228,20 @@ def assign_randomized(grid, n_groups=5, random_state=None): Set grid pattern as randomized by randomly assigning grid IDs. 
""" np.random.seed(random_state) - + # Determine number of randomized groups n_random_grps = np.arange(0, n_groups) n_grids = grid.shape[0] - + # Allocate random group id to each grid row grid_id = np.random.choice(n_random_grps, size=n_grids, replace=True) - + return grid_id -def assign_optimized_random(grid, XYs, data, n_groups=5, n_sims=10, distance_metric='euclidean'): + +def assign_optimized_random( + grid, XYs, data, n_groups=5, n_sims=10, distance_metric="euclidean" +): """ Set grid pattern as optimized random by taking grid IDs that minimize dissimilarity between folds. """ @@ -227,53 +251,52 @@ def assign_optimized_random(grid, XYs, data, n_groups=5, n_sims=10, distance_met " dissimilarity when using optimized_random method." ) data = convert_numpy(data) - # Build dictionary of grid IDs with paired SSR for dissimilarity + # Build dictionary of grid IDs with paired SSR for dissimilarity optimized_grid = {} for sim in range(n_sims): grid_id = assign_randomized(grid, n_groups) - grid['grid_id'] = grid_id + grid["grid_id"] = grid_id folds = assign_pt_to_grid(XYs, grid, distance_metric) # Scale for SSR calculation X = (data - data.mean(axis=0)) / data.std(axis=0) Xbar = X.mean(axis=0) - X_grid_means = np.array([ X[v].mean(axis=0) - for _, v in folds.groupby('grid_id').groups.items()]) - # Calculate dissimilarity between folds and mean values across all data - sse = sum( - sum((X_grid_means - Xbar)**2) + X_grid_means = np.array( + [X[v].mean(axis=0) for _, v in folds.groupby("grid_id").groups.items()] ) - optimized_grid.update( {sim : {'sse': sse, 'grid_id': grid_id}} ) + # Calculate dissimilarity between folds and mean values across all data + sse = sum(sum((X_grid_means - Xbar) ** 2)) + optimized_grid.update({sim: {"sse": sse, "grid_id": grid_id}}) # Take the optimized grid as one that minimises dissimilarity between folds - minimised_obj = min(optimized_grid, key = lambda x : optimized_grid[x]['sse']) - grid_id = optimized_grid[minimised_obj]['grid_id'] + minimised_obj = min(optimized_grid, key=lambda x: optimized_grid[x]["sse"]) + grid_id = optimized_grid[minimised_obj]["grid_id"] return grid_id -def assign_pt_to_grid(XYs, grid, distance_metric='euclidean', random_state=None): +def assign_pt_to_grid(XYs, grid, distance_metric="euclidean", random_state=None): """ - Spatial join pts to grids. Reassign border points to nearest grid based on centroid distance. + Spatial join pts to grids. Reassign border points to nearest grid based on centroid distance. 
""" np.random.seed(random_state) - XYs = convert_geodataframe(XYs) - # Equate spatial reference systems if defined + XYs = convert_geodataframe(XYs) + # Equate spatial reference systems if defined if not grid.crs == XYs.crs: - grid.crs = XYs.crs - XYs = gpd.sjoin(XYs, grid, how='left' , op='within')[['geometry', 'grid_id']] - + grid.crs = XYs.crs + XYs = gpd.sjoin(XYs, grid, how="left", op="within")[["geometry", "grid_id"]] + # In rare cases, points will sit at the border separating two grids - if XYs['grid_id'].isna().any(): + if XYs["grid_id"].isna().any(): # Find border pts and assign to nearest grid centroid grid_centroid = grid.geometry.centroid grid_centroid = geometry_to_2d(grid_centroid) - border_pt_index = XYs['grid_id'].isna() + border_pt_index = XYs["grid_id"].isna() border_pts = XYs[border_pt_index].geometry - border_pts = geometry_to_2d(border_pts) - + border_pts = geometry_to_2d(border_pts) + # Update border pt grid IDs - tree = BallTree(grid_centroid, metric=distance_metric) - grid_id = tree.query(border_pts, k=1, return_distance=False).flatten() - grid_id = grid.loc[grid_id, 'grid_id'].values - XYs.loc[border_pt_index, 'grid_id'] = grid_id - return XYs \ No newline at end of file + tree = BallTree(grid_centroid, metric=distance_metric) + grid_id = tree.query(border_pts, k=1, return_distance=False).flatten() + grid_id = grid.loc[grid_id, "grid_id"].values + XYs.loc[border_pt_index, "grid_id"] = grid_id + return XYs diff --git a/spacv/spacv.py b/spacv/spacv.py index 6bc5df4..faeabd9 100644 --- a/spacv/spacv.py +++ b/spacv/spacv.py @@ -7,25 +7,21 @@ from .base_classes import BaseSpatialCV from .grid_builder import assign_pt_to_grid, construct_blocks -from .utils import convert_geodataframe, geometry_to_2d, load_custom_polygon +from .utils import geometry_to_2d, load_custom_polygon + +__all__ = ["HBLOCK", "SKCV", "RepeatedSKCV", "UserDefinedSCV"] -__all__ = [ - "HBLOCK", - "SKCV", - "RepeatedSKCV", - "UserDefinedSCV" -] class HBLOCK(BaseSpatialCV): """ H-Blocking spatial cross-validator. Partitions study area into a series of grid polygons that are assigned into different folds based on several user-defined options. HBLOCK - exposes several parameters for choosing block sizes, types and + exposes several parameters for choosing block sizes, types and fold assignment. - + Yields indices to split data into training and test sets. - + Parameters ---------- tiles_x : integer, default=5 @@ -36,14 +32,14 @@ class HBLOCK(BaseSpatialCV): Specify shape of grid polygons, square or hex. method : string, default='unique' Choose grid ID assignment method to build folds. Options - are: unique, where every polygon in the grid is a fold; - systematic, where folds reflect diagonal or anti-diagonal - patterns across the study area; random, where folds are + are: unique, where every polygon in the grid is a fold; + systematic, where folds reflect diagonal or anti-diagonal + patterns across the study area; random, where folds are randomly assigned into groups determined by n_groups parameter; and optimized_random, where random assignment of grids into groups are optimized by reducing disimilarity between folds. buffer_radius : integer, default=0 - Buffer radius (dead zone) to exclude training points that are + Buffer radius (dead zone) to exclude training points that are within a defined distance of test data within a fold. 
direction : string, default='diagonal' Choose direction of pattern for systematic grid assignment, @@ -52,7 +48,7 @@ class HBLOCK(BaseSpatialCV): Number of folds to randomly assign grid polygons into. data : array Array containing covariates used in predictive task. Used to - calculate disimilarity of feature space between folds to + calculate disimilarity of feature space between folds to find the optimized random grid assignment. n_sims : integer, default=10 Number of iterations in which to find optimized grid assignment @@ -64,26 +60,27 @@ class HBLOCK(BaseSpatialCV): unprojected spaces. random_state : int, default=None random_state is the seed used by the random number generator. - + Examples - -------- - - + -------- + + """ + def __init__( self, tiles_x=5, tiles_y=5, - shape='square', - method='unique', + shape="square", + method="unique", buffer_radius=0, - direction='diagonal', + direction="diagonal", n_groups=5, data=None, n_sims=10, - distance_metric='euclidean', - random_state=None - ): + distance_metric="euclidean", + random_state=None, + ): self.tiles_x = tiles_x self.tiles_y = tiles_y self.shape = shape @@ -94,105 +91,104 @@ def __init__( self.data = data self.n_sims = n_sims self.distance_metric = distance_metric - self.n_splits = tiles_x*tiles_y + self.n_splits = tiles_x * tiles_y self.random_state = random_state - + def _iter_test_indices(self, XYs): """ - Generates integer indices corresponding to test sets and + Generates integer indices corresponding to test sets and training indices to be excluded from model training. - + Parameters ---------- XYs : GeoSeries GeoSeries containing shapely Points that identify Easting and Northing coordinates of data points. - + Yields ------ test_indices : array The testing set indices for that fold. train_exclude : array The training set indices to exclude for that fold. - """ + """ # Define grid type used in CV procedure - grid = construct_blocks(XYs, - tiles_x = self.tiles_x, - tiles_y = self.tiles_y, - shape = self.shape, - method = self.method, - direction = self.direction, - n_groups = self.n_groups, - data = self.data, - n_sims = self.n_sims, - random_state = self.random_state) - + grid = construct_blocks( + XYs, + tiles_x=self.tiles_x, + tiles_y=self.tiles_y, + shape=self.shape, + method=self.method, + direction=self.direction, + n_groups=self.n_groups, + data=self.data, + n_sims=self.n_sims, + random_state=self.random_state, + ) + # Convert to GDF to use Geopandas functions - XYs = gpd.GeoDataFrame(({'geometry':XYs})) - + XYs = gpd.GeoDataFrame(({"geometry": XYs})) + # Assign pts to grids XYs = assign_pt_to_grid(XYs, grid, self.distance_metric) grid_ids = np.unique(grid.grid_id) # Yield test indices and optionally training indices within buffer for grid_id in grid_ids: - test_indices = XYs.loc[XYs['grid_id'] == grid_id ].index.values + test_indices = XYs.loc[XYs["grid_id"] == grid_id].index.values # Remove empty grids if len(test_indices) < 1: continue grid_poly_buffer = grid.loc[[grid_id]].buffer(self.buffer_radius) - test_indices, train_exclude = \ - super()._remove_buffered_indices(XYs, test_indices, - self.buffer_radius, grid_poly_buffer) - yield test_indices, train_exclude - + test_indices, train_exclude = super()._remove_buffered_indices( + XYs, test_indices, self.buffer_radius, grid_poly_buffer + ) + yield test_indices, train_exclude + + class SKCV(BaseSpatialCV): """ - Spatial K-fold cross-validator. 
Modification of standard - CV to overcome biased prediction performance of estimates - due to autocorrelation in spatial data. Overoptimistic bias - in performance is prevented by ensuring spatial proximity of + Spatial K-fold cross-validator. Modification of standard + CV to overcome biased prediction performance of estimates + due to autocorrelation in spatial data. Overoptimistic bias + in performance is prevented by ensuring spatial proximity of test data, and maintaing a training set that is within a certain spatial distance from the test dataset. - + When K=N, SKCV becomes a spatial leave-one-out (SLOO) cross-validator. - + Yields indices to split data into training and test sets. - + Parameters ---------- n_splits : int, default=10 Number of folds. Must be at least 2. buffer_radius : integer, default=0 - Buffer radius (dead zone) to exclude training points that are + Buffer radius (dead zone) to exclude training points that are within a defined distance of test data within a fold. random_state : int, RandomState instance or None, optional, default=None random_state is the seed used by the random number generator. Examples - -------- - + -------- + """ - def __init__( - self, - n_splits=10, - buffer_radius = 0, - random_state = None - ): + + def __init__(self, n_splits=10, buffer_radius=0, random_state=None): self.n_splits = n_splits self.buffer_radius = buffer_radius self.random_state = random_state - + def _iter_test_indices(self, XYs): """ Generates integer indices corresponding to test sets. - + Parameters ---------- X : GeoSeries GeoSeries containing shapely Points that identify Easting and Northing coordinates of data points. - + Yields ------ test_indices : array @@ -200,16 +196,18 @@ def _iter_test_indices(self, XYs): train_exclude : array The training set indices to exclude for that fold. """ - - if self.n_splits > len(XYs) : + + if self.n_splits > len(XYs): raise ValueError( "Number of specified n_splits (folds) is larger than number of data points. 
Given {} observations and {} folds.".format( len(XYs), self.n_splits ) ) sloo = len(XYs) == self.n_splits - lattice = any(XYs.geom_type == 'Polygon') or any(XYs.geom_type == 'MultiPolygon') - + lattice = any(XYs.geom_type == "Polygon") or any( + XYs.geom_type == "MultiPolygon" + ) + # If K = N, SLOO if sloo: num_samples = XYs.shape[0] @@ -217,40 +215,48 @@ def _iter_test_indices(self, XYs): else: # Partion XYs space into folds XYs_to_2d = geometry_to_2d(XYs) - km_skcv = MiniBatchKMeans(n_clusters = self.n_splits, random_state=self.random_state) + km_skcv = MiniBatchKMeans( + n_clusters=self.n_splits, random_state=self.random_state + ) labels = km_skcv.fit(XYs_to_2d).labels_ uniques, counts = np.unique(labels, return_counts=True) - check_fold_n = (counts < 2) + check_fold_n = counts < 2 if check_fold_n.any(): - warn = "{} folds contain less than three points and do not form polygons.".format( check_fold_n.sum() ) + warn = "{} folds contain less than three points and do not form polygons.".format( + check_fold_n.sum() + ) warnings.warn(warn) - indices_from_folds = [np.argwhere(labels == i).reshape(-1) - for i in uniques] - for fold_indices in indices_from_folds: + indices_from_folds = [np.argwhere(labels == i).reshape(-1) for i in uniques] + for fold_indices in indices_from_folds: if sloo: test_indices = np.array([fold_indices]) fold_polygon = XYs.loc[test_indices].buffer(self.buffer_radius) elif lattice: test_indices = np.array(fold_indices) - fold_polygon = XYs.loc[test_indices].unary_union.buffer(self.buffer_radius) - else: # skcv + fold_polygon = XYs.loc[test_indices].unary_union.buffer( + self.buffer_radius + ) + else: # skcv test_indices = np.array(fold_indices) - fold_polygon = XYs.loc[test_indices].unary_union.convex_hull.buffer(self.buffer_radius) - - test_indices, train_exclude = \ - super()._remove_buffered_indices(XYs, test_indices, - self.buffer_radius, fold_polygon) + fold_polygon = XYs.loc[test_indices].unary_union.convex_hull.buffer( + self.buffer_radius + ) + + test_indices, train_exclude = super()._remove_buffered_indices( + XYs, test_indices, self.buffer_radius, fold_polygon + ) yield test_indices, train_exclude - + + class RepeatedSKCV(SKCV): """ - Repeats Spatial K-Fold cross-validation (SKCV) n times with different + Repeats Spatial K-Fold cross-validation (SKCV) n times with different K-means randomization in each repetition. Given the sensitivity of K-means to the initial, randomly initialised starting values of the centroid seeds, - RepeatedSKCV repeats SKCV a number of times, yielding a generator of + RepeatedSKCV repeats SKCV a number of times, yielding a generator of n_repeats * K test and training splits. - + Parameters ---------- n_splits : int, default=10 @@ -264,14 +270,8 @@ class RepeatedSKCV(SKCV): **cvargs : additional params Constructor parameters for cv. 
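# Editor's note: a hedged usage sketch for the three cross-validators
# reformatted above (SKCV, HBLOCK, RepeatedSKCV), mirroring the calls in the
# test suite; the random points are hypothetical.
import geopandas as gpd
import numpy as np

import spacv

xs = np.random.randint(0, 3000, 30)
ys = np.random.randint(0, 3000, 30)
gdf = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(xs, ys)})

skcv = spacv.SKCV(n_splits=3, buffer_radius=450, random_state=123)
hblock = spacv.HBLOCK(tiles_x=3, tiles_y=3, method="random", n_groups=3)
rep = spacv.RepeatedSKCV(n_repeats=2, n_splits=3)

for cv in (skcv, hblock, rep):
    for train_idx, test_idx in cv.split(gdf):
        pass  # feed train_idx / test_idx to a sklearn estimator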
""" - - def __init__( - self, - n_repeats=10, - n_splits=10, - random_state=None, - **cvargs - ): + + def __init__(self, n_repeats=10, n_splits=10, random_state=None, **cvargs): if not isinstance(n_repeats, numbers.Integral): raise ValueError("Number of repetitions must be of Integral type.") if n_repeats <= 0: @@ -280,102 +280,100 @@ def __init__( self.n_repeats = n_repeats self.n_splits = n_splits self.cvargs = cvargs - + def split(self, XYs): n_repeats = self.n_repeats for idx in range(n_repeats): cv = self.cv(self.n_splits, **self.cvargs) for train_index, test_index in cv.split(XYs): yield train_index, test_index - + + class UserDefinedSCV(BaseSpatialCV): """ Spatial cross-validation using user-defined polygons. - + Yields indices to split data into training and test sets. - + Parameters ---------- custom_polygons : string, GeoSeries File path to user defined grid polygons used to assign data points into folds. buffer_radius : integer, default=0 - Buffer radius (dead zone) to exclude training points that are + Buffer radius (dead zone) to exclude training points that are within a defined distance of test data within a fold. distance_metric : string, default='euclidean' Distance metric used to reconcile points that sit at exact border between two grids. Defaults to euclidean assuming projected coordinate system, otherwise use haversine for unprojected spaces. - + Yields ------ test_indices : array The testing set indices for that fold. train_exclude : array The training set indices to exclude for that fold. - + """ - def __init__( - self, - custom_polygons, - buffer_radius = 0, - distance_metric = 'euclidean' - ): + + def __init__(self, custom_polygons, buffer_radius=0, distance_metric="euclidean"): self.buffer_radius = buffer_radius self.custom_polygons = load_custom_polygon(custom_polygons) self.distance_metric = distance_metric - + def _iter_test_indices(self, XYs): """ - Generates integer indices corresponding to test sets and + Generates integer indices corresponding to test sets and training indices to be excluded from model training. - + Parameters ---------- XYs : GeoSeries GeoSeries containing shapely Points that identify Easting and Northing coordinates of data points. - + Yields ------ test_indices : array The testing set indices for that fold. train_exclude : array The training set indices to exclude for that fold. - """ + """ grid = self.custom_polygons - grid['grid_id'] = grid.index + grid["grid_id"] = grid.index grid_ids = np.unique(grid.grid_id) - + XYs = assign_pt_to_grid(XYs, grid, self.distance_metric) - + # Yield test indices and optionally training indices within buffer for grid_id in grid_ids: - test_indices = XYs.loc[XYs['grid_id'] == grid_id ].index.values + test_indices = XYs.loc[XYs["grid_id"] == grid_id].index.values # Remove empty grids if len(test_indices) < 1: - continue + continue grid_poly_buffer = grid.loc[[grid_id]].buffer(self.buffer_radius) - test_indices, train_exclude = \ - super()._remove_buffered_indices(XYs, test_indices, - self.buffer_radius, grid_poly_buffer) - yield test_indices, train_exclude - -def compute_gcv(y, X): - """ - Compute generalized cross-validation (GCV) for i.i.d data. GSV - is an approximation of leave-one-out (LOO) CV. - - - ADD: accomodate space. 
- """ - y = y.reshape(-1, 1) - ols = spreg.ols.OLS(y, X) - hat = X.dot(np.linalg.inv(X.T.dot(X)).dot(X.T)) - mse = np.mean( (y - ols.predy)**2) - n = len(y) - h_value = np.trace( np.identity(n) - hat ) / n - gcv = mse / h_value**2 - - return gcv \ No newline at end of file + test_indices, train_exclude = super()._remove_buffered_indices( + XYs, test_indices, self.buffer_radius, grid_poly_buffer + ) + yield test_indices, train_exclude + + +# def compute_gcv(y, X): +# """ +# Compute generalized cross-validation (GCV) for i.i.d data. GSV +# is an approximation of leave-one-out (LOO) CV. + + +# ADD: accomodate space. +# """ +# y = y.reshape(-1, 1) +# ols = spreg.ols.OLS(y, X) +# hat = X.dot(np.linalg.inv(X.T.dot(X)).dot(X.T)) +# mse = np.mean((y - ols.predy) ** 2) +# n = len(y) +# h_value = np.trace(np.identity(n) - hat) / n +# gcv = mse / h_value**2 + +# return gcv diff --git a/spacv/tests/test_cv.py b/spacv/tests/test_cv.py index f32bbf7..be42e28 100644 --- a/spacv/tests/test_cv.py +++ b/spacv/tests/test_cv.py @@ -1,33 +1,61 @@ import unittest -import numpy as np + import geopandas as gpd +import numpy as np import spacv + class SKCV_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) x = np.random.randint(0, 3000, 30) y = np.random.randint(0, 3000, 30) - - self.gdf = gpd.GeoDataFrame( - {'geometry' : gpd.points_from_xy(x,y)} - ) - + + self.gdf = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(x, y)}) + self.fold_test_one = np.array([1, 5, 13, 16, 17, 18, 21, 22, 23, 25, 26, 28]) self.fold_test_two = np.array([2, 4, 6, 7, 11, 12, 14, 27, 29]) self.fold_test_three = np.array([0, 3, 8, 9, 10, 15, 19, 20, 24]) - - self.fold_train_one = np.array([0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 14, 15, 19, 20, 24, 27, 29]) - self.fold_train_two = np.array([0, 1, 3, 5, 8, 9, 10, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28]) - self.fold_train_three = np.array([1, 2, 4, 5, 6, 7, 11, 12, 13, 14, 16, 17, 18, 21, 22, 23, 25, 26, 27, 28, 29]) - + + self.fold_train_one = np.array( + [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 14, 15, 19, 20, 24, 27, 29] + ) + self.fold_train_two = np.array( + [0, 1, 3, 5, 8, 9, 10, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28] + ) + self.fold_train_three = np.array( + [ + 1, + 2, + 4, + 5, + 6, + 7, + 11, + 12, + 13, + 14, + 16, + 17, + 18, + 21, + 22, + 23, + 25, + 26, + 27, + 28, + 29, + ] + ) + def test_skcv(self): np.random.seed(10) - scv = spacv.SKCV(n_splits = 3, buffer_radius = 450, random_state=123) + scv = spacv.SKCV(n_splits=3, buffer_radius=450, random_state=123) self.assertEqual(scv.n_splits, 3) self.assertEqual(scv.buffer_radius, 450) self.assertEqual(scv.random_state, 123) - + fold_train, fold_test = [], [] for train, test in scv.split(self.gdf): fold_train.append(train) @@ -36,120 +64,115 @@ def test_skcv(self): scv_fold_one = fold_test[0] scv_fold_two = fold_test[1] scv_fold_three = fold_test[2] - + scv_train_one = fold_train[0] scv_train_two = fold_train[1] scv_train_three = fold_train[2] - + np.testing.assert_equal(self.fold_test_one, scv_fold_one) np.testing.assert_equal(self.fold_test_two, scv_fold_two) np.testing.assert_equal(self.fold_test_three, scv_fold_three) - + # Training pts removed in deadzone buffer check np.testing.assert_equal(self.fold_train_one, scv_train_one) np.testing.assert_equal(self.fold_train_two, scv_train_two) np.testing.assert_equal(self.fold_train_three, scv_train_three) - - + + class HBLOCK_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) x = np.random.randint(0, 3000, 10) y = np.random.randint(0, 3000, 10) - - 
self.gdf = gpd.GeoDataFrame( - {'geometry' : gpd.points_from_xy(x,y)} - ) - self.fold_test_one = np.array([0,4,6]) - self.fold_test_two = np.array([2,3,7,8]) - self.fold_test_three = np.array([1,5,9]) - - self.fold_train_one = np.array([1,2,3,5,7,8, 9]) - self.fold_train_two = np.array([0,1,4,5,6,9]) - self.fold_train_three = np.array([0,2,3,4,6,7,8]) - + self.gdf = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(x, y)}) + + self.fold_test_one = np.array([0, 4, 6]) + self.fold_test_two = np.array([2, 3, 7, 8]) + self.fold_test_three = np.array([1, 5, 9]) + + self.fold_train_one = np.array([1, 2, 3, 5, 7, 8, 9]) + self.fold_train_two = np.array([0, 1, 4, 5, 6, 9]) + self.fold_train_three = np.array([0, 2, 3, 4, 6, 7, 8]) + def test_hblock(self): - - scv = spacv.HBLOCK(buffer_radius=0, tiles_x=3, tiles_y=3, - method='random', n_groups=3, random_state=12) + scv = spacv.HBLOCK( + buffer_radius=0, + tiles_x=3, + tiles_y=3, + method="random", + n_groups=3, + random_state=12, + ) self.assertEqual(scv.tiles_x, 3) self.assertEqual(scv.tiles_y, 3) - self.assertEqual(scv.method, 'random') - self.assertEqual(scv.n_groups, 3) - self.assertEqual(scv.random_state, 12) - + self.assertEqual(scv.method, "random") + self.assertEqual(scv.n_groups, 3) + self.assertEqual(scv.random_state, 12) + fold_train, fold_test = [], [] for train, test in scv.split(self.gdf): fold_train.append(train) fold_test.append(test) - + scv_fold_one = fold_test[0] scv_fold_two = fold_test[1] scv_fold_three = fold_test[2] - + scv_train_one = fold_train[0] scv_train_two = fold_train[1] scv_train_three = fold_train[2] - + np.testing.assert_equal(self.fold_test_one, scv_fold_one) np.testing.assert_equal(self.fold_test_two, scv_fold_two) np.testing.assert_equal(self.fold_test_three, scv_fold_three) - + # Training pts removed in deadzone buffer check np.testing.assert_equal(self.fold_train_one, scv_train_one) np.testing.assert_equal(self.fold_train_two, scv_train_two) np.testing.assert_equal(self.fold_train_three, scv_train_three) - + class SKCVUnprojected_Tester(unittest.TestCase): def setUp(self): - pass - + def test_unprojected_coords_skcv(self): - pass - + + class SLOO_Tester(unittest.TestCase): def setUp(self): - pass - + def test_sloo(self): - pass - + + class LatticeSKCV_Tester(unittest.TestCase): def setUp(self): - pass - + def test_lattice_skcv(self): - pass - + class RepeatSKCV_Tester(unittest.TestCase): def setUp(self): - pass - + def test_repeated_skcv(self): - pass - + + class UserDefinedSCV_Tester(unittest.TestCase): def setUp(self): - pass - + def test_userdefined_skcv(self): - pass - - + suite = unittest.TestSuite() test_classes = [SKCV_Tester, HBLOCK_Tester] for i in test_classes: @@ -158,4 +181,4 @@ def test_userdefined_skcv(self): if __name__ == "__main__": runner = unittest.TextTestRunner() - runner.run(suite) \ No newline at end of file + runner.run(suite) diff --git a/spacv/tests/test_grids.py b/spacv/tests/test_grids.py index de0bf79..0d87bdc 100644 --- a/spacv/tests/test_grids.py +++ b/spacv/tests/test_grids.py @@ -1,124 +1,129 @@ import unittest -import numpy as np + import geopandas as gpd -from spacv.grid_builder import * +import numpy as np +from spacv.grid_builder import construct_blocks + class SquareUniqueGrid_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) self.tiles_x = 5 self.tiles_y = 10 - self.method = 'unique' + self.method = "unique" x = np.random.randint(0, 3000, 100) y = np.random.randint(0, 3000, 100) - self.XYs = gpd.GeoDataFrame( - {'geometry' : gpd.points_from_xy(x,y)} - ) - + 
self.XYs = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(x, y)}) + def test_unique_grid(self): np.random.seed(10) grid = construct_blocks(self.XYs, self.tiles_x, self.tiles_y, self.method) # Test count of unique folds - np.testing.assert_equal(grid.shape[0], 5 * 10 ) - np.testing.assert_equal( len(grid.grid_id.unique()), 50 ) - - + np.testing.assert_equal(grid.shape[0], 5 * 10) + np.testing.assert_equal(len(grid.grid_id.unique()), 50) + + class SquareRandomGrid_Tester(unittest.TestCase): - def setUp(self): + def setUp(self): np.random.seed(10) self.tiles_x = 4 self.tiles_y = 4 - self.method = 'random' + self.method = "random" self.n_groups = 5 - self.shape = 'square' + self.shape = "square" self.random_state = 123 - - self.grid_assignment = np.array([ - [2, 4, 2, 1], - [3, 2, 3, 1], - [1, 0, 1, 1], - [0, 0, 1, 3] - ]) + + self.grid_assignment = np.array( + [[2, 4, 2, 1], [3, 2, 3, 1], [1, 0, 1, 1], [0, 0, 1, 3]] + ) x = np.random.randint(0, 1000, 100) y = np.random.randint(0, 1000, 100) - self.XYs = gpd.GeoDataFrame( - {'geometry' : gpd.points_from_xy(x,y)} - ) - + self.XYs = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(x, y)}) + def test_random_grid(self): np.random.seed(10) - - grid = construct_blocks(XYs = self.XYs, - tiles_x = self.tiles_x, - tiles_y = self.tiles_y, - method = self.method, - n_groups = self.n_groups, - shape = self.shape, - random_state = self.random_state) - - np.testing.assert_equal(grid.shape[0], 4 * 4 ) - np.testing.assert_equal( len(grid.grid_id.unique()), 5 ) - - np.testing.assert_equal(grid.grid_id.values.reshape(4,4), self.grid_assignment) - - + + grid = construct_blocks( + XYs=self.XYs, + tiles_x=self.tiles_x, + tiles_y=self.tiles_y, + method=self.method, + n_groups=self.n_groups, + shape=self.shape, + random_state=self.random_state, + ) + + np.testing.assert_equal(grid.shape[0], 4 * 4) + np.testing.assert_equal(len(grid.grid_id.unique()), 5) + + np.testing.assert_equal(grid.grid_id.values.reshape(4, 4), self.grid_assignment) + + class SquareSystematicGrid_Tester(unittest.TestCase): - def setUp(self): + def setUp(self): np.random.seed(10) self.tiles_x = 5 self.tiles_y = 5 - self.method = 'systematic' - self.direction = 'diagonal' - self.shape = 'square' - - self.grid_assignment = np.array([[4, 3, 2, 1, 0], - [5, 4, 3, 2, 1], - [6, 5, 4, 3, 2], - [7, 6, 5, 4, 3], - [8, 7, 6, 5, 4]]) + self.method = "systematic" + self.direction = "diagonal" + self.shape = "square" + self.grid_assignment = np.array( + [ + [4, 3, 2, 1, 0], + [5, 4, 3, 2, 1], + [6, 5, 4, 3, 2], + [7, 6, 5, 4, 3], + [8, 7, 6, 5, 4], + ] + ) x = np.random.randint(0, 1000, 100) y = np.random.randint(0, 1000, 100) - self.XYs = gpd.GeoDataFrame( - {'geometry' : gpd.points_from_xy(x,y)} - ) - + self.XYs = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(x, y)}) + def test_systematic_grid(self): np.random.seed(10) - - grid = construct_blocks(XYs = self.XYs, - tiles_x = self.tiles_x, - tiles_y = self.tiles_y, - method = self.method, - direction = self.direction, - shape = self.shape) - - np.testing.assert_equal(grid.shape[0], 5 * 5 ) - np.testing.assert_equal(grid.grid_id.values.reshape(5,5), self.grid_assignment) - - + + grid = construct_blocks( + XYs=self.XYs, + tiles_x=self.tiles_x, + tiles_y=self.tiles_y, + method=self.method, + direction=self.direction, + shape=self.shape, + ) + + np.testing.assert_equal(grid.shape[0], 5 * 5) + np.testing.assert_equal(grid.grid_id.values.reshape(5, 5), self.grid_assignment) + + class SquareOptimizedRandomGrid_Tester(unittest.TestCase): - def setUp(self): + def 
setUp(self): pass - + def test_random_optimized(self): pass - + + class HexUniqueGrid_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) pass - + def test_hex_unique_grid(self): np.random.seed(10) - - + + suite = unittest.TestSuite() -test_classes = [SquareUniqueGrid_Tester, SquareRandomGrid_Tester, SquareSystematicGrid_Tester] +test_classes = [ + SquareUniqueGrid_Tester, + SquareRandomGrid_Tester, + SquareSystematicGrid_Tester, +] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) @@ -126,8 +131,3 @@ def test_hex_unique_grid(self): if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) - - - - - diff --git a/spacv/tests/test_viz.py b/spacv/tests/test_viz.py index f8436be..24002b7 100644 --- a/spacv/tests/test_viz.py +++ b/spacv/tests/test_viz.py @@ -1,3 +1,3 @@ import matplotlib -matplotlib.use("agg") # To prevent plots from using display \ No newline at end of file +matplotlib.use("agg") # To prevent plots from using display diff --git a/spacv/utils.py b/spacv/utils.py index 3c5a89c..aa2bca4 100644 --- a/spacv/utils.py +++ b/spacv/utils.py @@ -1,39 +1,43 @@ -import numpy as np import geopandas as gpd +import numpy as np import pandas as pd -from shapely.geometry import Point, Polygon, LineString, MultiPolygon +from shapely.geometry import LineString, MultiPolygon, Point, Polygon + def geometry_to_2d(geometry): - return np.array(list(map(lambda x : (x.x, x.y), geometry))) + return np.array(list(map(lambda x: (x.x, x.y), geometry))) + def convert_geoseries(XYs): if isinstance(XYs, gpd.GeoDataFrame): XYs = XYs.geometry if isinstance(XYs, np.ndarray): - XYs = gpd.GeoSeries( gpd.points_from_xy(XYs) ) + XYs = gpd.GeoSeries(gpd.points_from_xy(XYs)) if isinstance(XYs, gpd.GeoDataFrame): - if any(XYs.geom_type == 'Polygon') or any(XYs.geom_type == 'MultiPolygon'): - XYs = XYs.geometry.centroid + if any(XYs.geom_type == "Polygon") or any(XYs.geom_type == "MultiPolygon"): + XYs = XYs.geometry.centroid return XYs + def convert_geodataframe(XYs): if isinstance(XYs, gpd.GeoSeries): - XYs = gpd.GeoDataFrame({'geometry':XYs}) + XYs = gpd.GeoDataFrame({"geometry": XYs}) if isinstance(XYs, np.ndarray): - XYs = gpd.GeoDataFrame({'geometry': gpd.points_from_xy(XYs) }) + XYs = gpd.GeoDataFrame({"geometry": gpd.points_from_xy(XYs)}) if isinstance(XYs, (Point, Polygon, LineString, MultiPolygon)): - XYs = gpd.GeoDataFrame({'geometry': [XYs] }) + XYs = gpd.GeoDataFrame({"geometry": [XYs]}) return XYs + def convert_numpy(X): if isinstance(X, (pd.DataFrame, pd.Series, gpd.GeoDataFrame)): return X.values else: return X - + + def load_custom_polygon(custom_poly): if isinstance(custom_poly, (gpd.GeoDataFrame, gpd.GeoSeries)): return custom_poly.reset_index(drop=True) if isinstance(custom_poly, str): return gpd.read_file(custom_poly).reset_index(drop=True) - \ No newline at end of file diff --git a/spacv/visualisation.py b/spacv/visualisation.py index 17f3e05..b688ee4 100644 --- a/spacv/visualisation.py +++ b/spacv/visualisation.py @@ -1,30 +1,33 @@ -import numpy as np +import geopandas as gpd import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap -import geopandas as gpd -from sklearn.neighbors import BallTree -from scipy.spatial.distance import pdist, squareform from scipy.optimize import curve_fit +from scipy.spatial.distance import pdist, squareform +from sklearn.neighbors import BallTree + from .utils import geometry_to_2d try: import seaborn as sns - sns.set_style("whitegrid") # pretty plots + + 
sns.set_style("whitegrid") # pretty plots except ModuleNotFoundError: pass - + __all__ = [ "variogram_at_lag", "compute_semivariance", "plot_autocorrelation_ranges", "aoa", - "plot_aoa" + "plot_aoa", ] - + + def variogram_at_lag(XYs, x, lags, bw): """ Return semivariance values for defined lag of distance. - + Parameters ---------- XYs : Geoseries series @@ -34,8 +37,8 @@ def variogram_at_lag(XYs, x, lags, bw): lags : array Array of distance lags in metres to obtain semivariances. bw : integer or float - Bandwidth, plus and minus lags to calculate semivariance. - + Bandwidth, plus and minus lags to calculate semivariance. + Returns ------- semivariances : Array of floats @@ -45,46 +48,50 @@ def variogram_at_lag(XYs, x, lags, bw): x = np.asarray(x) paired_distances = pdist(XYs) pd_m = squareform(paired_distances) - semivariances = np.empty(( len(lags) ), dtype=np.float64) + semivariances = np.empty((len(lags)), dtype=np.float64) for i, lag in enumerate(lags): # Mask pts outside bandwidth - lower = pd_m >= lag-bw - upper = pd_m <= lag+bw + lower = pd_m >= lag - bw + upper = pd_m <= lag + bw mask = np.logical_and(lower, upper) semivariances[i] = compute_semivariance(x, mask, bw) return np.c_[semivariances, lags].T + def compute_semivariance(x, mask, bw): """ Calculate semivariance for masked elements. """ semis, counts = [], [] - for i in range( len(x) ): + for i in range(len(x)): xi = np.array([x[i]]) - mask_i = mask[i,:] + mask_i = mask[i, :] mask_i[:i] = False xj = x[mask_i] - ss = (xi - xj)**2 - filter_empty = ss > 0. + ss = (xi - xj) ** 2 + filter_empty = ss > 0.0 if len(ss[filter_empty]) > 0: - counts.append( len(ss[filter_empty])) + counts.append(len(ss[filter_empty])) semis.append(ss[filter_empty]) - semivariance = np.sum( np.concatenate(semis)) / (2.0 * sum(counts) ) + semivariance = np.sum(np.concatenate(semis)) / (2.0 * sum(counts)) return semivariance + def variogram(func): def send_params(*params): new_args = params[1:] mapping = map(lambda h: func(h, *new_args), params[0]) return np.fromiter(mapping, dtype=float) + return send_params + @variogram def spherical(h, r, sill, nugget=0): """ Spherical variogram model function. Calculates the dependent variable for a given lag (h). The nugget (b) defaults to be 0. - + Parameters ---------- h : float @@ -92,28 +99,29 @@ def spherical(h, r, sill, nugget=0): r : float Effective range of autocorrelation. sill : float - The sill of the variogram, where the semivariance begins to saturate. + The sill of the variogram, where the semivariance begins to saturate. nugget : float, default=0 The nugget of the variogram. This is the value of independent - variable at the distance of zero. + variable at the distance of zero. Returns ------- gamma : numpy float Coefficients that describe effective range of spatial autocorrelation """ - a = r / 1. + a = r / 1.0 if h <= r: return nugget + sill * ((1.5 * (h / a)) - (0.5 * ((h / a) ** 3.0))) else: return nugget + sill - + + def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): """ - Plot spatial autocorrelation ranges for input covariates. Suggested - block size is proposed by taking the median autocorrelation range + Plot spatial autocorrelation ranges for input covariates. Suggested + block size is proposed by taking the median autocorrelation range across the data and is reported by the horiztonal line. This function works best for projected coordinate systems. 
- + Parameters ---------- XYs : Geoseries series @@ -123,55 +131,63 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): lags : array Array of distance lags in metres to obtain semivariances. bw : integer or float - Bandwidth, plus and minus lags to calculate semivariance. - + Bandwidth, plus and minus lags to calculate semivariance. + Returns ------- fig : matplotlip Figure instance Figure of spatial weight network. ax : matplotlib Axes instance - Axes in which the figure is plotted. + Axes in which the figure is plotted. """ - alpha = kwargs.pop('alpha', .7) - font_size = kwargs.pop('font_size', 14) - block_suggestion_color = kwargs.pop('color', 'red') - figsize = kwargs.pop('figsize', (8,6)) - + alpha = kwargs.pop("alpha", 0.7) + font_size = kwargs.pop("font_size", 14) + block_suggestion_color = kwargs.pop("color", "red") + figsize = kwargs.pop("figsize", (8, 6)) + ranges = [] for col in X.values.T: # Fit spherical model and extract effective range parameter semis = variogram_at_lag(XYs, col, lags, bw) - sv,h = semis[:,0], semis[:,1] + sv, h = semis[:, 0], semis[:, 1] start_params = [np.nanmax(h), np.nanmax(sv)] bounds = (0, start_params) - cof, _ = curve_fit(spherical, h, sv, sigma=None, - p0 = start_params, bounds=bounds, - method='trf') + cof, _ = curve_fit( + spherical, h, sv, sigma=None, p0=start_params, bounds=bounds, method="trf" + ) effective_range = cof[0] ranges.append(effective_range) x_labs = X.columns f, ax = plt.subplots(1, figsize=figsize) - ax.bar(x_labs, ranges, color='skyblue', alpha=alpha) + ax.bar(x_labs, ranges, color="skyblue", alpha=alpha) ax.set_ylabel("Ranges (m)") ax.set_xlabel("Variables") median_eff_range = np.median(ranges) - ax.text(0, median_eff_range + (np.max(ranges) / 100 * 3), '{:.3f}m'.format(median_eff_range), color=block_suggestion_color, size=font_size ) - ax.axhline(median_eff_range, color=block_suggestion_color, linestyle='--') + ax.text( + 0, + median_eff_range + (np.max(ranges) / 100 * 3), + "{:.3f}m".format(median_eff_range), + color=block_suggestion_color, + size=font_size, + ) + ax.axhline(median_eff_range, color=block_suggestion_color, linestyle="--") return f, ax - -def aoa(new_data, - training_data, - model=None, - thres=0.95, - fold_indices=None, - distance_metric='euclidean' - ): + + +def aoa( + new_data, + training_data, + model=None, + thres=0.95, + fold_indices=None, + distance_metric="euclidean", +): """ Area of Applicability (AOA) measure for spatial prediction models from - Meyer and Pebesma (2020). The AOA defines the area for which, on average, - the cross-validation error of the model applies, which is crucial for + Meyer and Pebesma (2020). The AOA defines the area for which, on average, + the cross-validation error of the model applies, which is crucial for cases where spatial predictions are used to inform decision-making. - + Parameters ---------- new_data : GeoDataFrame @@ -194,26 +210,28 @@ def aoa(new_data, Binary mask that occludes points outside predictive area of applicability. 
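# Editor's note: the dissimilarity index (DI) logic of aoa() from
# Meyer & Pebesma (2020), reduced to its core steps on toy arrays.
import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.neighbors import BallTree

rng = np.random.default_rng(1)
train = rng.normal(size=(20, 3))  # standardized training predictors
new = rng.normal(size=(5, 3))     # standardized prediction locations

# Distance from each new point to its nearest training point...
mindist, _ = BallTree(train).query(new, k=1, return_distance=True)

# ...scaled by the mean pairwise distance within the training data.
train_dist = squareform(pdist(train))
np.fill_diagonal(train_dist, np.nan)
train_dist_avgmean = np.mean(np.nanmean(train_dist, axis=1))
DI = mindist.reshape(-1) / train_dist_avgmean

# The AOA keeps points whose DI does not exceed the 0.95-quantile threshold.
thres = np.quantile(np.nanmin(train_dist, axis=1) / train_dist_avgmean, q=0.95)
print(DI.round(2), DI <= thres)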
""" if len(training_data) <= 1: - raise Exception('At least two training instances need to be specified.') - - # Scale data + raise Exception("At least two training instances need to be specified.") + + # Scale data training_data = (training_data - np.mean(training_data)) / np.std(training_data) new_data = (new_data - np.mean(new_data)) / np.std(new_data) # Calculate nearest training instance to test data, return Euclidean distances - tree = BallTree(training_data, metric=distance_metric) + tree = BallTree(training_data, metric=distance_metric) mindist, _ = tree.query(new_data, k=1, return_distance=True) - # Build matrix of pairwise distances + # Build matrix of pairwise distances paired_distances = pdist(training_data) train_dist = squareform(paired_distances) np.fill_diagonal(train_dist, np.nan) - + # Remove data points that are within the same fold - if fold_indices: + if fold_indices: # Get number of training instances in each fold instances_in_folds = [len(fold) for fold in fold_indices] - instance_fold_id = np.repeat(np.arange(0, len(fold_indices)), instances_in_folds) + instance_fold_id = np.repeat( + np.arange(0, len(fold_indices)), instances_in_folds + ) # Create mapping between training instance and fold ID fold_indices = np.concatenate(fold_indices) @@ -221,20 +239,22 @@ def aoa(new_data, # Mask training points in same fold for DI measure calculation for i, row in enumerate(train_dist): - mask = folds[:,0] == folds[:,0][i] + mask = folds[:, 0] == folds[:, 0][i] train_dist[i, mask] = np.nan # Scale distance to nearest training point by average distance across training data train_dist_mean = np.nanmean(train_dist, axis=1) train_dist_avgmean = np.mean(train_dist_mean) - mindist /= train_dist_avgmean + mindist /= train_dist_avgmean # Define threshold for AOA train_dist_min = np.nanmin(train_dist, axis=1) - aoa_train_stats = np.quantile(train_dist_min / train_dist_avgmean, - q = np.array([0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1])) - thres = np.quantile(train_dist_min / train_dist_avgmean, q = thres) - + # aoa_train_stats = np.quantile( + # train_dist_min / train_dist_avgmean, + # q=np.array([0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1]), + # ) + thres = np.quantile(train_dist_min / train_dist_avgmean, q=thres) + # We choose the AOA as the area where the DI does not exceed the threshold DIs = mindist.reshape(-1) masked_result = np.repeat(1, len(mindist)) @@ -242,7 +262,10 @@ def aoa(new_data, return DIs, masked_result -def plot_aoa(new_data, training_data, columns, figsize=(14,4), fold_indices=None, **kwargs): + +def plot_aoa( + new_data, training_data, columns, figsize=(14, 4), fold_indices=None, **kwargs +): """ Parameters ---------- @@ -257,43 +280,46 @@ def plot_aoa(new_data, training_data, columns, figsize=(14,4), fold_indices=None Width, height of figure in inches. fold_indices : iterable, default=None Iterable consisting of training indices that identify instances in the - folds. - + folds. + Returns ------- fig : matplotlip Figure instance Figure of spatial weight network. ax : matplotlib Axes instance - Axes in which the figure is plotted. + Axes in which the figure is plotted. 
""" # Pop geometry for use later in plotting new_data = new_data.copy() - new_data_geometry = new_data.pop('geometry') - + new_data_geometry = new_data.pop("geometry") + # Subset to variables new_data_aoa = new_data[columns] training_data_aoa = training_data[columns] - - DIs, masked_result = aoa(new_data_aoa, - training_data_aoa, - fold_indices=fold_indices) - - new_data.loc[:, 'DI'] = DIs - new_data.loc[:, 'AOA'] = masked_result - new_data.loc[:, 'geometry'] = new_data_geometry - new_data = gpd.GeoDataFrame(new_data, geometry=new_data['geometry']) - - f,ax = plt.subplots(1, 2, figsize=figsize) - - new_data.plot(ax=ax[0], column='DI', legend=True, cmap='viridis') - new_data.plot(ax=ax[1], column='AOA', categorical=True, - legend=True, cmap=ListedColormap(['red', 'blue'])) - training_data.plot(ax=ax[0], alpha=.3) - training_data.plot(ax=ax[1], alpha=.3) - - ax[0].set_aspect('auto') - ax[1].set_aspect('auto') - ax[0].set_title('Dissimilarity index (DI)') - ax[1].set_title('AOA'); - - return f, ax \ No newline at end of file + + DIs, masked_result = aoa(new_data_aoa, training_data_aoa, fold_indices=fold_indices) + + new_data.loc[:, "DI"] = DIs + new_data.loc[:, "AOA"] = masked_result + new_data.loc[:, "geometry"] = new_data_geometry + new_data = gpd.GeoDataFrame(new_data, geometry=new_data["geometry"]) + + f, ax = plt.subplots(1, 2, figsize=figsize) + + new_data.plot(ax=ax[0], column="DI", legend=True, cmap="viridis") + new_data.plot( + ax=ax[1], + column="AOA", + categorical=True, + legend=True, + cmap=ListedColormap(["red", "blue"]), + ) + training_data.plot(ax=ax[0], alpha=0.3) + training_data.plot(ax=ax[1], alpha=0.3) + + ax[0].set_aspect("auto") + ax[1].set_aspect("auto") + ax[0].set_title("Dissimilarity index (DI)") + ax[1].set_title("AOA") + + return f, ax From 930d25c75611f13e00c3248eab6a90619f9f575e Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Wed, 31 May 2023 18:01:49 +0200 Subject: [PATCH 06/14] fix semivariance indexing bug that prevented curve_fit from fitting --- spacv/visualisation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacv/visualisation.py b/spacv/visualisation.py index b688ee4..20c0c6c 100644 --- a/spacv/visualisation.py +++ b/spacv/visualisation.py @@ -149,7 +149,7 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): for col in X.values.T: # Fit spherical model and extract effective range parameter semis = variogram_at_lag(XYs, col, lags, bw) - sv, h = semis[:, 0], semis[:, 1] + sv, h = semis[0], semis[1] start_params = [np.nanmax(h), np.nanmax(sv)] bounds = (0, start_params) cof, _ = curve_fit( From f41312486afa9ced3398078e898c8977e688413b Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Wed, 31 May 2023 18:05:40 +0200 Subject: [PATCH 07/14] allow autocorrelation to be calculated using geographic coordinates; add rudimentary variogram plotting; add multiprocessing --- spacv/visualisation.py | 140 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 124 insertions(+), 16 deletions(-) diff --git a/spacv/visualisation.py b/spacv/visualisation.py index 20c0c6c..15d8954 100644 --- a/spacv/visualisation.py +++ b/spacv/visualisation.py @@ -1,9 +1,15 @@ +import multiprocessing +from typing import Tuple, Union + import geopandas as gpd import matplotlib.pyplot as plt import numpy as np +from matplotlib.axis import Axis from matplotlib.colors import ListedColormap +from matplotlib.figure import Figure from scipy.optimize import curve_fit from scipy.spatial.distance import pdist, squareform +from sklearn.metrics.pairwise import 
haversine_distances from sklearn.neighbors import BallTree from .utils import geometry_to_2d @@ -24,7 +30,13 @@ ] -def variogram_at_lag(XYs, x, lags, bw): +def variogram_at_lag( + XYs: gpd.GeoSeries, + x: Union[list, np.ndarray], + lags: np.ndarray, + bw: Union[int, float], + distance_metric: str = "euclidean", +) -> np.ndarray: """ Return semivariance values for defined lag of distance. @@ -38,6 +50,10 @@ def variogram_at_lag(XYs, x, lags, bw): Array of distance lags in metres to obtain semivariances. bw : integer or float Bandwidth, plus and minus lags to calculate semivariance. + distance_metric : string + Distance function to calculate pairwise distances. Must be "euclidean" for + points in euclidean space, or "haversine" for points in a geographic CRS. + Defaults to "euclidean". Returns ------- @@ -46,15 +62,23 @@ def variogram_at_lag(XYs, x, lags, bw): """ XYs = geometry_to_2d(XYs) x = np.asarray(x) - paired_distances = pdist(XYs) - pd_m = squareform(paired_distances) + + if distance_metric == "euclidean": + paired_distances = pdist(XYs) + pd_m = squareform(paired_distances) + elif distance_metric == "haversine": + pd_m = haversine_distances(np.radians([*XYs])) + pd_m = pd_m * 6371000 # multiply by Earth radius to get meters + semivariances = np.empty((len(lags)), dtype=np.float64) + for i, lag in enumerate(lags): # Mask pts outside bandwidth lower = pd_m >= lag - bw upper = pd_m <= lag + bw mask = np.logical_and(lower, upper) semivariances[i] = compute_semivariance(x, mask, bw) + return np.c_[semivariances, lags].T @@ -115,7 +139,29 @@ def spherical(h, r, sill, nugget=0): return nugget + sill -def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): +def calculate_range(args): + XYs, col, lags, bw, distance_metric = args + semis = variogram_at_lag(XYs, col, lags, bw, distance_metric) + sv, h = semis[0], semis[1] + start_params = [np.nanmax(h), np.nanmax(sv)] + bounds = (0, start_params) + cof, _ = curve_fit( + spherical, h, sv, sigma=None, p0=start_params, bounds=bounds, method="trf" + ) + effective_range = cof[0] + return effective_range + + +def plot_autocorrelation_ranges( + XYs: gpd.GeoSeries, + X: Union[np.ndarray, gpd.GeoDataFrame], + lags: np.ndarray, + bw: Union[int, float], + distance_metric: str = "euclidean", + workers: int = 1, + verbose: bool = False, + **kwargs, +) -> Tuple[Figure, Axis, list]: """ Plot spatial autocorrelation ranges for input covariates. Suggested block size is proposed by taking the median autocorrelation range @@ -132,6 +178,14 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): Array of distance lags in metres to obtain semivariances. bw : integer or float Bandwidth, plus and minus lags to calculate semivariance. + distance_metric : string + Distance function to calculate pairwise distances. Must be "euclidean" for + points in euclidean space, or "haversine" for points in a geographic CRS. + Defaults to "euclidean". + workers : int + Use multiprocessing for >1 worker. -1 uses all available cores. Defaults to 1. + verbose : bool + Print name of current column being processed. Returns ------- @@ -139,6 +193,7 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): Figure of spatial weight network. ax : matplotlib Axes instance Axes in which the figure is plotted. 
+ ranges : list """ alpha = kwargs.pop("alpha", 0.7) font_size = kwargs.pop("font_size", 14) @@ -146,17 +201,32 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): figsize = kwargs.pop("figsize", (8, 6)) ranges = [] - for col in X.values.T: - # Fit spherical model and extract effective range parameter - semis = variogram_at_lag(XYs, col, lags, bw) - sv, h = semis[0], semis[1] - start_params = [np.nanmax(h), np.nanmax(sv)] - bounds = (0, start_params) - cof, _ = curve_fit( - spherical, h, sv, sigma=None, p0=start_params, bounds=bounds, method="trf" - ) - effective_range = cof[0] - ranges.append(effective_range) + + if workers == -1 or workers > 1: + pool = multiprocessing.Pool(workers) + results = [] + + for i, col in enumerate(X.values.T): + if verbose: + print(f"{i}: {X.columns[i]}") + # Fit spherical model and extract effective range parameter + args = (XYs, col, lags, bw, distance_metric) + results.append(pool.apply_async(calculate_range, (args,))) + + for result in results: + ranges.append(result.get()) + + pool.close() + pool.join() + else: + for i, col in enumerate(X.values.T): + if verbose: + print(f"{i}: {X.columns[i]}") + # Fit spherical model and extract effective range parameter + args = (XYs, col, lags, bw, distance_metric) + eff_range = calculate_range(args) + ranges.append(eff_range) + x_labs = X.columns f, ax = plt.subplots(1, figsize=figsize) ax.bar(x_labs, ranges, color="skyblue", alpha=alpha) @@ -171,7 +241,7 @@ def plot_autocorrelation_ranges(XYs, X, lags, bw, **kwargs): size=font_size, ) ax.axhline(median_eff_range, color=block_suggestion_color, linestyle="--") - return f, ax + return f, ax, ranges def aoa( @@ -323,3 +393,41 @@ def plot_aoa( ax[1].set_title("AOA") return f, ax + + +def plot_variogram( + XYs: gpd.GeoSeries, + col: Union[gpd.GeoSeries, np.ndarray], + lags: np.ndarray, + bw: Union[int, float], + distance_metric: str = "euclidean", +) -> np.ndarray: + """ + Return semivariance values for defined lag of distance. + + Parameters + ---------- + XYs : Geoseries series + Series containing X and Y coordinates. + col : array or list + Array (N,) containing variable. + lags : array + Array of distance lags in metres to obtain semivariances. + bw : integer or float + Bandwidth, plus and minus lags to calculate semivariance. + distance_metric : string + Distance function to calculate pairwise distances. Must be "euclidean" for + points in euclidean space, or "haversine" for points in a geographic CRS. + Defaults to "euclidean". + + Returns + ------- + semis : Array of floats + Array of semivariances at defined lag points for given variable. 
+ """ + semis = variogram_at_lag(XYs, col, lags, bw, distance_metric) + sv, h = semis[0], semis[1] + plt.scatter(h, sv) + plt.show() + + return semis From 5d9e82012db79962e566c370989ad82143665681 Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Wed, 31 May 2023 19:59:47 +0200 Subject: [PATCH 08/14] update deprecated gpd.sjoin param --- spacv/grid_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacv/grid_builder.py b/spacv/grid_builder.py index 81e4bb5..d323ece 100644 --- a/spacv/grid_builder.py +++ b/spacv/grid_builder.py @@ -283,7 +283,7 @@ def assign_pt_to_grid(XYs, grid, distance_metric="euclidean", random_state=None) # Equate spatial reference systems if defined if not grid.crs == XYs.crs: grid.crs = XYs.crs - XYs = gpd.sjoin(XYs, grid, how="left", op="within")[["geometry", "grid_id"]] + XYs = gpd.sjoin(XYs, grid, how="left", predicate="within")[["geometry", "grid_id"]] # In rare cases, points will sit at the border separating two grids if XYs["grid_id"].isna().any(): From cbfb36c8b6c4042c4ccd13168e2099a4061d082a Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Wed, 31 May 2023 20:00:36 +0200 Subject: [PATCH 09/14] project epsg 4326 into meters when performing geometric operations --- spacv/grid_builder.py | 10 +++++++++- spacv/spacv.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/spacv/grid_builder.py b/spacv/grid_builder.py index d323ece..95fd347 100644 --- a/spacv/grid_builder.py +++ b/spacv/grid_builder.py @@ -288,7 +288,15 @@ def assign_pt_to_grid(XYs, grid, distance_metric="euclidean", random_state=None) # In rare cases, points will sit at the border separating two grids if XYs["grid_id"].isna().any(): # Find border pts and assign to nearest grid centroid - grid_centroid = grid.geometry.centroid + # First project to pseudo-mercator if using 4326 + epsg = grid.crs.to_epsg() + if epsg == 4326: + grid = grid.to_crs(3857) + grid_centroid = grid.geometry.centroid + grid = grid.to_crs(epsg) # return to original EPSG + else: + grid_centroid = grid.geometry.centroid + grid_centroid = geometry_to_2d(grid_centroid) border_pt_index = XYs["grid_id"].isna() border_pts = XYs[border_pt_index].geometry diff --git a/spacv/spacv.py b/spacv/spacv.py index faeabd9..206116e 100644 --- a/spacv/spacv.py +++ b/spacv/spacv.py @@ -139,7 +139,16 @@ def _iter_test_indices(self, XYs): # Remove empty grids if len(test_indices) < 1: continue - grid_poly_buffer = grid.loc[[grid_id]].buffer(self.buffer_radius) + + epsg = grid.crs.to_epsg() + if epsg == 4326: + grid = grid.to_crs(3857) + grid_poly_buffer = grid.loc[[grid_id]].buffer(self.buffer_radius) + grid_poly_buffer = grid_poly_buffer.to_crs(epsg) + grid = grid.to_crs(epsg) # return to original EPSG + else: + grid_poly_buffer = grid.loc[[grid_id]].buffer(self.buffer_radius) + test_indices, train_exclude = super()._remove_buffered_indices( XYs, test_indices, self.buffer_radius, grid_poly_buffer ) From 4b9efe175df211fe6f8b5f126ea917e6b7863fe5 Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sat, 10 Jun 2023 09:08:35 +0200 Subject: [PATCH 10/14] Catch failed semivariance calculation and log error --- spacv/visualisation.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/spacv/visualisation.py b/spacv/visualisation.py index 15d8954..8661f46 100644 --- a/spacv/visualisation.py +++ b/spacv/visualisation.py @@ -1,3 +1,4 @@ +import logging import multiprocessing from typing import Tuple, Union @@ -32,10 +33,11 @@ def variogram_at_lag( XYs: 
gpd.GeoSeries, - x: Union[list, np.ndarray], + x: Union[list, np.ndarray, gpd.GeoSeries], lags: np.ndarray, bw: Union[int, float], distance_metric: str = "euclidean", + col_name: str = None, ) -> np.ndarray: """ Return semivariance values for defined lag of distance. @@ -44,7 +46,7 @@ def variogram_at_lag( ---------- XYs : Geoseries series Series containing X and Y coordinates. - X : array or list + X : array, list, or Geoseries Array (N,) containing variable. lags : array Array of distance lags in metres to obtain semivariances. @@ -77,12 +79,12 @@ def variogram_at_lag( lower = pd_m >= lag - bw upper = pd_m <= lag + bw mask = np.logical_and(lower, upper) - semivariances[i] = compute_semivariance(x, mask, bw) + semivariances[i] = compute_semivariance(x, mask, bw, col_name) return np.c_[semivariances, lags].T -def compute_semivariance(x, mask, bw): +def compute_semivariance(x, mask, bw, col_name): """ Calculate semivariance for masked elements. """ @@ -97,8 +99,14 @@ def compute_semivariance(x, mask, bw): if len(ss[filter_empty]) > 0: counts.append(len(ss[filter_empty])) semis.append(ss[filter_empty]) - semivariance = np.sum(np.concatenate(semis)) / (2.0 * sum(counts)) - return semivariance + try: + semivariance = np.sum(np.concatenate(semis)) / (2.0 * sum(counts)) + return semivariance + except ValueError: + logging.error( + f"Could not calculate semivariances for {col_name}. Using 0 instead." + ) + return 0 def variogram(func): @@ -140,8 +148,8 @@ def spherical(h, r, sill, nugget=0): def calculate_range(args): - XYs, col, lags, bw, distance_metric = args - semis = variogram_at_lag(XYs, col, lags, bw, distance_metric) + XYs, col, lags, bw, distance_metric, col_name = args + semis = variogram_at_lag(XYs, col, lags, bw, distance_metric, col_name) sv, h = semis[0], semis[1] start_params = [np.nanmax(h), np.nanmax(sv)] bounds = (0, start_params) @@ -207,10 +215,11 @@ def plot_autocorrelation_ranges( results = [] for i, col in enumerate(X.values.T): + col_name = X.columns[i] if verbose: - print(f"{i}: {X.columns[i]}") + print(f"{i}: {col_name}") # Fit spherical model and extract effective range parameter - args = (XYs, col, lags, bw, distance_metric) + args = (XYs, col, lags, bw, distance_metric, col_name) results.append(pool.apply_async(calculate_range, (args,))) for result in results: @@ -220,10 +229,11 @@ def plot_autocorrelation_ranges( pool.join() else: for i, col in enumerate(X.values.T): + col_name = X.columns[i] if verbose: - print(f"{i}: {X.columns[i]}") + print(f"{i}: {col_name}") # Fit spherical model and extract effective range parameter - args = (XYs, col, lags, bw, distance_metric) + args = (XYs, col, lags, bw, distance_metric, col_name) eff_range = calculate_range(args) ranges.append(eff_range) From 6a894fbf0be2f2f07e4e1010ef0f5046763d31fc Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sat, 17 Jun 2023 16:03:29 +0200 Subject: [PATCH 11/14] add type hinting to HBLOCK --- spacv/spacv.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spacv/spacv.py b/spacv/spacv.py index 206116e..1d39b05 100644 --- a/spacv/spacv.py +++ b/spacv/spacv.py @@ -1,5 +1,6 @@ import numbers import warnings +from typing import Optional, Union import geopandas as gpd import numpy as np @@ -69,17 +70,17 @@ class HBLOCK(BaseSpatialCV): def __init__( self, - tiles_x=5, - tiles_y=5, - shape="square", - method="unique", - buffer_radius=0, - direction="diagonal", - n_groups=5, - data=None, - n_sims=10, - distance_metric="euclidean", - random_state=None, + tiles_x: int 
= 5, + tiles_y: int = 5, + shape: str = "square", + method: str = "unique", + buffer_radius: Union[int, float] = 0, + direction: str = "diagonal", + n_groups: int = 5, + data: Optional[np.ndarray] = None, + n_sims: int = 10, + distance_metric: str = "euclidean", + random_state: Optional[int] = None, ): self.tiles_x = tiles_x self.tiles_y = tiles_y From e280142c49106b7ccb1ebb746365d48a60c94fdf Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sun, 10 Sep 2023 08:43:39 +0200 Subject: [PATCH 12/14] drop nans and only use a subset of 30000 points for AC ranges calc --- spacv/visualisation.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/spacv/visualisation.py b/spacv/visualisation.py index 8661f46..a8d1306 100644 --- a/spacv/visualisation.py +++ b/spacv/visualisation.py @@ -65,6 +65,17 @@ def variogram_at_lag( XYs = geometry_to_2d(XYs) x = np.asarray(x) + # Remove nans from x and XYs for variogram calculation + nan_idx = np.isnan(x) + x = x[~nan_idx] + XYs = XYs[~nan_idx] + + # Select only 30000 random points from x and XYs to calculate pairwise distances + if len(x) > 30000: + idx = np.random.choice(len(x), 30000, replace=False) + x = x[idx] + XYs = XYs[idx] + if distance_metric == "euclidean": paired_distances = pdist(XYs) pd_m = squareform(paired_distances) @@ -318,7 +329,7 @@ def aoa( folds = np.vstack((fold_indices, instance_fold_id)).T # Mask training points in same fold for DI measure calculation - for i, row in enumerate(train_dist): + for i, _ in enumerate(train_dist): mask = folds[:, 0] == folds[:, 0][i] train_dist[i, mask] = np.nan From 472743b19917a3e5b891220002d32286b50efb64 Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sun, 19 May 2024 12:18:29 +0200 Subject: [PATCH 13/14] update version to reflect changes --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d0ea701..1f77f43 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="spacv", - version="0.0.21", + version="0.0.22", description="Spatial cross-validation in Python", long_description=long_description, long_description_content_type="text/markdown", From 954b1332d60ac753ceb9b90fb73b2a447957cd22 Mon Sep 17 00:00:00 2001 From: Daniel Lusk Date: Sun, 19 May 2024 13:17:21 +0200 Subject: [PATCH 14/14] remove pygeos from requirements now that it has been integrated with Shapely --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 248d848..289910f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,4 @@ pandas geopandas shapely scikit-learn -scipy -pygeos \ No newline at end of file +scipy \ No newline at end of file
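
Taken together, the patches above make a handful of small additions to spacv's public surface, sketched below against hypothetical data. The file name samples.gpkg and the column elevation are placeholders, not part of the patches; any point GeoDataFrame in EPSG:4326 with numeric covariates would do. First, the new plot_variogram helper from the visualisation module:

    import geopandas as gpd
    import numpy as np
    from spacv.visualisation import plot_variogram

    gdf = gpd.read_file("samples.gpkg")      # hypothetical points in EPSG:4326
    lags = np.arange(5_000, 100_000, 5_000)  # lag distances in metres
    bw = 2_500                               # half-width of each lag bin

    # Scatter the empirical semivariances against lag distance; with a
    # geographic CRS, "haversine" yields great-circle distances in metres.
    semis = plot_variogram(gdf.geometry, gdf["elevation"], lags, bw,
                           distance_metric="haversine")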
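
Continuing from the same gdf, lags, and bw, the reworked plot_autocorrelation_ranges can fit the per-column spherical models in parallel, subsamples inputs above 30000 points before the pairwise-distance step, and returns the fitted effective ranges alongside the figure. The extra covariate names below are again placeholders:

    from spacv.visualisation import plot_autocorrelation_ranges

    X = gdf[["elevation", "ndvi", "slope"]]  # hypothetical covariate columns

    fig, ax, ranges = plot_autocorrelation_ranges(
        gdf.geometry, X, lags, bw,
        distance_metric="haversine", workers=4, verbose=True,
    )

The median of the returned ranges is the block-size suggestion drawn as the dashed line on the bar chart.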
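
The buffering fixes in spacv/base_classes.py and spacv/spacv.py can be exercised through HBLOCK, assuming spacv re-exports it at package level (import from spacv.spacv otherwise). For EPSG:4326 inputs the grid is projected to EPSG:3857 before buffering, so buffer_radius is interpreted in metres, and a zero buffer no longer raises:

    import spacv

    # 4 x 4 blocked folds with a 25 km dead zone around each test block.
    skcv = spacv.HBLOCK(tiles_x=4, tiles_y=4, buffer_radius=25_000,
                        random_state=1)
    for train_idx, test_idx in skcv.split(gdf.geometry):
        print(len(train_idx), len(test_idx))  # buffered points appear in neither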
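
Finally, the reformatted AOA utilities plot the dissimilarity index next to the applicability mask. A minimal sketch under the same assumptions, with prediction_grid.gpkg standing in for a prediction-surface GeoDataFrame that shares the covariate columns and carries its own geometry column:

    from spacv.visualisation import plot_aoa

    new_data = gpd.read_file("prediction_grid.gpkg")  # hypothetical grid

    # Left panel: dissimilarity index (DI); right panel: binary AOA mask,
    # with the training locations overlaid on both.
    fig, ax = plot_aoa(new_data, gdf, columns=["elevation", "ndvi"])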