Skip to content

Commit

Permalink
feat(cuda): add gpu support to compute the distance matrix
Browse files Browse the repository at this point in the history
uses `numba` which requires a `cuda` compatible gpu with the appropriate
driver and cuda sdk installed
https://numba.readthedocs.io/en/stable/cuda/overview.html#requirements
  • Loading branch information
loiccoyle committed Jun 24, 2024
1 parent ca91b76 commit ebb7a0c
Show file tree
Hide file tree
Showing 11 changed files with 479 additions and 180 deletions.
52 changes: 29 additions & 23 deletions examples/faces.ipynb

Large diffs are not rendered by default.

156 changes: 124 additions & 32 deletions examples/metrics.ipynb

Large diffs are not rendered by default.

109 changes: 80 additions & 29 deletions examples/performance.ipynb

Large diffs are not rendered by default.

27 changes: 16 additions & 11 deletions examples/performance_trick.ipynb

Large diffs are not rendered by default.

28 changes: 19 additions & 9 deletions phomo/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def parse_args(args: List[str]) -> argparse.Namespace:
nargs="+",
default=None,
)

parser.add_argument(
"-C",
"--tile-crop-ratio",
Expand All @@ -74,13 +73,6 @@ def parse_args(args: List[str]) -> argparse.Namespace:
type=int,
default=1,
)
parser.add_argument(
"-v",
"--verbose",
help="Verbosity.",
action="count",
default=0,
),
parser.add_argument(
"-b",
"--black-and-white",
Expand All @@ -101,6 +93,12 @@ def parse_args(args: List[str]) -> argparse.Namespace:
default=[],
type=float,
)
parser.add_argument(
"-G",
"--gpu",
help="Use GPU for distance matrix computation. Requires installing with `pip install 'phomo[cuda]'`.",
action="store_true",
)
parser.add_argument(
"-m",
"--metric",
Expand All @@ -116,6 +114,13 @@ def parse_args(args: List[str]) -> argparse.Namespace:
default=1,
type=int,
)
parser.add_argument(
"-v",
"--verbose",
help="Verbosity.",
action="count",
default=0,
)
return parser.parse_args(args)


Expand Down Expand Up @@ -180,7 +185,12 @@ def main():
grid_im = mosaic.grid.plot()
grid_im.show()
else:
mosaic_im = mosaic.build(workers=args.workers, metric=args.metric)
d_matrix = (
mosaic.d_matrix(metric=args.metric, workers=args.workers)
if not args.gpu
else mosaic.d_matrix_cuda(metric=args.metric)
)
mosaic_im = mosaic.build(d_matrix)
if args.output is None:
mosaic_im.show()
else:
Expand Down
105 changes: 80 additions & 25 deletions phomo/mosaic.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging
import math
from functools import partial
from multiprocessing.pool import Pool as MpPool
from typing import Optional, Tuple, Union
from typing import Literal, Tuple, Union

import numpy as np
from PIL import Image
from tqdm.auto import tqdm
from scipy.optimize import linear_sum_assignment
from tqdm.auto import tqdm

from .grid import Grid
from .master import Master
Expand Down Expand Up @@ -82,7 +83,7 @@ def _d_matrix_worker(
array = resize_array(array, (self.tile_shape[1], self.tile_shape[0]))
return metric_func(array, self.pool.arrays, **kwargs)

def compute_d_matrix(
def d_matrix(
self,
workers: int = 1,
metric: Union[str, MetricCallable] = "norm",
Expand Down Expand Up @@ -144,13 +145,81 @@ def compute_d_matrix(
self._log.debug("d_matrix shape: %s", d_matrix.shape)
return d_matrix

def build_greedy(
self,
workers: int = 1,
metric: Union[str, MetricCallable] = "norm",
d_matrix: Optional[np.ndarray] = None,
**kwargs,
) -> Image.Image:
def d_matrix_cuda(
self, metric: Literal["norm"] | Literal["greyscale"] = "norm"
) -> np.ndarray:
"""Compute the distance matrix using CUDA for GPU acceleration.
Args:
metric: The distance metric used for the distance matrix. Either "norm" or "greyscale".
Returns:
Distance matrix, shape: (number of master arrays, number of tiles in the pool).
"""

try:
from numba import cuda
except ImportError:
raise ImportError(
"Numba is required for CUDA support, run \"pip install 'phomo[cuda]'\" to install it."
)

if metric not in ["norm", "greyscale"]:
raise ValueError(
f"Invalid metric '{metric}'. When using gpu `metric' must be 'norm' or 'greyscale'."
)

self._log.info("Computing distance matrix with CUDA.")

# when the grid has been subdivided the master arrays will be smaller, so we grow them to match
# the tile size
grid_arrays = [
array
if array.shape == self.tile_shape
else resize_array(array, self.tile_shape)
for array in self.grid.arrays
]
pool_arrays = self.pool.arrays
if metric == "greyscale":
grid_arrays = [array.sum(axis=-1, keepdims=True) for array in grid_arrays]
pool_arrays = [array.sum(axis=-1, keepdims=True) for array in pool_arrays]

# Transfer the master and pool arrays to the GPU.
master_arrays_device = cuda.to_device(grid_arrays)
pool_arrays_device = cuda.to_device(pool_arrays)

# Allocate memory for the distance matrix on the GPU.
d_matrix_device = cuda.device_array((len(grid_arrays), len(pool_arrays)))

# Define the CUDA kernel for computing the distance matrix.
@cuda.jit
def compute_d_matrix_kernel(master_arrays, pool_arrays, d_matrix):
i, j = cuda.grid(2) # type: ignore
if i < master_arrays.shape[0] and j < pool_arrays.shape[0]:
distance = 0.0
for x in range(master_arrays.shape[1]):
for y in range(master_arrays.shape[2]):
for c in range(master_arrays.shape[3]):
diff = master_arrays[i, x, y, c] - pool_arrays[j, x, y, c]
distance += diff * diff
d_matrix[i, j] = math.sqrt(distance)

# Define the number of threads per block and blocks per grid.
threads_per_block = (16, 16)
blocks_per_grid_x = math.ceil(len(grid_arrays) / threads_per_block[0])
blocks_per_grid_y = math.ceil(len(pool_arrays) / threads_per_block[1])
blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

# Launch the kernel.
compute_d_matrix_kernel[blocks_per_grid, threads_per_block]( # type: ignore
master_arrays_device, pool_arrays_device, d_matrix_device
)

self._log.debug("d_matrix shape: %s", d_matrix_device.shape)
# Copy the result back to the host.
return d_matrix_device.copy_to_host()

def build_greedy(self, d_matrix: np.ndarray) -> Image.Image:
"""Construct the mosaic image using a greedy tile assignement algorithm.
Args:
Expand All @@ -167,10 +236,6 @@ def build_greedy(
"""
mosaic = np.zeros((self.size[1], self.size[0], 3))

# Compute the distance matrix.
if d_matrix is None:
d_matrix = self.compute_d_matrix(workers=workers, metric=metric, **kwargs)

# Keep track of tiles and sub arrays.
placed_master_arrays = set()
placed_tiles = set()
Expand Down Expand Up @@ -207,13 +272,7 @@ def build_greedy(
pbar.close()
return Image.fromarray(np.uint8(mosaic))

def build(
self,
workers: int = 1,
metric: Union[str, MetricCallable] = "norm",
d_matrix: Optional[np.ndarray] = None,
**kwargs,
) -> Image.Image:
def build(self, d_matrix: np.ndarray) -> Image.Image:
"""Construct the mosaic image by solving the linear sum assignment problem.
See: https://en.wikipedia.org/wiki/Assignment_problem
Expand All @@ -231,10 +290,6 @@ def build(
"""
mosaic = np.zeros((self.size[1], self.size[0], 3))

# Compute the distance matrix.
if d_matrix is None:
d_matrix = self.compute_d_matrix(workers=workers, metric=metric, **kwargs)

# expand the dmatrix to allow for repeated tiles
if self.n_appearances > 0:
d_matrix = np.tile(d_matrix, self.n_appearances)
Expand Down
3 changes: 2 additions & 1 deletion phomo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ def open_img_file(
Image instance.
"""
with Image.open(img_file) as img:
img = exif_transpose(img)
img_t = exif_transpose(img)
img = img_t if img_t is not None else img
if crop_ratio is not None:
img = crop_to_ratio(img, crop_ratio)
if size is not None:
Expand Down
Loading

0 comments on commit ebb7a0c

Please sign in to comment.