Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,9 @@ def make_image_pil(*args, **kwargs):
return to_pil_image(make_image(*args, **kwargs))


def make_image_cvcuda(*args, **kwargs):
return to_cvcuda_tensor(make_image(*args, **kwargs))
def make_image_cvcuda(*args, batch_dims=(1,), **kwargs):
    """Create an image via ``make_image`` and convert it with ``to_cvcuda_tensor``.

    All positional and keyword arguments are forwarded to ``make_image``; only
    ``batch_dims`` gets a different default here.
    """
    # batch_dims defaults to (1,) explicitly: to_cvcuda_tensor requires a batch dimension (ndims == 4)
    batched_image = make_image(*args, batch_dims=batch_dims, **kwargs)
    return to_cvcuda_tensor(batched_image)


def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"):
Expand Down
60 changes: 58 additions & 2 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3884,7 +3884,15 @@ def test_kernel_video(self):

@pytest.mark.parametrize(
"make_input",
[make_image_tensor, make_image_pil, make_image, make_video],
[
make_image_tensor,
make_image_pil,
make_image,
make_video,
pytest.param(
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
),
],
)
def test_functional(self, make_input):
    # The parametrized factory is called with its defaults to build the input under test.
    inpt = make_input()
    check_functional(F.gaussian_blur, inpt, kernel_size=(3, 3))
Expand All @@ -3896,14 +3904,29 @@ def test_functional(self, make_input):
(F._misc._gaussian_blur_image_pil, PIL.Image.Image),
(F.gaussian_blur_image, tv_tensors.Image),
(F.gaussian_blur_video, tv_tensors.Video),
pytest.param(
F._misc._gaussian_blur_cvcuda,
cvcuda.Tensor,
marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"),
),
],
)
def test_functional_signature(self, kernel, input_type):
    # Delegates the check for this (kernel, input_type) pair to the shared helper.
    dispatcher = F.gaussian_blur
    check_functional_kernel_signature_match(
        dispatcher,
        kernel=kernel,
        input_type=input_type,
    )

@pytest.mark.parametrize(
"make_input",
[make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
[
make_image_tensor,
make_image_pil,
make_image,
make_bounding_boxes,
make_segmentation_mask,
make_video,
pytest.param(
make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available")
),
],
)
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("sigma", [5, 2.0, (0.5, 2), [1.3, 2.7]])
Expand Down Expand Up @@ -3994,6 +4017,39 @@ def test_functional_image_correctness(self, dimensions, kernel_size, sigma, dtyp

torch.testing.assert_close(actual, expected, rtol=0, atol=1)

@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
@needs_cuda
@pytest.mark.parametrize(
("dimensions", "kernel_size", "sigma"),
[
((10, 12), (3, 3), 0.8),
((10, 12), (3, 3), 0.5),
((10, 12), (3, 5), 0.8),
((10, 12), (3, 5), 0.5),
((26, 28), (23, 23), 1.7),
],
)
@pytest.mark.parametrize("color_space", ["RGB", "GRAY"])
@pytest.mark.parametrize("batch_dims", [(1,), (2,), (4,)])
@pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
def test_functional_cvcuda_parity(self, dimensions, kernel_size, sigma, color_space, batch_dims, dtype):
    """Compare ``F._misc._gaussian_blur_cvcuda`` against ``F.gaussian_blur_image`` on the same CUDA image."""
    h, w = dimensions

    # Build the reference tensor on the GPU, then derive the CV-CUDA input from that same tensor
    # so both kernels see identical pixel data.
    reference_input = make_image(
        size=(h, w),
        color_space=color_space,
        batch_dims=batch_dims,
        dtype=dtype,
        device="cuda",
    )
    cvcuda_input = F.to_cvcuda_tensor(reference_input)

    expected = F.gaussian_blur_image(reference_input, kernel_size=kernel_size, sigma=sigma)
    blurred = F._misc._gaussian_blur_cvcuda(cvcuda_input, kernel_size=kernel_size, sigma=sigma)
    actual_torch = F.cvcuda_to_tensor(blurred)

    # Floating point results are compared with a fixed absolute tolerance.
    # uint8/16 gaussians can differ by up to max-value, most likely an overflow issue.
    # NOTE(review): assuming get_max_value returns the largest representable value of the dtype,
    # the integer branch accepts any pixel values and effectively only checks the tensor metadata
    # that assert_close compares -- confirm the root cause and tighten this tolerance.
    atol = 0.3 if dtype.is_floating_point else get_max_value(dtype)
    torch.testing.assert_close(actual_torch, expected, rtol=0, atol=atol)


class TestGaussianNoise:
@pytest.mark.parametrize(
Expand Down
67 changes: 65 additions & 2 deletions torchvision/transforms/v2/functional/_misc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import math
from typing import Optional
from typing import Optional, Sequence, TYPE_CHECKING

import PIL.Image
import torch
Expand All @@ -13,7 +13,14 @@

from ._meta import _convert_bounding_box_format

from ._utils import _get_kernel, _register_kernel_internal, is_pure_tensor
from ._utils import _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_kernel_internal, is_pure_tensor

# Evaluated once at import time; the CV-CUDA specific code in this module is gated on this flag.
CVCUDA_AVAILABLE = _is_cvcuda_available()

if TYPE_CHECKING:
    # Seen by static type checkers only (TYPE_CHECKING is False at runtime), so that the
    # "cvcuda.Tensor" string annotations used in this module can be resolved.
    import cvcuda  # type: ignore[import-not-found]
if CVCUDA_AVAILABLE:
    # At runtime the module object is obtained through the project's import helper instead;
    # the F811 suppression covers the rebinding of the name imported in the branch above.
    cvcuda = _import_cvcuda()  # noqa: F811


def normalize(
Expand Down Expand Up @@ -99,6 +106,41 @@ def _get_gaussian_kernel2d(
return kernel2d


def _validate_kernel_size_and_sigma(
kernel_size: Sequence[int] | int,
sigma: Sequence[float | int] | float | int | None = None,
) -> tuple[list[int], list[float]]:
# duplicated logic from gaussian_blur_image for use in gaussian_blur_cvcuda
if isinstance(kernel_size, int):
kernel_size = [kernel_size, kernel_size]
elif len(kernel_size) != 2:
raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}")
for ksize in kernel_size:
if ksize % 2 == 0 or ksize < 0:
raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}")

if sigma is None:
sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size]
else:
if isinstance(sigma, (list, tuple)):
length = len(sigma)
if length == 1:
s = sigma[0]
sigma = [s, s]
elif length != 2:
raise ValueError(f"If sigma is a sequence, its length should be 2. Got {length}")
elif isinstance(sigma, (int, float)):
s = float(sigma)
sigma = [s, s]
else:
raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}")
for s in sigma:
if s <= 0.0:
raise ValueError(f"sigma should have positive values. Got {sigma}")

return kernel_size, sigma


@_register_kernel_internal(gaussian_blur, torch.Tensor)
@_register_kernel_internal(gaussian_blur, tv_tensors.Image)
def gaussian_blur_image(
Expand Down Expand Up @@ -181,6 +223,27 @@ def gaussian_blur_video(
return gaussian_blur_image(video, kernel_size, sigma)


def _gaussian_blur_cvcuda(
    image: "cvcuda.Tensor", kernel_size: list[int], sigma: Optional[list[float]] = None
) -> "cvcuda.Tensor":
    """Apply a Gaussian blur to a CV-CUDA tensor through ``cvcuda.gaussian``.

    ``kernel_size`` and ``sigma`` are first passed through
    ``_validate_kernel_size_and_sigma`` and then handed to CV-CUDA as tuples.
    """
    # Fetched at call time through the project helper rather than relying on a module-level name.
    cvcuda_module = _import_cvcuda()

    ksize_pair, sigma_pair = _validate_kernel_size_and_sigma(kernel_size, sigma)

    # NOTE(review): confirm that Border.REFLECT uses the same edge convention as the padding of
    # the tensor kernel (gaussian_blur_image); if that kernel does not repeat the edge pixel,
    # a different cvcuda border mode may be the matching one.
    border_mode = cvcuda_module.Border.REFLECT
    return cvcuda_module.gaussian(
        image,
        tuple(ksize_pair),
        tuple(sigma_pair),
        border=border_mode,
    )


if CVCUDA_AVAILABLE:
    # The cvcuda.Tensor type only exists when CV-CUDA can be imported, so the kernel is
    # registered with an explicit call here instead of a decorator on the function.
    _gaussian_blur_cvcuda_registered = _register_kernel_internal(
        gaussian_blur,
        _import_cvcuda().Tensor,
    )(_gaussian_blur_cvcuda)


def gaussian_noise(inpt: torch.Tensor, mean: float = 0.0, sigma: float = 0.1, clip: bool = True) -> torch.Tensor:
"""See :class:`~torchvision.transforms.v2.GaussianNoise`"""
if torch.jit.is_scripting():
Expand Down