Commit f5fe978

Fix dtype dispatch in cubecl module ops (#3658)
* Add dtype dispatch to cubecl module ops
* Fix output option
* Fix other int + float combined dispatch
* Clippy allow
1 parent 704c51c commit f5fe978
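
The change routes every module op through a dtype dispatch: the op receives tensors whose element type is only known at runtime (x.dtype), and the dispatch expands to a match that instantiates the generic kernel with the matching concrete element type. A minimal standalone Rust sketch of that idea, using hypothetical DType and fill names rather than the crate's execute_with_dtype! macro shown in the diffs below:

// Minimal standalone sketch (hypothetical names, not the actual macro): pick the concrete
// element type `E` from a runtime dtype tag and run the same generic code with it.
#[derive(Clone, Copy, Debug)]
enum DType {
    F32,
    F64,
}

// A "kernel" generic over the element type, like `kernel::conv::conv::<R, E, 2>` in the diffs.
fn fill<E: Default + Clone>(len: usize) -> Vec<E> {
    vec![E::default(); len]
}

// Dispatch: every arm runs the same generic code, monomorphized for a different `E`.
fn bytes_for(dtype: DType, len: usize) -> usize {
    match dtype {
        DType::F32 => fill::<f32>(len).len() * std::mem::size_of::<f32>(),
        DType::F64 => fill::<f64>(len).len() * std::mem::size_of::<f64>(),
    }
}

fn main() {
    assert_eq!(bytes_for(DType::F32, 4), 16);
    assert_eq!(bytes_for(DType::F64, 4), 32);
}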

4 files changed: 201 additions & 69 deletions

crates/burn-cubecl/src/kernel/conv/deform_conv_transpose2d.rs

Lines changed: 13 additions & 4 deletions
@@ -2,7 +2,7 @@ use std::marker::PhantomData;
 
 use burn_tensor::{
     Shape,
-    ops::{DeformConv2dBackward, DeformConvOptions, FloatTensorOps as _},
+    ops::{DeformConvOptions, FloatTensorOps as _},
 };
 use cubecl::{
     AtomicFeature, CubeDim, CubeLaunch, Feature, calculate_cube_count_elemwise,
@@ -30,7 +30,7 @@ use crate::{
 use super::{bilinear_interpolate, deform_im2col, index};
 
 /// Calculate the [deformable 2D convolution](crate::ops::ModuleOps::deform_conv2d) backward pass using convolutions.
-#[allow(clippy::single_range_in_vec_init)]
+#[allow(clippy::single_range_in_vec_init, clippy::type_complexity)]
 pub(crate) fn deform_conv2d_backward<
     R: CubeRuntime,
     E: FloatElement,
@@ -44,7 +44,16 @@ pub(crate) fn deform_conv2d_backward<
     bias: Option<CubeTensor<R>>,
     out_grad: CubeTensor<R>,
     options: DeformConvOptions<2>,
-) -> Result<DeformConv2dBackward<CubeBackend<R, E, I, BT>>, ConvSetupError> {
+) -> Result<
+    (
+        CubeTensor<R>,
+        CubeTensor<R>,
+        CubeTensor<R>,
+        Option<CubeTensor<R>>,
+        Option<CubeTensor<R>>,
+    ),
+    ConvSetupError,
+> {
     let [_, _, out_h, out_w] = out_grad.shape.dims();
     let [_, _, kernel_h, kernel_w] = weight.shape.dims();
 
@@ -80,7 +89,7 @@ pub(crate) fn deform_conv2d_backward<
         (out_h, out_w),
     )?;
 
-    Ok(DeformConv2dBackward::new(
+    Ok((
         input_gradient,
         offset_gradient,
         weight_grad,

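The signature change above drops DeformConv2dBackward<CubeBackend<R, E, I, BT>> from the kernel's return type in favor of a plain tuple of gradient tensors, and allows clippy::type_complexity for the resulting tuple-in-Result type; the backend wrapper is rebuilt at the call site, inside the dtype-dispatch block in the module ops diff further down. A standalone sketch of that decoupling, with hypothetical Grad and DeformBackward types:

// Standalone sketch (hypothetical types): the inner function returns a plain tuple so its
// signature does not name the wrapper type; the caller rebuilds the wrapper where the
// concrete output type is known.
#[derive(Debug)]
struct Grad(f32);

#[derive(Debug)]
struct DeformBackward {
    input: Grad,
    offset: Grad,
    weight: Grad,
    mask: Option<Grad>,
    bias: Option<Grad>,
}

#[allow(clippy::type_complexity)]
fn backward_kernel() -> Result<(Grad, Grad, Grad, Option<Grad>, Option<Grad>), String> {
    Ok((Grad(0.1), Grad(0.2), Grad(0.3), None, None))
}

fn backward_op() -> DeformBackward {
    let (input, offset, weight, mask, bias) = backward_kernel().unwrap();
    DeformBackward { input, offset, weight, mask, bias }
}

fn main() {
    println!("{:?}", backward_op());
}
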
crates/burn-cubecl/src/ops/float_ops.rs

Lines changed: 28 additions & 12 deletions
@@ -190,9 +190,13 @@ where
         indices: IntTensor<Self>,
     ) -> FloatTensor<Self> {
         execute_with_dtype!(
-            float(tensor.dtype),
-            E,
-            kernel::gather::<R, E, I>(dim, tensor, indices)
+            int(indices.dtype),
+            I,
+            execute_with_dtype!(
+                float(tensor.dtype),
+                E,
+                kernel::gather::<R, E, I>(dim, tensor, indices)
+            )
         )
     }
 
@@ -203,9 +207,13 @@ where
         value: FloatTensor<Self>,
     ) -> FloatTensor<Self> {
         execute_with_dtype!(
-            float(tensor.dtype, value.dtype),
-            E,
-            kernel::scatter::<R, E, I>(dim, tensor, indices, value)
+            int(indices.dtype),
+            I,
+            execute_with_dtype!(
+                float(tensor.dtype, value.dtype),
+                E,
+                kernel::scatter::<R, E, I>(dim, tensor, indices, value)
+            )
         )
     }
 
@@ -215,9 +223,13 @@ where
         indices: IntTensor<Self>,
     ) -> FloatTensor<Self> {
         execute_with_dtype!(
-            float(tensor.dtype),
-            E,
-            kernel::select::<R, E, I>(tensor, dim, indices)
+            int(indices.dtype),
+            I,
+            execute_with_dtype!(
+                float(tensor.dtype),
+                E,
+                kernel::select::<R, E, I>(tensor, dim, indices)
+            )
         )
     }
 
@@ -228,9 +240,13 @@ where
         value: FloatTensor<Self>,
     ) -> FloatTensor<Self> {
         execute_with_dtype!(
-            float(tensor.dtype, value.dtype),
-            E,
-            kernel::select_assign::<R, E, I>(tensor, dim, indices, value)
+            int(indices.dtype),
+            I,
+            execute_with_dtype!(
+                float(tensor.dtype, value.dtype),
+                E,
+                kernel::select_assign::<R, E, I>(tensor, dim, indices, value)
+            )
         )
     }
 

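The index-based ops above (gather, scatter, select, select_assign) previously dispatched only on the float dtype, leaving the index element type I fixed to the backend default; the change wraps that float dispatch in an outer dispatch on indices.dtype so the index tensor's actual dtype selects I. A standalone Rust sketch of the nested dispatch, with hypothetical FloatDType, IntDType, and gather_kernel names standing in for the two nested execute_with_dtype! invocations:

// Standalone sketch (hypothetical names): the outer match picks the index type `I` from the
// indices' runtime dtype, the inner match picks the float type `E`, and both feed the same
// doubly generic kernel.
#[derive(Clone, Copy)]
enum FloatDType {
    F32,
    F64,
}

#[derive(Clone, Copy)]
enum IntDType {
    I32,
    I64,
}

// A gather-like kernel generic over both element types.
fn gather_kernel<E: Copy, I: Copy + TryInto<usize>>(values: &[E], indices: &[I]) -> Vec<E>
where
    <I as TryInto<usize>>::Error: std::fmt::Debug,
{
    indices.iter().map(|&i| values[i.try_into().unwrap()]).collect()
}

fn gather_len(float_dtype: FloatDType, int_dtype: IntDType) -> usize {
    match int_dtype {
        IntDType::I32 => match float_dtype {
            FloatDType::F32 => gather_kernel::<f32, i32>(&[1.0, 2.0], &[1, 0]).len(),
            FloatDType::F64 => gather_kernel::<f64, i32>(&[1.0, 2.0], &[1, 0]).len(),
        },
        IntDType::I64 => match float_dtype {
            FloatDType::F32 => gather_kernel::<f32, i64>(&[1.0, 2.0], &[1, 0]).len(),
            FloatDType::F64 => gather_kernel::<f64, i64>(&[1.0, 2.0], &[1, 0]).len(),
        },
    }
}

fn main() {
    assert_eq!(gather_len(FloatDType::F32, IntDType::I64), 2);
    assert_eq!(gather_len(FloatDType::F64, IntDType::I32), 2);
}
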
Lines changed: 120 additions & 52 deletions
@@ -1,6 +1,7 @@
 use crate::{
     CubeBackend, CubeRuntime, FloatElement, IntElement,
     element::BoolElement,
+    execute_with_dtype,
     kernel::{
         self,
         conv::{ConvStrategy, ConvTranspose2dStrategy},
@@ -25,7 +26,12 @@ where
         bias: Option<FloatTensor<Self>>,
         options: ConvOptions<1>,
     ) -> FloatTensor<Self> {
-        kernel::conv::conv::<R, F, 1>(x, weight, bias, options, ConvStrategy::default()).unwrap()
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::conv::<R, E, 1>(x, weight, bias, options, ConvStrategy::default())
+                .unwrap()
+        )
     }
 
     fn conv2d(
@@ -34,7 +40,12 @@ where
         bias: Option<FloatTensor<Self>>,
         options: ConvOptions<2>,
     ) -> FloatTensor<Self> {
-        kernel::conv::conv::<R, F, 2>(x, weight, bias, options, ConvStrategy::default()).unwrap()
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::conv::<R, E, 2>(x, weight, bias, options, ConvStrategy::default())
+                .unwrap()
+        )
     }
 
     fn deform_conv2d(
@@ -45,7 +56,11 @@ where
         bias: Option<FloatTensor<Self>>,
         options: DeformConvOptions<2>,
     ) -> FloatTensor<Self> {
-        kernel::conv::deform_conv2d::<R, F>(x, offset, weight, mask, bias, options).unwrap()
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::deform_conv2d::<R, E>(x, offset, weight, mask, bias, options).unwrap()
+        )
     }
 
     fn deform_conv2d_backward(
@@ -57,16 +72,19 @@ where
         output_grad: FloatTensor<Self>,
         options: DeformConvOptions<2>,
     ) -> DeformConv2dBackward<Self> {
-        kernel::conv::deform_conv2d_backward::<R, F, I, BT>(
-            x,
-            offset,
-            weight,
-            mask,
-            bias,
-            output_grad,
-            options,
-        )
-        .unwrap()
+        execute_with_dtype!(float(x.dtype), E, {
+            let (x, o, w, m, b) = kernel::conv::deform_conv2d_backward::<R, E, I, BT>(
+                x,
+                offset,
+                weight,
+                mask,
+                bias,
+                output_grad,
+                options,
+            )
+            .unwrap();
+            DeformConv2dBackward::new(x, o, w, m, b)
+        })
     }
 
     fn conv3d(
@@ -75,7 +93,11 @@ where
         bias: Option<FloatTensor<Self>>,
         options: ConvOptions<3>,
     ) -> FloatTensor<Self> {
-        kernel::conv::conv::<R, F, 3>(x, weight, bias, options, ConvStrategy::Direct).unwrap()
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::conv::<R, E, 3>(x, weight, bias, options, ConvStrategy::Direct).unwrap()
+        )
     }
 
     fn conv_transpose2d(
@@ -84,14 +106,18 @@ where
         bias: Option<FloatTensor<Self>>,
         options: ConvTransposeOptions<2>,
     ) -> FloatTensor<Self> {
-        kernel::conv::conv_transpose2d::<R, F, I>(
-            x,
-            weight,
-            bias,
-            options,
-            ConvTranspose2dStrategy::default(),
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::conv_transpose2d::<R, E, I>(
+                x,
+                weight,
+                bias,
+                options,
+                ConvTranspose2dStrategy::default(),
+            )
+            .unwrap()
         )
-        .unwrap()
     }
 
     fn conv_transpose3d(
@@ -100,7 +126,11 @@ where
         bias: Option<FloatTensor<Self>>,
         options: ConvTransposeOptions<3>,
     ) -> FloatTensor<Self> {
-        kernel::conv::conv_transpose3d::<R, F>(x, weight, bias, options)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::conv::conv_transpose3d::<R, E>(x, weight, bias, options)
+        )
     }
 
     fn avg_pool2d(
@@ -110,7 +140,11 @@ where
         padding: [usize; 2],
         count_include_pad: bool,
     ) -> FloatTensor<Self> {
-        kernel::pool::avg_pool2d::<R, F>(x, kernel_size, stride, padding, count_include_pad)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::pool::avg_pool2d::<R, E>(x, kernel_size, stride, padding, count_include_pad)
+        )
     }
 
     fn avg_pool2d_backward(
@@ -121,13 +155,17 @@ where
         padding: [usize; 2],
         count_include_pad: bool,
     ) -> FloatTensor<Self> {
-        kernel::pool::avg_pool2d_backward::<R, F>(
-            x,
-            grad,
-            kernel_size,
-            stride,
-            padding,
-            count_include_pad,
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::pool::avg_pool2d_backward::<R, E>(
+                x,
+                grad,
+                kernel_size,
+                stride,
+                padding,
+                count_include_pad,
+            )
         )
     }
 
@@ -138,7 +176,11 @@ where
         padding: [usize; 2],
         dilation: [usize; 2],
     ) -> FloatTensor<Self> {
-        kernel::pool::max_pool2d::<R, F>(x, kernel_size, stride, padding, dilation)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::pool::max_pool2d::<R, E>(x, kernel_size, stride, padding, dilation)
+        )
     }
 
     fn max_pool2d_with_indices(
@@ -148,15 +190,17 @@ where
         padding: [usize; 2],
         dilation: [usize; 2],
     ) -> MaxPool2dWithIndices<Self> {
-        let (output, indices) = kernel::pool::max_pool2d_with_indices::<R, F, I>(
-            x,
-            kernel_size,
-            stride,
-            padding,
-            dilation,
-        );
+        execute_with_dtype!(float(x.dtype), E, {
+            let (output, indices) = kernel::pool::max_pool2d_with_indices::<R, E, I>(
+                x,
+                kernel_size,
+                stride,
+                padding,
+                dilation,
+            );
 
-        MaxPool2dWithIndices::new(output, indices)
+            MaxPool2dWithIndices::new(output, indices)
+        })
     }
 
     fn max_pool2d_with_indices_backward(
@@ -168,34 +212,54 @@ where
         output_grad: FloatTensor<Self>,
         indices: IntTensor<Self>,
     ) -> MaxPool2dBackward<Self> {
-        MaxPool2dBackward::new(kernel::pool::max_pool2d_with_indices_backward::<R, F, I>(
-            x,
-            output_grad,
-            indices,
-            kernel_size,
-            stride,
-            padding,
-            dilation,
-        ))
+        execute_with_dtype!(
+            int(indices.dtype),
+            I,
+            execute_with_dtype!(
+                float(x.dtype),
+                E,
+                MaxPool2dBackward::new(kernel::pool::max_pool2d_with_indices_backward::<R, E, I>(
+                    x,
+                    output_grad,
+                    indices,
+                    kernel_size,
+                    stride,
+                    padding,
+                    dilation,
+                ))
+            )
+        )
     }
 
     fn adaptive_avg_pool2d(x: FloatTensor<Self>, output_size: [usize; 2]) -> FloatTensor<Self> {
-        kernel::pool::adaptive_avg_pool2d::<R, F>(x, output_size)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::pool::adaptive_avg_pool2d::<R, E>(x, output_size)
+        )
    }
 
     fn adaptive_avg_pool2d_backward(
         x: FloatTensor<Self>,
         grad: FloatTensor<Self>,
     ) -> FloatTensor<Self> {
-        kernel::pool::adaptive_avg_pool2d_backward::<R, F>(x, grad)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::pool::adaptive_avg_pool2d_backward::<R, E>(x, grad)
+        )
     }
 
     fn interpolate(
         x: FloatTensor<Self>,
         output_size: [usize; 2],
         options: InterpolateOptions,
     ) -> FloatTensor<Self> {
-        kernel::interpolate::interpolate::<R, F>(x, output_size, options)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::interpolate::interpolate::<R, E>(x, output_size, options)
+        )
     }
 
     fn interpolate_backward(
@@ -204,6 +268,10 @@ where
         output_size: [usize; 2],
         options: InterpolateOptions,
     ) -> FloatTensor<Self> {
-        kernel::interpolate::interpolate_backward::<R, F>(x, grad, output_size, options)
+        execute_with_dtype!(
+            float(x.dtype),
+            E,
+            kernel::interpolate::interpolate_backward::<R, E>(x, grad, output_size, options)
+        )
     }
 }

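For ops whose result type does not mention the float element, such as DeformConv2dBackward<Self>, MaxPool2dWithIndices<Self>, and MaxPool2dBackward<Self>, the diff uses the block form of the dispatch so the wrapper is constructed inside the dispatched block and every branch yields the same concrete type. A standalone sketch of that shape, with hypothetical DType, kernel, and Output names:

// Standalone sketch (hypothetical names): each arm instantiates a different `E`, but all
// arms must produce the same type, so the `E`-free wrapper is built before the match ends.
#[derive(Clone, Copy)]
enum DType {
    F32,
    F64,
}

#[derive(Debug)]
struct Output {
    len: usize,
}

// A kernel generic over the element type; its return type depends on `E`.
fn kernel<E: Default + Clone>(n: usize) -> Vec<E> {
    vec![E::default(); n]
}

fn op(dtype: DType, n: usize) -> Output {
    match dtype {
        DType::F32 => {
            let values = kernel::<f32>(n);
            Output { len: values.len() }
        }
        DType::F64 => {
            let values = kernel::<f64>(n);
            Output { len: values.len() }
        }
    }
}

fn main() {
    println!("{:?}", op(DType::F32, 3));
    println!("{:?}", op(DType::F64, 2));
}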