enable back resize

kornia · Feb 14, 2024 · 41874db · 41874db
1 parent 1512345
commit 41874db
Show file tree

Hide file tree

Showing 8 changed files with 720 additions and 507 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,6 +9,7 @@ categories = ["image-processing", "computer-vision", "machine-learning"]
 license-file = "LICENSE"
 
 [dependencies]
+anyhow = "1.0.79"
 num-traits = "0.2.17"
 image = { version = "0.24.5" }
 turbojpeg = {version = "1.0.0"}
@@ -19,7 +20,7 @@ candle-core = { version = "0.3.2", optional = true }
 
 [dev-dependencies]
 tempfile = "3.9.0"
-rerun = "0.12.1"
+rerun = "0.13.0"
 criterion = { version = "0.5.1", features = ["html_reports"] }
 
 [features]

diff --git a/src/color.rs b/src/color.rs
@@ -1,4 +1,5 @@
 use crate::image::Image;
+use anyhow::Result;
 use ndarray::{Array3, Zip};
 use num_traits::{Num, NumCast};
 
@@ -21,7 +22,7 @@ use num_traits::{Num, NumCast};
 /// The grayscale image.
 ///
 /// Precondition: the input image must have 3 channels.
-pub fn gray_from_rgb<T>(image: &Image<T, 3>) -> Result<Image<T, 1>, std::io::Error>
+pub fn gray_from_rgb<T>(image: &Image<T, 3>) -> Result<Image<T, 1>>
 where
     T: Default
         + Copy
@@ -38,22 +39,22 @@ where
     // let image_f32 = image.cast::<f32>();
     //let mut output = Array3::<u8>::zeros(image.data.dim());
     //let mut output = Array3::<u8>::zeros((image.image_size().height, image.image_size().width, 1));
-    let rw = T::from(0.299).unwrap_or_else(|| T::from(0.0).unwrap());
-    let gw = T::from(0.587).unwrap_or_else(|| T::from(0.0).unwrap());
-    let bw = T::from(0.114).unwrap_or_else(|| T::from(0.0).unwrap());
+    let rw = T::from(0.299).unwrap();
+    let gw = T::from(0.587).unwrap();
+    let bw = T::from(0.114).unwrap();
 
     let mut output = Image::<T, 1>::from_shape(image.image_size())?;
 
     Zip::from(output.data.rows_mut())
         .and(image.data.rows())
         .par_for_each(|mut out, inp| {
             assert_eq!(inp.len(), 3);
-            let r = NumCast::from(inp[0]).unwrap_or_else(|| T::from(0.0).unwrap());
-            let g = NumCast::from(inp[1]).unwrap_or_else(|| T::from(0.0).unwrap());
-            let b = NumCast::from(inp[2]).unwrap_or_else(|| T::from(0.0).unwrap());
+            let r = NumCast::from(inp[0]).unwrap();
+            let g = NumCast::from(inp[1]).unwrap();
+            let b = NumCast::from(inp[2]).unwrap();
             let gray = rw * r + gw * g + bw * b;
 
-            out[0] = NumCast::from(gray).unwrap_or_else(|| T::from(0.0).unwrap());
+            out[0] = NumCast::from(gray).unwrap();
         });
 
     Ok(output)
@@ -68,7 +69,7 @@ mod tests {
         let image_path = std::path::Path::new("tests/data/dog.jpeg");
         let image = F::read_image_jpeg(image_path).unwrap();
         let image_norm = image.cast_and_scale::<f32>(1. / 255.0).unwrap();
-        let gray = super::gray_from_rgb(&image_norm).unwrap();
+        let gray = super::gray_from_rgb(&image_norm.cast::<f64>().unwrap()).unwrap();
         assert_eq!(gray.num_channels(), 1);
         assert_eq!(gray.image_size().width, 258);
         assert_eq!(gray.image_size().height, 195);

diff --git a/src/image.rs b/src/image.rs
@@ -1,5 +1,5 @@
 //use crate::io;
-use std::path::Path;
+use anyhow::Result;
 
 /// Image size in pixels
 ///
@@ -45,17 +45,22 @@ pub struct Image<T, const CHANNELS: usize> {
 
 // provisionally, we will use the following types:
 impl<T, const CHANNELS: usize> Image<T, CHANNELS> {
-    pub fn new(shape: ImageSize, data: Vec<T>) -> Result<Self, std::io::Error> {
+    pub fn new(shape: ImageSize, data: Vec<T>) -> Result<Self> {
         // check if the data length matches the image size
         if data.len() != shape.width * shape.height * CHANNELS {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                format!(
-                    "Data length ({}) does not match the image size ({})",
-                    data.len(),
-                    shape.width * shape.height * CHANNELS
-                ),
+            return Err(anyhow::anyhow!(
+                "Data length ({}) does not match the image size ({})",
+                data.len(),
+                shape.width * shape.height * CHANNELS
             ));
+            //return Err(Error::new(
+            //    std::io::ErrorKind::InvalidData,
+            //    format!(
+            //        "Data length ({}) does not match the image size ({})",
+            //        data.len(),
+            //        shape.width * shape.height * CHANNELS
+            //    ),
+            //));
         }
 
         // allocate the image data
@@ -66,7 +71,7 @@ impl<T, const CHANNELS: usize> Image<T, CHANNELS> {
         Ok(Image { data })
     }
 
-    pub fn from_shape(shape: ImageSize) -> Result<Self, std::io::Error>
+    pub fn from_shape(shape: ImageSize) -> Result<Self>
     where
         T: Clone + Default,
     {
@@ -76,7 +81,18 @@ impl<T, const CHANNELS: usize> Image<T, CHANNELS> {
         Ok(image)
     }
 
-    pub fn cast<U>(self) -> Result<Image<U, CHANNELS>, std::io::Error>
+    pub fn empty_like(&self) -> Result<Self>
+    where
+        T: Clone + Default, // both bounds are needed to fill the buffer via `vec![T::default(); n]`
+    {
+        let shape = self.image_size(); // the new image takes its size from `self`
+        let data = vec![T::default(); shape.width * shape.height * CHANNELS]; // default-filled buffer, not a copy of `self`'s pixels
+        let image = Image::new(shape, data)?; // `new` checks the buffer length against the shape
+
+        Ok(image)
+    }
+
+    pub fn cast<U>(self) -> Result<Image<U, CHANNELS>>
     where
         U: Clone + Default + num_traits::NumCast + std::fmt::Debug,
         T: Copy + num_traits::NumCast + std::fmt::Debug,
@@ -88,7 +104,7 @@ impl<T, const CHANNELS: usize> Image<T, CHANNELS> {
         Ok(Image { data: casted_data })
     }
 
-    pub fn cast_and_scale<U>(self, scale: U) -> Result<Image<U, CHANNELS>, std::io::Error>
+    pub fn cast_and_scale<U>(self, scale: U) -> Result<Image<U, CHANNELS>>
     where
         U: Copy
             + Clone

diff --git a/src/io/functions.rs b/src/io/functions.rs
@@ -1,3 +1,4 @@
+use anyhow::Result;
 use std::path::Path;
 
 use crate::image::{Image, ImageSize};
@@ -13,31 +14,28 @@ use super::jpeg::{ImageDecoder, ImageEncoder};
 /// # Returns
 ///
 /// A tensor containing the JPEG image data.
-pub fn read_image_jpeg(file_path: &Path) -> Result<Image<u8, 3>, std::io::Error> {
+pub fn read_image_jpeg(file_path: &Path) -> Result<Image<u8, 3>> {
     // verify the file exists and is a JPEG
     if !file_path.exists() {
-        return Err(std::io::Error::new(
-            std::io::ErrorKind::NotFound,
-            format!("File does not exist: {}", file_path.to_str().unwrap()),
-        ));
+        return Err(anyhow::anyhow!("File does not exist: {}", file_path.to_str().unwrap()).into());
     }
 
     let file_path = match file_path.extension() {
         Some(ext) => {
             if ext == "jpg" || ext == "jpeg" {
                 file_path
             } else {
-                return Err(std::io::Error::new(
-                    std::io::ErrorKind::InvalidData,
-                    format!("File is not a JPEG: {}", file_path.to_str().unwrap()),
+                return Err(anyhow::anyhow!(
+                    "File is not a JPEG: {}",
+                    file_path.to_str().unwrap()
                 ));
             }
         }
         None => {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                format!("File has no extension: {}", file_path.to_str().unwrap()),
-            ))
+            return Err(anyhow::anyhow!(
+                "File has no extension: {}",
+                file_path.to_str().unwrap()
+            ));
         }
     };
 
@@ -59,7 +57,7 @@ pub fn read_image_jpeg(file_path: &Path) -> Result<Image<u8, 3>, std::io::Error>
 ///
 /// * `file_path` - The path to the JPEG image.
 /// * `image` - The tensor containing the JPEG image data.
-pub fn write_image_jpeg(file_path: &Path, image: Image<u8, 3>) -> Result<(), std::io::Error> {
+pub fn write_image_jpeg(file_path: &Path, image: Image<u8, 3>) -> Result<()> {
     // compress the image
     let jpeg_data = ImageEncoder::new().encode(image);
 
@@ -78,7 +76,7 @@ pub fn write_image_jpeg(file_path: &Path, image: Image<u8, 3>) -> Result<(), std
 /// * `file_path` - The path to the image.
 ///
 // TODO: return sophus::TensorView
-pub fn read_image_any(file_path: &Path) -> Result<Image<u8, 3>, std::io::Error> {
+pub fn read_image_any(file_path: &Path) -> Result<Image<u8, 3>> {
     // verify the file exists
     if !file_path.exists() {
         panic!("File does not exist: {}", file_path.to_str().unwrap());

diff --git a/src/lib.rs b/src/lib.rs
@@ -2,6 +2,6 @@ pub mod color;
 pub mod image;
 pub mod io;
 pub mod normalize;
-//pub mod resize;
+pub mod resize;
 pub mod tensor;
 pub mod threshold;
diff --git a/src/resize.rs b/src/resize.rs
@@ -1,4 +1,5 @@
 use crate::image::{Image, ImageSize};
+use anyhow::Result;
 use ndarray::{stack, Array2, Array3, Zip};
 
 /// Create a meshgrid of x and y coordinates
@@ -49,26 +50,26 @@ pub fn meshgrid(x: &Array2<f32>, y: &Array2<f32>) -> (Array2<f32>, Array2<f32>)
 ///
 /// The interpolated pixel value.
 // TODO: add support for other data types, e.g. by making this function generic over a numeric trait.
-fn bilinear_interpolation(image: &Array3<u8>, u: f32, v: f32, c: usize) -> f32 {
+fn bilinear_interpolation(image: &Array3<f32>, u: f32, v: f32, c: usize) -> f32 {
     let (height, width, _) = image.dim();
     let iu = u.trunc() as usize;
     let iv = v.trunc() as usize;
 
     let frac_u = u.fract();
     let frac_v = v.fract();
-    let val00 = image[[iv, iu, c]] as f32;
+    let val00 = image[[iv, iu, c]];
     let val01 = if iu + 1 < width {
-        image[[iv, iu + 1, c]] as f32
+        image[[iv, iu + 1, c]]
     } else {
         val00
     };
     let val10 = if iv + 1 < height {
-        image[[iv + 1, iu, c]] as f32
+        image[[iv + 1, iu, c]]
     } else {
         val00
     };
     let val11 = if iu + 1 < width && iv + 1 < height {
-        image[[iv + 1, iu + 1, c]] as f32
+        image[[iv + 1, iu + 1, c]]
     } else {
         val00
     };
@@ -94,7 +95,7 @@ fn bilinear_interpolation(image: &Array3<u8>, u: f32, v: f32, c: usize) -> f32 {
 /// # Returns
 ///
 /// The interpolated pixel value.
-fn nearest_neighbor_interpolation(image: &Array3<u8>, u: f32, v: f32, c: usize) -> f32 {
+fn nearest_neighbor_interpolation(image: &Array3<f32>, u: f32, v: f32, c: usize) -> f32 {
     let (height, width, _) = image.dim();
 
     let iu = u.round() as usize;
@@ -103,7 +104,7 @@ fn nearest_neighbor_interpolation(image: &Array3<u8>, u: f32, v: f32, c: usize)
     let iu = iu.clamp(0, width - 1);
     let iv = iv.clamp(0, height - 1);
 
-    image[[iv, iu, c]] as f32
+    image[[iv, iu, c]]
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -138,24 +139,19 @@ impl Default for ResizeOptions {
 /// # Returns
 ///
 /// The resized image.
-pub fn resize<T, const CHANNELS: usize>(
-    image: &Image<T, CHANNELS>,
+pub fn resize<const CHANNELS: usize>(
+    image: &Image<f32, CHANNELS>,
     new_size: ImageSize,
     optional_args: ResizeOptions,
-) -> Image<T, CHANNELS>
-where
-    T: num_traits::FromPrimitive + std::fmt::Debug + Send + Sync + Copy,
-{
-    let image_size = image.image_size();
-
+) -> Result<Image<f32, CHANNELS>> {
     // create the output image
-    let mut output = ndarray::Array3::<T>::zeros((new_size.height, new_size.width, CHANNELS));
+    let mut output = Image::from_shape(new_size.clone())?;
 
     // create a grid of x and y coordinates for the output image
     // and interpolate the values from the input image.
-    let x = ndarray::Array::linspace(0., (image_size.width - 1) as f32, new_size.width)
+    let x = ndarray::Array::linspace(0., (image.width() - 1) as f32, new_size.width)
         .insert_axis(ndarray::Axis(0));
-    let y = ndarray::Array::linspace(0., (image_size.height - 1) as f32, new_size.height)
+    let y = ndarray::Array::linspace(0., (image.height() - 1) as f32, new_size.height)
         .insert_axis(ndarray::Axis(0));
 
     // create the meshgrid of x and y coordinates, arranged in a 2D grid of shape (height, width)
@@ -168,26 +164,26 @@ where
     // iterate over the output image and interpolate the pixel values
 
     Zip::from(xy.rows())
-        .and(output.rows_mut())
+        .and(output.data.rows_mut())
         .par_for_each(|uv, mut out| {
             assert_eq!(uv.len(), 2);
             let (u, v) = (uv[0], uv[1]);
 
             // compute the pixel values for each channel
             let pixels = (0..image.num_channels()).map(|k| match optional_args.interpolation {
                 InterpolationMode::Bilinear => bilinear_interpolation(&image.data, u, v, k),
-                //InterpolationMode::NearestNeighbor => {
-                //    nearest_neighbor_interpolation(&image.data, u, v, k)
-                //}
+                InterpolationMode::NearestNeighbor => {
+                    nearest_neighbor_interpolation(&image.data, u, v, k)
+                }
             });
 
             // write the pixel values to the output image
             for (k, pixel) in pixels.enumerate() {
-                out[k] = pixel as u8;
+                out[k] = num_traits::FromPrimitive::from_f32(pixel).unwrap();
             }
         });
 
-    Image { data: output }
+    Ok(output)
 }
 
 #[cfg(test)]
@@ -196,15 +192,23 @@ mod tests {
     #[test]
     fn resize_smoke_ch3() {
         use crate::image::{Image, ImageSize};
-        let image = Image::from_shape_vec([4, 5, 3], vec![0; 4 * 5 * 3]);
+        let image = Image::<_, 3>::new(
+            ImageSize {
+                width: 4,
+                height: 5,
+            },
+            vec![0f32; 4 * 5 * 3],
+        )
+        .unwrap();
         let image_resized = super::resize(
             &image,
             ImageSize {
                 width: 2,
                 height: 3,
             },
             super::ResizeOptions::default(),
-        );
+        )
+        .unwrap();
         assert_eq!(image_resized.num_channels(), 3);
         assert_eq!(image_resized.image_size().width, 2);
         assert_eq!(image_resized.image_size().height, 3);
@@ -213,15 +217,23 @@ mod tests {
     #[test]
     fn resize_smoke_ch1() {
         use crate::image::{Image, ImageSize};
-        let image = Image::from_shape_vec([4, 5, 1], vec![0; 4 * 5 * 1]);
+        let image = Image::<_, 1>::new(
+            ImageSize {
+                width: 4,
+                height: 5,
+            },
+            vec![0f32; 4 * 5 * 1],
+        )
+        .unwrap();
         let image_resized = super::resize(
             &image,
             ImageSize {
                 width: 2,
                 height: 3,
             },
             super::ResizeOptions::default(),
-        );
+        )
+        .unwrap();
         assert_eq!(image_resized.num_channels(), 1);
         assert_eq!(image_resized.image_size().width, 2);
         assert_eq!(image_resized.image_size().height, 3);