Addressed PR review

pytorch · Mar 5, 2024 · 5314d43 · 5314d43
1 parent 4401c2e
commit 5314d43
Show file tree

Hide file tree

Showing 4 changed files with 124 additions and 48 deletions.
diff --git a/test/test_image.py b/test/test_image.py
@@ -119,6 +119,25 @@ def test_decode_jpeg_with_exif_orientation(tmpdir, orientation):
     torch.testing.assert_close(expected, output)
 
 
+@pytest.mark.parametrize("size", [65533, 1, 5, 9, 10])
+def test_invalid_exif(tmpdir, size):
+    # Inspired from a PIL test:
+    # https://github.com/python-pillow/Pillow/blob/8f63748e50378424628155994efd7e0739a4d1d1/Tests/test_file_jpeg.py#L299
+    fp = os.path.join(tmpdir, f"invalid_exif.jpg")
+    t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
+    im = F.to_pil_image(t)
+    im.save(fp, "JPEG", exif=b"1" * size)
+
+    data = read_file(fp)
+    output = decode_image(data, apply_exif_orientation=True)
+
+    pimg = Image.open(fp)
+    pimg = ImageOps.exif_transpose(pimg)
+
+    expected = F.pil_to_tensor(pimg)
+    torch.testing.assert_close(expected, output)
+
+
 def test_decode_jpeg_errors():
     with pytest.raises(RuntimeError, match="Expected a non empty 1-dimensional tensor"):
         decode_jpeg(torch.empty((100, 1), dtype=torch.uint8))

diff --git a/torchvision/csrc/io/image/cpu/decode_image.cpp b/torchvision/csrc/io/image/cpu/decode_image.cpp
@@ -27,9 +27,6 @@ torch::Tensor decode_image(
   if (memcmp(jpeg_signature, datap, 3) == 0) {
     return decode_jpeg(data, mode, apply_exif_orientation);
   } else if (memcmp(png_signature, datap, 4) == 0) {
-    TORCH_CHECK(
-        !apply_exif_orientation,
-        "Unsupported option apply_exif_orientation=true for PNG")
     return decode_png(data, mode);
   } else {
     TORCH_CHECK(

diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
@@ -200,50 +200,7 @@ torch::Tensor decode_jpeg(
 
   int exif_orientation = 0;
   if (apply_exif_orientation) {
-    // Check for Exif marker APP1
-    jpeg_saved_marker_ptr exif_marker = 0;
-    jpeg_saved_marker_ptr cmarker = cinfo.marker_list;
-    while (cmarker && exif_marker == 0) {
-      if (cmarker->marker == APP1) {
-        exif_marker = cmarker;
-      }
-      cmarker = cmarker->next;
-    }
-
-    if (exif_marker) {
-      // Code below is inspired from OpenCV
-      // https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/modules/imgcodecs/src/exif.cpp
-
-      // Bytes from Exif size field to the first TIFF header
-      constexpr size_t start_offset = 6;
-      if (exif_marker->data_length > start_offset) {
-        auto* exif_data_ptr = exif_marker->data + start_offset;
-        auto size = exif_marker->data_length - start_offset;
-        std::vector<unsigned char> exif_data_vec(
-            exif_data_ptr, exif_data_ptr + size);
-
-        auto endianness = get_endianness(exif_data_vec);
-
-        // Checking whether Tag Mark (0x002A) correspond to one contained in the
-        // Jpeg file
-        uint16_t tag_mark = get_uint16(exif_data_vec, endianness, 2);
-        if (tag_mark == REQ_EXIF_TAG_MARK) {
-          auto offset = get_uint32(exif_data_vec, endianness, 4);
-          size_t num_entry = get_uint16(exif_data_vec, endianness, offset);
-          offset += 2; // go to start of tag fields
-          constexpr size_t tiff_field_size = 12;
-          for (size_t entry = 0; entry < num_entry; entry++) {
-            // Here we just search for orientation tag and parse it
-            auto tag_num = get_uint16(exif_data_vec, endianness, offset);
-            if (tag_num == ORIENTATION_EXIF_TAG) {
-              exif_orientation =
-                  get_uint16(exif_data_vec, endianness, offset + 8);
-            }
-            offset += tiff_field_size;
-          }
-        }
-      }
-    }
+    exif_orientation = fetch_exif_orientation(&cinfo);
   }
 
   jpeg_start_decompress(&cinfo);

diff --git a/torchvision/csrc/io/image/cpu/exif.h b/torchvision/csrc/io/image/cpu/exif.h
@@ -1,5 +1,55 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this
+//  license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright
+//     notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote
+//     products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is"
+// and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are
+// disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any
+// direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
 #pragma once
+#include <jpeglib.h>
 #include <torch/types.h>
+#include <vector>
 
 namespace vision {
 namespace image {
@@ -13,7 +63,7 @@ constexpr uint16_t ORIENTATION_EXIF_TAG = 0x0112;
 constexpr uint16_t INCORRECT_TAG = -1;
 
 // Functions in this module are taken from OpenCV
-// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/modules/imgcodecs/src/exif.cpp
+// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
 inline uint16_t get_endianness(const std::vector<unsigned char>& exif_data) {
   if ((exif_data.size() < 1) ||
       (exif_data.size() > 1 && exif_data[0] != exif_data[1])) {
@@ -58,6 +108,59 @@ inline uint32_t get_uint32(
       (exif_data[offset + 2] << 8) + exif_data[offset + 3];
 }
 
+// Looks up the orientation entry stored in the first APP1 marker saved on
+// `cinfo`. Returns the 16-bit value read for that entry, or -1 when there is
+// no APP1 marker, its payload is too short, the tag mark does not match, or
+// no orientation entry is reached before the scan stops.
+inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
+  constexpr int no_orientation = -1;
+
+  // Walk the saved markers until the first APP1 segment (or the list end).
+  jpeg_saved_marker_ptr app1 = cinfo->marker_list;
+  while (app1 != nullptr && app1->marker != APP1) {
+    app1 = app1->next;
+  }
+  if (app1 == nullptr) {
+    return no_orientation;
+  }
+
+  // The parsing below is inspired from OpenCV
+  // https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
+
+  // Bytes from Exif size field to the first TIFF header
+  constexpr size_t tiff_start = 6;
+  if (app1->data_length <= tiff_start) {
+    return no_orientation;
+  }
+  std::vector<unsigned char> tiff_bytes(
+      app1->data + tiff_start, app1->data + app1->data_length);
+
+  auto byte_order = get_endianness(tiff_bytes);
+
+  // The Tag Mark (0x002A) read from the file must correspond to the
+  // required one; otherwise the payload is not parsed any further.
+  const uint16_t tag_mark = get_uint16(tiff_bytes, byte_order, 2);
+  if (tag_mark != REQ_EXIF_TAG_MARK) {
+    return no_orientation;
+  }
+
+  auto cursor = get_uint32(tiff_bytes, byte_order, 4);
+  const size_t entry_count = get_uint16(tiff_bytes, byte_order, cursor);
+  cursor += 2; // go to start of tag fields
+  constexpr size_t field_size = 12;
+  for (size_t i = 0; i < entry_count; ++i, cursor += field_size) {
+    // Only the orientation tag is of interest; everything else is skipped.
+    const auto tag = get_uint16(tiff_bytes, byte_order, cursor);
+    if (tag == INCORRECT_TAG) {
+      break;
+    }
+    if (tag == ORIENTATION_EXIF_TAG) {
+      // The value is read 8 bytes into the 12-byte field.
+      return get_uint16(tiff_bytes, byte_order, cursor + 8);
+    }
+  }
+  return no_orientation;
+}
+
 constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation
 constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip
 constexpr uint16_t IMAGE_ORIENTATION_BR = 3; // needs 180 rotation