Commit 3086f67: Merge branch 'main' into patch-1

adamtheturtle committed Jun 27, 2024 (2 parents: 78d4089 + bf01bab)

Showing 40 changed files with 1,179 additions and 153 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-cmake.yml

@@ -54,7 +54,7 @@ jobs:
           export GPU_ARCH_TYPE=cpu
           export GPU_ARCH_VERSION=''
-          ./.github/scripts/cmake.sh
+          ${CONDA_RUN} ./.github/scripts/cmake.sh
 
   windows:
     strategy:
3 changes: 3 additions & 0 deletions .github/workflows/prototype-tests-linux-gpu.yml

@@ -1,5 +1,8 @@
 name: Prototype tests on Linux
 
+# IMPORTANT: This workflow has been manually disabled from the GitHub interface
+# in June 2024. The file is kept for reference in case we ever put this back.
+
 on:
   pull_request:
 
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml

@@ -68,7 +68,7 @@ jobs:
           export GPU_ARCH_TYPE=cpu
           export GPU_ARCH_VERSION=''
-          ./.github/scripts/unittest.sh
+          ${CONDA_RUN} ./.github/scripts/unittest.sh
 
   unittests-windows:
     strategy:
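Both workflow fixes are the same one-line change: the CMake and unit-test scripts are now launched through ${CONDA_RUN}, so they execute inside the conda environment provisioned by the job rather than with the runner's default shell. The definition of CONDA_RUN is not part of this diff; in the pytorch/test-infra reusable workflows it conventionally expands to something like conda run -p "${CONDA_ENV}" (an assumption here), which would turn the call into conda run -p "${CONDA_ENV}" ./.github/scripts/unittest.sh.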
67 changes: 67 additions & 0 deletions benchmarks/encoding.py

@@ -0,0 +1,67 @@
+import os
+import platform
+import statistics
+
+import torch
+import torch.utils.benchmark as benchmark
+import torchvision
+
+
+def print_machine_specs():
+    print("Processor:", platform.processor())
+    print("Platform:", platform.platform())
+    print("Logical CPUs:", os.cpu_count())
+    print(f"\nCUDA device: {torch.cuda.get_device_name()}")
+    print(f"Total Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+
+
+def get_data():
+    transform = torchvision.transforms.Compose(
+        [
+            torchvision.transforms.PILToTensor(),
+        ]
+    )
+    path = os.path.join(os.getcwd(), "data")
+    testset = torchvision.datasets.Places365(
+        root="./data", download=not os.path.exists(path), transform=transform, split="val"
+    )
+    testloader = torch.utils.data.DataLoader(
+        testset, batch_size=1000, shuffle=False, num_workers=1, collate_fn=lambda batch: [r[0] for r in batch]
+    )
+    return next(iter(testloader))
+
+
+def run_benchmark(batch):
+    results = []
+    for device in ["cpu", "cuda"]:
+        batch_device = [t.to(device=device) for t in batch]
+        for size in [1, 100, 1000]:
+            for num_threads in [1, 12, 24]:
+                for stmt, strat in zip(
+                    [
+                        "[torchvision.io.encode_jpeg(img) for img in batch_input]",
+                        "torchvision.io.encode_jpeg(batch_input)",
+                    ],
+                    ["unfused", "fused"],
+                ):
+                    batch_input = batch_device[:size]
+                    t = benchmark.Timer(
+                        stmt=stmt,
+                        setup="import torchvision",
+                        globals={"batch_input": batch_input},
+                        label="Image Encoding",
+                        sub_label=f"{device.upper()} ({strat}): {stmt}",
+                        description=f"{size} images",
+                        num_threads=num_threads,
+                    )
+                    results.append(t.blocked_autorange())
+    compare = benchmark.Compare(results)
+    compare.print()
+
+
+if __name__ == "__main__":
+    print_machine_specs()
+    batch = get_data()
+    mean_h, mean_w = statistics.mean(t.shape[-2] for t in batch), statistics.mean(t.shape[-1] for t in batch)
+    print(f"\nMean image size: {int(mean_h)}x{int(mean_w)}")
+    run_benchmark(batch)
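The new benchmark times single-image encoding against the batched form of torchvision.io.encode_jpeg, which accepts a list of tensors and encodes them in one call. A minimal sketch of the two call styles it compares (batch size and shapes here are illustrative):

    import torch
    from torchvision.io import encode_jpeg

    # Any list of uint8 CHW tensors works; moving them to "cuda" exercises the GPU path.
    imgs = [torch.randint(0, 256, (3, 256, 256), dtype=torch.uint8) for _ in range(8)]

    unfused = [encode_jpeg(img) for img in imgs]  # one encode call per image
    fused = encode_jpeg(imgs)                     # single call over the whole list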
2 changes: 2 additions & 0 deletions docs/source/transforms.rst

@@ -350,6 +350,7 @@ Color
     v2.RGB
     v2.RandomGrayscale
     v2.GaussianBlur
+    v2.GaussianNoise
     v2.RandomInvert
     v2.RandomPosterize
     v2.RandomSolarize
@@ -368,6 +369,7 @@ Functionals
     v2.functional.grayscale_to_rgb
     v2.functional.to_grayscale
     v2.functional.gaussian_blur
+    v2.functional.gaussian_noise
     v2.functional.invert
     v2.functional.posterize
     v2.functional.solarize
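These two docs entries track the newly added v2.GaussianNoise transform and its functional counterpart. A minimal usage sketch (the mean/sigma/clip parameter names are assumed from the v2 API and are not shown in this diff; GaussianNoise operates on float inputs):

    import torch
    from torchvision.transforms import v2

    img = torch.rand(3, 224, 224)  # float image in [0, 1]
    noisy = v2.GaussianNoise(mean=0.0, sigma=0.1, clip=True)(img)  # parameter names assumed

    # Functional form of the same operation:
    noisy_f = v2.functional.gaussian_noise(img, mean=0.0, sigma=0.1)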
File renamed without changes.
4 changes: 2 additions & 2 deletions references/classification/transforms.py

@@ -19,9 +19,9 @@ def get_mixup_cutmix(*, mixup_alpha, cutmix_alpha, num_classes, use_v2):
         )
     if cutmix_alpha > 0:
         mixup_cutmix.append(
-            transforms_module.CutMix(alpha=mixup_alpha, num_classes=num_classes)
+            transforms_module.CutMix(alpha=cutmix_alpha, num_classes=num_classes)
             if use_v2
-            else RandomCutMix(num_classes=num_classes, p=1.0, alpha=mixup_alpha)
+            else RandomCutMix(num_classes=num_classes, p=1.0, alpha=cutmix_alpha)
         )
     if not mixup_cutmix:
         return None
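A straightforward bug fix: the CutMix branch was being constructed with mixup_alpha instead of cutmix_alpha, so both augmentations silently shared one Beta parameter. A minimal sketch of the corrected pairing (batch shapes illustrative; CutMix and MixUp are the v2 transforms referenced by this file):

    import torch
    from torchvision.transforms import v2

    cutmix = v2.CutMix(alpha=1.0, num_classes=10)  # now driven by cutmix_alpha
    mixup = v2.MixUp(alpha=0.2, num_classes=10)    # mixup_alpha stays with MixUp

    imgs = torch.rand(4, 3, 32, 32)
    labels = torch.randint(0, 10, (4,))
    imgs, labels = cutmix(imgs, labels)  # labels become soft (4, 10) targets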
114 changes: 92 additions & 22 deletions test/test_datasets.py

@@ -782,32 +782,46 @@ def inject_fake_data(self, tmpdir, config):

         annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER
         os.makedirs(annotation_folder)
 
+        segmentation_kind = config.pop("segmentation_kind", "list")
         info = self._create_annotation_file(
-            annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image
+            annotation_folder,
+            self._ANNOTATIONS_FILE,
+            file_names,
+            num_annotations_per_image,
+            segmentation_kind=segmentation_kind,
         )
 
         info["num_examples"] = num_images
         return info
 
-    def _create_annotation_file(self, root, name, file_names, num_annotations_per_image):
+    def _create_annotation_file(self, root, name, file_names, num_annotations_per_image, segmentation_kind="list"):
         image_ids = [int(file_name.stem) for file_name in file_names]
         images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)]
 
-        annotations, info = self._create_annotations(image_ids, num_annotations_per_image)
+        annotations, info = self._create_annotations(image_ids, num_annotations_per_image, segmentation_kind)
         self._create_json(root, name, dict(images=images, annotations=annotations))
 
         return info
 
-    def _create_annotations(self, image_ids, num_annotations_per_image):
+    def _create_annotations(self, image_ids, num_annotations_per_image, segmentation_kind="list"):
         annotations = []
         annotion_id = 0
 
         for image_id in itertools.islice(itertools.cycle(image_ids), len(image_ids) * num_annotations_per_image):
+            segmentation = {
+                "list": [torch.rand(8).tolist()],
+                "rle": {"size": [10, 10], "counts": [1]},
+                "rle_encoded": {"size": [2400, 2400], "counts": "PQRQ2[1\\Y2f0gNVNRhMg2"},
+                "bad": 123,
+            }[segmentation_kind]
+
             annotations.append(
                 dict(
                     image_id=image_id,
                     id=annotion_id,
                     bbox=torch.rand(4).tolist(),
-                    segmentation=[torch.rand(8).tolist()],
+                    segmentation=segmentation,
                     category_id=int(torch.randint(91, ())),
                     area=float(torch.rand(1)),
                     iscrowd=int(torch.randint(2, size=(1,))),
@@ -832,11 +846,27 @@ def test_slice_error(self):
         with pytest.raises(ValueError, match="Index must be of type integer"):
             dataset[:2]
 
+    def test_segmentation_kind(self):
+        if isinstance(self, CocoCaptionsTestCase):
+            return
+
+        for segmentation_kind in ("list", "rle", "rle_encoded"):
+            config = {"segmentation_kind": segmentation_kind}
+            with self.create_dataset(config) as (dataset, _):
+                dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys="all")
+                list(dataset)
+
+        config = {"segmentation_kind": "bad"}
+        with self.create_dataset(config) as (dataset, _):
+            dataset = datasets.wrap_dataset_for_transforms_v2(dataset, target_keys="all")
+            with pytest.raises(ValueError, match="COCO segmentation expected to be a dict or a list"):
+                list(dataset)
+
 
 class CocoCaptionsTestCase(CocoDetectionTestCase):
     DATASET_CLASS = datasets.CocoCaptions
 
-    def _create_annotations(self, image_ids, num_annotations_per_image):
+    def _create_annotations(self, image_ids, num_annotations_per_image, segmentation_kind="list"):
         captions = [str(idx) for idx in range(num_annotations_per_image)]
         annotations = combinations_grid(image_id=image_ids, caption=captions)
         for id, annotation in enumerate(annotations):
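The new test_segmentation_kind drives the v2 dataset wrapper through every segmentation encoding a COCO annotation can carry, plus a deliberately invalid one. Schematically (values illustrative, mirroring the fake data above):

    # Valid COCO "segmentation" encodings exercised by the test:
    polygon = [[0.1, 0.2, 0.5, 0.2, 0.5, 0.6, 0.1, 0.6]]       # list of flat polygons
    rle = {"size": [10, 10], "counts": [1]}                     # uncompressed run-length counts
    rle_encoded = {"size": [2400, 2400], "counts": "PQRQ2..."}  # pycocotools-style compressed string

    # Anything else (e.g. the bare int 123) makes the wrapper raise
    # "COCO segmentation expected to be a dict or a list".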
@@ -2442,28 +2472,68 @@ def inject_fake_data(self, tmpdir, config):
         base_folder = os.path.join(tmpdir, "fer2013")
         os.makedirs(base_folder)
 
+        use_icml = config.pop("use_icml", False)
+        use_fer = config.pop("use_fer", False)
+
         num_samples = 5
-        with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
-            writer = csv.DictWriter(
-                file,
-                fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
-                quoting=csv.QUOTE_NONNUMERIC,
-                quotechar='"',
-            )
-            writer.writeheader()
-            for _ in range(num_samples):
-                row = dict(
-                    pixels=" ".join(
-                        str(pixel) for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
-                    )
-                )
-                if config["split"] == "train":
-                    row["emotion"] = str(int(torch.randint(0, 7, ())))
-
-                writer.writerow(row)
+
+        if use_icml or use_fer:
+            pixels_key, usage_key = (" pixels", " Usage") if use_icml else ("pixels", "Usage")
+            fieldnames = ("emotion", usage_key, pixels_key) if use_icml else ("emotion", pixels_key, usage_key)
+            filename = "icml_face_data.csv" if use_icml else "fer2013.csv"
+            with open(os.path.join(base_folder, filename), "w", newline="") as file:
+                writer = csv.DictWriter(
+                    file,
+                    fieldnames=fieldnames,
+                    quoting=csv.QUOTE_NONNUMERIC,
+                    quotechar='"',
+                )
+                writer.writeheader()
+                for i in range(num_samples):
+                    row = {
+                        "emotion": str(int(torch.randint(0, 7, ()))),
+                        usage_key: "Training" if i % 2 else "PublicTest",
+                        pixels_key: " ".join(
+                            str(pixel)
+                            for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                        ),
+                    }
+
+                    writer.writerow(row)
+        else:
+            with open(os.path.join(base_folder, f"{config['split']}.csv"), "w", newline="") as file:
+                writer = csv.DictWriter(
+                    file,
+                    fieldnames=("emotion", "pixels") if config["split"] == "train" else ("pixels",),
+                    quoting=csv.QUOTE_NONNUMERIC,
+                    quotechar='"',
+                )
+                writer.writeheader()
+                for _ in range(num_samples):
+                    row = dict(
+                        pixels=" ".join(
+                            str(pixel)
+                            for pixel in datasets_utils.create_image_or_video_tensor((48, 48)).view(-1).tolist()
+                        )
+                    )
+                    if config["split"] == "train":
+                        row["emotion"] = str(int(torch.randint(0, 7, ())))
+
+                    writer.writerow(row)
 
         return num_samples
 
     def test_icml_file(self):
         config = {"split": "test"}
         with self.create_dataset(config=config) as (dataset, _):
             assert all(s[1] is None for s in dataset)
 
+        for split in ("train", "test"):
+            for d in ({"use_icml": True}, {"use_fer": True}):
+                config = {"split": split, **d}
+                with self.create_dataset(config=config) as (dataset, _):
+                    assert all(s[1] is not None for s in dataset)
 
 
 class GTSRBTestCase(datasets_utils.ImageDatasetTestCase):
     DATASET_CLASS = datasets.GTSRB
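The FER2013 changes teach the fake-data generator to emit the two alternative CSV layouts the real dataset ships in: icml_face_data.csv (whose column names carry leading spaces, " Usage" and " pixels") and fer2013.csv. Both layouts include an emotion label for every row, which is why the extended test asserts non-None targets for both splits, while the plain per-split test.csv still yields None labels. A minimal loading sketch (the root path is illustrative, and the CSVs must already be in place, since FER2013 has no automatic download):

    import torchvision

    # With icml_face_data.csv or fer2013.csv under <root>/fer2013/,
    # both splits return labels, matching the `is not None` assertions above.
    ds = torchvision.datasets.FER2013(root="data", split="test")
    img, label = ds[0]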
Diffs for the remaining changed files were not loaded in this view.