Add DALI proxy option to EfficientNet example (#5791)
Add a performance check of the DALI proxy variant to the EfficientNet performance test.

Signed-off-by: Joaquin Anton Guirao <[email protected]>
jantonguirao authored Jan 27, 2025
1 parent 6f0e6b5 commit f625871
Showing 5 changed files with 433 additions and 81 deletions.
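
The new *_external_source pipelines in the diff below take encoded file paths from an fn.external_source node named "images" instead of reading files with fn.readers.file, which is what lets them be driven by DALI proxy from a regular PyTorch data loader. As rough orientation only (not part of this commit), here is a minimal sketch of that wiring, assuming DALI's experimental nvidia.dali.plugin.pytorch.experimental.proxy module; the dataset path, the read_filepath helper, and the import location of training_pipe_external_source are hypothetical.

# Illustrative sketch: drive the external-source training pipeline through DALI proxy.
import numpy as np
import torchvision.datasets
import nvidia.dali.plugin.pytorch.experimental.proxy as dali_proxy
from nvidia.dali import types
from image_classification.dali import training_pipe_external_source  # assumed module path


def read_filepath(path):
    # Hand the encoded file path to the pipeline; fn.io.file.read does the actual I/O on the DALI side.
    return np.frombuffer(path.encode(), dtype=np.int8)


pipe = training_pipe_external_source(
    interpolation=types.INTERP_LINEAR,
    image_size=224,
    output_layout="CHW",
    automatic_augmentation="autoaugment",
    dali_device="gpu",
    batch_size=128,
    num_threads=4,
    device_id=0,
)

with dali_proxy.DALIServer(pipe) as dali_server:
    # The server's proxy acts as the dataset transform; the actual processing runs inside DALI.
    dataset = torchvision.datasets.ImageFolder(
        "/data/imagenet/train", transform=dali_server.proxy, loader=read_filepath
    )
    loader = dali_proxy.DataLoader(
        dali_server, dataset, batch_size=128, num_workers=4, shuffle=True
    )
    for images, labels in loader:
        pass  # training step goes here
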
@@ -96,7 +96,7 @@ def solarize_add(image, addition=0, threshold=128):
     for i in range(256):
         if i < threshold:
             res = i + addition if i + addition <= 255 else 255
-            res = res if res >= 0 else 0
+            res = int(res if res >= 0 else 0)
             lut.append(res)
         else:
             lut.append(i)
@@ -20,31 +20,37 @@
 from nvidia.dali.auto_aug import auto_augment, trivial_augment
 
 
-@pipeline_def(enable_conditionals=True)
-def training_pipe(data_dir, interpolation, image_size, output_layout, automatic_augmentation,
-                  dali_device="gpu", rank=0, world_size=1):
-    rng = fn.random.coin_flip(probability=0.5)
-
-    jpegs, labels = fn.readers.file(name="Reader", file_root=data_dir, shard_id=rank,
-                                    num_shards=world_size, random_shuffle=True, pad_last_batch=True)
-
-    if dali_device == "gpu":
-        decoder_device = "mixed"
-        resize_device = "gpu"
-    else:
-        decoder_device = "cpu"
-        resize_device = "cpu"
-
-    images = fn.decoders.image_random_crop(jpegs, device=decoder_device, output_type=types.RGB,
-                                           random_aspect_ratio=[0.75, 4.0 / 3.0],
-                                           random_area=[0.08, 1.0])
-
-    images = fn.resize(images, device=resize_device, size=[image_size, image_size],
-                       interp_type=interpolation, antialias=False)
+def efficientnet_processing_training(
+    jpegs_input,
+    interpolation,
+    image_size,
+    output_layout,
+    automatic_augmentation,
+    dali_device="gpu",
+):
+    """
+    Image processing part of the ResNet training pipeline (excluding data loading)
+    """
+    decoder_device = "mixed" if dali_device == "gpu" else "cpu"
+    images = fn.decoders.image_random_crop(
+        jpegs_input,
+        device=decoder_device,
+        output_type=types.RGB,
+        random_aspect_ratio=[0.75, 4.0 / 3.0],
+        random_area=[0.08, 1.0],
+    )
+
+    images = fn.resize(
+        images,
+        size=[image_size, image_size],
+        interp_type=interpolation,
+        antialias=False,
+    )
 
     # Make sure that from this point we are processing on GPU regardless of dali_device parameter
     images = images.gpu()
 
+    rng = fn.random.coin_flip(probability=0.5)
     images = fn.flip(images, horizontal=rng)
 
     # Based on the specification, apply the automatic augmentation policy. Note, that from the point
@@ -53,33 +59,138 @@ def training_pipe(data_dir, interpolation, image_size, output_layout, automatic_
     # We pass the shape of the image after the resize so the translate operations are done
     # relative to the image size.
     if automatic_augmentation == "autoaugment":
-        output = auto_augment.auto_augment_image_net(images, shape=[image_size, image_size])
+        output = auto_augment.auto_augment_image_net(
+            images, shape=[image_size, image_size]
+        )
     elif automatic_augmentation == "trivialaugment":
-        output = trivial_augment.trivial_augment_wide(images, shape=[image_size, image_size])
+        output = trivial_augment.trivial_augment_wide(
+            images, shape=[image_size, image_size]
+        )
     else:
         output = images
 
-    output = fn.crop_mirror_normalize(output, dtype=types.FLOAT, output_layout=output_layout,
-                                      crop=(image_size, image_size),
-                                      mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
-                                      std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
+    output = fn.crop_mirror_normalize(
+        output,
+        dtype=types.FLOAT,
+        output_layout=output_layout,
+        crop=(image_size, image_size),
+        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
+    )
 
-    return output, labels
+    return output
 
 
-@pipeline_def
-def validation_pipe(data_dir, interpolation, image_size, image_crop, output_layout, rank=0,
-                    world_size=1):
-    jpegs, label = fn.readers.file(name="Reader", file_root=data_dir, shard_id=rank,
-                                   num_shards=world_size, random_shuffle=False, pad_last_batch=True)
+@pipeline_def(enable_conditionals=True)
+def training_pipe(
+    data_dir,
+    interpolation,
+    image_size,
+    output_layout,
+    automatic_augmentation,
+    dali_device="gpu",
+    rank=0,
+    world_size=1,
+):
+    jpegs, labels = fn.readers.file(
+        name="Reader",
+        file_root=data_dir,
+        shard_id=rank,
+        num_shards=world_size,
+        random_shuffle=True,
+        pad_last_batch=True,
+    )
+    outputs = efficientnet_processing_training(
+        jpegs,
+        interpolation,
+        image_size,
+        output_layout,
+        automatic_augmentation,
+        dali_device,
+    )
+    return outputs, labels
+
+
+@pipeline_def(enable_conditionals=True)
+def training_pipe_external_source(
+    interpolation,
+    image_size,
+    output_layout,
+    automatic_augmentation,
+    dali_device="gpu",
+    rank=0,
+    world_size=1,
+):
+    filepaths = fn.external_source(name="images", no_copy=True)
+    jpegs = fn.io.file.read(filepaths)
+    outputs = efficientnet_processing_training(
+        jpegs,
+        interpolation,
+        image_size,
+        output_layout,
+        automatic_augmentation,
+        dali_device,
+    )
+    return outputs
+
+
+def efficientnet_processing_validation(
+    jpegs, interpolation, image_size, image_crop, output_layout
+):
+    """
+    Image processing part of the ResNet validation pipeline (excluding data loading)
+    """
     images = fn.decoders.image(jpegs, device="mixed", output_type=types.RGB)
 
-    images = fn.resize(images, resize_shorter=image_size, interp_type=interpolation,
-                       antialias=False)
+    images = fn.resize(
+        images,
+        resize_shorter=image_size,
+        interp_type=interpolation,
+        antialias=False,
+    )
 
+    output = fn.crop_mirror_normalize(
+        images,
+        dtype=types.FLOAT,
+        output_layout=output_layout,
+        crop=(image_crop, image_crop),
+        mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
+        std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
+    )
+    return output
+
+
-    output = fn.crop_mirror_normalize(images, dtype=types.FLOAT, output_layout=output_layout,
-                                      crop=(image_crop, image_crop),
-                                      mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
-                                      std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
-    return output, label
+@pipeline_def
+def validation_pipe(
+    data_dir,
+    interpolation,
+    image_size,
+    image_crop,
+    output_layout,
+    rank=0,
+    world_size=1,
+):
+    jpegs, label = fn.readers.file(
+        name="Reader",
+        file_root=data_dir,
+        shard_id=rank,
+        num_shards=world_size,
+        random_shuffle=False,
+        pad_last_batch=True,
+    )
+    outputs = efficientnet_processing_validation(
+        jpegs, interpolation, image_size, image_crop, output_layout
+    )
+    return outputs, label
+
+
+@pipeline_def
+def validation_pipe_external_source(
+    interpolation, image_size, image_crop, output_layout
+):
+    filepaths = fn.external_source(name="images", no_copy=True)
+    jpegs = fn.io.file.read(filepaths)
+    outputs = efficientnet_processing_validation(
+        jpegs, interpolation, image_size, image_crop, output_layout
+    )
+    return outputs
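
For reference, the external-source variants above can also be exercised on their own, without the proxy server, by feeding path bytes into the "images" input by hand. A minimal sketch under assumed settings; the file locations are hypothetical and the encoded-path convention mirrors what fn.io.file.read in the diff above expects.

# Illustrative standalone check of the external-source validation pipeline.
import numpy as np
from nvidia.dali import types

pipe = validation_pipe_external_source(
    interpolation=types.INTERP_LINEAR,
    image_size=256,
    image_crop=224,
    output_layout="CHW",
    batch_size=2,
    num_threads=2,
    device_id=0,
    prefetch_queue_depth=1,
)
pipe.build()

# fn.external_source(name="images") receives the raw path bytes;
# fn.io.file.read then loads the encoded JPEGs inside the pipeline.
paths = ["/data/imagenet/val/n01440764/a.JPEG", "/data/imagenet/val/n01440764/b.JPEG"]
pipe.feed_input("images", [np.frombuffer(p.encode(), dtype=np.int8) for p in paths])
(output,) = pipe.run()
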
