[App] ResNet Compiled App (2/2) - Pipeline (#165)

Adds ResNet and image classifier pipeline functionality. Includes changes from #428. See the Hugging Face implementation for the original API inspiration. Resolves CentML/hidet#60.
hidet-org · Jul 22, 2024 · 742a6b6 · 742a6b6
1 parent b75e5d8
commit 742a6b6
Show file tree

Hide file tree

Showing 24 changed files with 702 additions and 15 deletions.
diff --git a/python/hidet/apps/hf.py b/python/hidet/apps/hf.py
@@ -1,3 +1,14 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Optional
 
 import torch

diff --git a/python/hidet/apps/image_classification/README.md b/python/hidet/apps/image_classification/README.md
@@ -0,0 +1,29 @@
+## Hidet Image Classification Compiled App
+
+### Quickstart
+
+```
+from hidet.apps.image_classification.pipeline.pipeline import ImageClassificationPipeline
+from hidet.apps.image_classification.processing.image_processor import ChannelDimension
+from datasets import load_dataset
+
+
+dataset = load_dataset("huggingface/cats-image", split="test", trust_remote_code=True)
+
+pipeline = ImageClassificationPipeline("microsoft/resnet-50", batch_size=1, kernel_search_space=0)
+
+res = pipeline(dataset["image"], input_data_format=ChannelDimension.CHANNEL_LAST, top_k=3)
+```
+
+The image classifier app currently supports only ResNet50 from Hugging Face. Images can be passed as PIL images or as torch/hidet tensors.
+
+Sample datasets can be loaded with the `datasets` library. The pipeline maps predicted label ids back to string labels using the Hugging Face config and returns the top k candidates with the highest scores.
+
+If the weights used are not public, be sure to modify `hidet.toml` so that option `auth_tokens.for_huggingface` is set to your Huggingface account credential.
+
+### Model Details
+
+A `PretrainedModelForImageClassification` is a `PretrainedModel` that allows us to `create_pretrained_model` from a Huggingface identifier. `PretrainedModelForImageClassification` defines a forward function that accepts Hidet tensors as input and returns logits as output.
+
+Interact with a `PretrainedModelForImageClassification` using `ImageClassificationPipeline`. The pipeline instantiates a pre-processor that adapts the image type for Hidet and performs transformations on the image before calling the pretrained model graph. Specify batch size and model name using the pipeline constructor.
+
diff --git a/python/hidet/apps/image_classification/__init__.py b/python/hidet/apps/image_classification/__init__.py
@@ -1 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .modeling import *
diff --git a/python/hidet/apps/image_classification/app.py b/python/hidet/apps/image_classification/app.py
@@ -1,3 +1,14 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Sequence
 
 from hidet.graph.tensor import Tensor

diff --git a/python/hidet/apps/image_classification/builder.py b/python/hidet/apps/image_classification/builder.py
@@ -1,8 +1,20 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Optional
 
 from transformers import PretrainedConfig
 
 from hidet.apps import hf
+from hidet.apps.image_classification.processing.image_processor import BaseImageProcessor
 from hidet.apps.image_classification.app import ImageClassificationApp
 from hidet.apps.image_classification.modeling.pretrained import PretrainedModelForImageClassification
 from hidet.apps.modeling_outputs import ImageClassifierOutput
@@ -19,6 +31,7 @@ def create_image_classifier(
     revision: Optional[str] = None,
     dtype: str = "float32",
     device: str = "cuda",
+    batch_size: int = 1,
     kernel_search_space: int = 2,
 ):
     # load the huggingface config according to (model, revision) pair
@@ -28,7 +41,7 @@ def create_image_classifier(
     model = PretrainedModelForImageClassification.create_pretrained_model(
         config, revision=revision, dtype=dtype, device=device
     )
-    inputs: Tensor = symbol(["bs", 3, 224, 224], dtype=dtype, device=device)
+    inputs: Tensor = symbol([batch_size, 3, 224, 224], dtype=dtype, device=device)
     outputs: ImageClassifierOutput = model.forward(inputs)
     graph: FlowGraph = trace_from(outputs.logits, inputs)
 
@@ -43,14 +56,10 @@ def create_image_classifier(
     )
 
 
-# def create_image_processor(
-#     name: str,
-#     revision: Optional[str] = None,
-#     **kwargs
-# ) -> BaseProcessor:
-#     # load the huggingface config according to (model, revision) pair
-#     config: PretrainedConfig = hf.load_pretrained_config(name, revision=revision)
+def create_image_processor(name: str, revision: Optional[str] = None, **kwargs):
+    """
+    Build the image processor that matches a pretrained model.
+
+    name: str
+        Model identifier used to load the huggingface config.
+    revision: Optional[str]
+        Model revision used together with `name` when loading the config.
+    kwargs
+        Forwarded unchanged to the constructor of the selected processor class.
+
+    Returns an instance of the processor class registered for the first architecture
+    listed in the loaded config.
+    """
+    # load the huggingface config according to (model, revision) pair
+    config: PretrainedConfig = hf.load_pretrained_config(name, revision=revision)
 
-#     processor = BaseImageProcessor.load_module(config, module_type=ModuleType.PROCESSING)
+    # only the first entry of config.architectures is used to select the processor class
+    processor = BaseImageProcessor.load_module(config.architectures[0])
 
-#     return processor(**kwargs)
+    return processor(**kwargs)
diff --git a/python/hidet/apps/image_classification/modeling/__init__.py b/python/hidet/apps/image_classification/modeling/__init__.py
@@ -1 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .resnet import *
diff --git a/python/hidet/apps/image_classification/modeling/pretrained.py b/python/hidet/apps/image_classification/modeling/pretrained.py
@@ -1,3 +1,14 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from typing import Optional
 
 import torch
@@ -9,7 +20,7 @@
 import hidet
 
 
-class PretrainedModelForImageClassification(PretrainedModel[ImageClassifierOutput]):
+class PretrainedModelForImageClassification(PretrainedModel):
     @classmethod
     def create_pretrained_model(
         cls, config: PretrainedConfig, revision: Optional[str] = None, dtype: Optional[str] = None, device: str = "cuda"
@@ -37,3 +48,6 @@ def create_pretrained_model(
         cls.copy_weights(torch_model, hidet_model)
 
         return hidet_model
+
+    def forward(self, *args, **kwargs) -> ImageClassifierOutput:
+        """
+        Placeholder forward pass; this base implementation always raises NotImplementedError.
+
+        NOTE(review): concrete model classes are presumably expected to override this and
+        return an ImageClassifierOutput -- confirm against the subclasses.
+        """
+        raise NotImplementedError()
diff --git a/python/hidet/apps/image_classification/modeling/resnet/__init__.py b/python/hidet/apps/image_classification/modeling/resnet/__init__.py
@@ -1 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from .modeling import ResNetForImageClassification, ResNetModel
diff --git a/python/hidet/apps/image_classification/modeling/resnet/modeling.py b/python/hidet/apps/image_classification/modeling/resnet/modeling.py
@@ -1,3 +1,15 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from dataclasses import asdict
 from typing import Sequence
 from transformers import ResNetConfig

diff --git a/python/hidet/apps/image_classification/pipeline/__init__.py b/python/hidet/apps/image_classification/pipeline/__init__.py
@@ -0,0 +1,12 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline import ImageClassificationPipeline
diff --git a/python/hidet/apps/image_classification/pipeline/pipeline.py b/python/hidet/apps/image_classification/pipeline/pipeline.py
@@ -0,0 +1,75 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Iterable, Optional, Sequence
+from hidet.apps import hf
+from hidet.apps.image_classification.builder import create_image_classifier, create_image_processor
+from hidet.apps.image_classification.processing import BaseImageProcessor, ChannelDimension, ImageInput
+from hidet.graph.tensor import Tensor
+
+
+class ImageClassificationPipeline:
+    """
+    End-to-end image classification: pre-process the images, run the compiled classifier and
+    convert the model output into labelled scores.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        revision: Optional[str] = None,
+        batch_size: int = 1,
+        pre_processor: Optional[BaseImageProcessor] = None,
+        dtype: str = "float32",
+        device: str = "cuda",
+        kernel_search_space: int = 2,
+    ):
+        """
+        name: str
+            Model identifier used to load the config, the processor and the classifier
+        revision: Optional[str]
+            Model revision used together with name
+        batch_size: int (default 1)
+            Batch size the classifier is built for
+        pre_processor: Optional[BaseImageProcessor]
+            Processor to use; when None, one is created from name and revision
+        dtype: str (default "float32")
+            Data type passed to create_image_classifier
+        device: str (default "cuda")
+            Device passed to create_image_classifier
+        kernel_search_space: int (default 2)
+            Search space setting passed to create_image_classifier
+        """
+        # NOTE(review): the default processor is created without dtype/device, so it uses its own
+        # defaults -- confirm this is intended when a non-default dtype or device is requested.
+        if pre_processor is None:
+            pre_processor = create_image_processor(name, revision)
+        self.pre_processor = pre_processor
+
+        self.model = create_image_classifier(name, revision, dtype, device, batch_size, kernel_search_space)
+        # the config is kept for num_labels and id2label, which postprocess reads
+        self.config = hf.load_pretrained_config(name, revision)
+
+    def __call__(self, model_inputs: Any, **kwargs):
+        """
+        Run through image classification pipeline end to end.
+        model_inputs: ImageInput
+            List or single instance of numpy array, PIL image, or torch tensor
+        input_data_format: ChannelDimension
+            Input data is channel first or last
+        top_k: int (default 5)
+            Return scores for top k results
+
+        All keyword arguments are forwarded to preprocess, forward and postprocess alike.
+        Returns, for every input image, a list of {"label", "score"} dicts ordered from the
+        highest score to the lowest.
+        """
+        # A lone non-iterable input becomes a one-element batch; any other non-sequence
+        # iterable is materialized into a list so the processor always receives a sequence.
+        if not isinstance(model_inputs, Iterable):
+            model_inputs = [model_inputs]
+        if not isinstance(model_inputs, Sequence):
+            model_inputs = list(model_inputs)
+
+        processed_inputs = self.preprocess(model_inputs, **kwargs)
+        model_outputs = self.forward(processed_inputs, **kwargs)
+        outputs = self.postprocess(model_outputs, **kwargs)
+
+        return outputs
+
+    def preprocess(self, images: ImageInput, input_data_format: ChannelDimension, **kwargs):
+        """
+        Convert the images into model input by calling the configured pre-processor.
+        """
+        # TODO accept inputs other than ImageInput type, e.g. url or dataset
+        return self.pre_processor(images, input_data_format=input_data_format, **kwargs)
+
+    def postprocess(self, model_outputs: Tensor, top_k: int = 5, **kwargs):
+        """
+        Turn model output into labelled scores.
+        model_outputs: Tensor
+            Output of forward; one row of scores per image
+        top_k: int (default 5)
+            Number of candidates per image, capped at the number of labels in the config
+
+        Returns one list per image of {"label", "score"} dicts, best candidate first. The score
+        is the model output value itself; no further transformation is applied here.
+        """
+        top_k = min(top_k, self.config.num_labels)
+        torch_outputs = model_outputs.torch()
+        # sorted=True so that candidates come back in descending score order instead of an
+        # unspecified order.
+        values, indices = torch_outputs.topk(top_k, sorted=True)
+
+        id2label = self.config.id2label
+        results = []
+        for row_indices, row_values in zip(indices, values):
+            results.append(
+                [
+                    {"label": id2label[int(index.item())], "score": value.item()}
+                    for index, value in zip(row_indices, row_values)
+                ]
+            )
+        return results
+
+    def forward(self, model_inputs: Tensor, **kwargs) -> Tensor:
+        """
+        Run the classifier on one pre-processed batch and return its first output.
+        """
+        return self.model.classify([model_inputs])[0]
diff --git a/python/hidet/apps/image_classification/processing/__init__.py b/python/hidet/apps/image_classification/processing/__init__.py
@@ -0,0 +1,13 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .image_processor import ChannelDimension, BaseImageProcessor, ImageInput
+from .resnet import *
diff --git a/python/hidet/apps/image_classification/processing/image_processor.py b/python/hidet/apps/image_classification/processing/image_processor.py
@@ -0,0 +1,91 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from enum import Enum, auto, unique
+from typing import Dict, List, Optional, Sequence, Type, Union
+import torch
+import numpy as np
+from hidet.graph.tensor import Tensor, from_torch
+
+
+@unique
+class ChannelDimension(Enum):
+    """
+    Describes where the channel dimension sits in the layout of an input image.
+    """
+
+    CHANNEL_FIRST = auto()
+    CHANNEL_LAST = auto()
+    # NOTE(review): presumably for images that carry no explicit channel dimension -- confirm
+    CHANNEL_SINGLE = auto()
+
+
+# Type alias for what an image processor accepts: a single PIL image, numpy array or torch
+# tensor, or a list of any one of those. The PIL and torch types are given as string forward
+# references.
+ImageInput = Union[
+    "PIL.Image.Image", np.ndarray, "torch.Tensor", List["PIL.Image.Image"], List[np.ndarray], List["torch.Tensor"]
+]  # noqa
+
+
+class BaseImageProcessor:
+    """
+    Base class for image pre-processors and registry of architecture-specific processor classes.
+
+    Subclasses implement preprocess; rescale, normalize and center_square_crop are building
+    blocks they can use. Processor classes are stored under an architecture name with register
+    and retrieved with load_module.
+    """
+
+    # Maps an architecture name to its processor class. It is defined on the base class, so
+    # every subclass that does not override it shares the same mapping.
+    processor_registry: Dict[str, Type["BaseImageProcessor"]] = {}
+
+    def __init__(self, dtype: Optional[str] = None, device: str = "cuda"):
+        """
+        dtype: Optional[str]
+            Data type the normalization constants are converted to
+        device: str (default "cuda")
+            Device the normalization constants are moved to
+        """
+        super().__init__()
+
+        self.dtype = dtype
+        self.device = device
+
+    @classmethod
+    def register(cls, arch: str, processor_class: Type["BaseImageProcessor"]):
+        """
+        Store processor_class under the architecture name arch, replacing any previous entry.
+        """
+        cls.processor_registry[arch] = processor_class
+
+    @classmethod
+    def load_module(cls, arch: str):
+        """
+        Return the processor class registered for arch.
+
+        Raises KeyError if nothing has been registered under that name.
+        """
+        try:
+            return cls.processor_registry[arch]
+        except KeyError:
+            # Same exception type as the plain lookup, but name what was requested and what exists.
+            registered = sorted(cls.processor_registry)
+            raise KeyError(
+                f"No image processor registered for architecture {arch!r}; registered: {registered}"
+            ) from None
+
+    def __call__(self, images: ImageInput, **kwargs) -> Tensor:
+        """
+        Shorthand for preprocess(images, **kwargs).
+        """
+        return self.preprocess(images, **kwargs)
+
+    def preprocess(self, images: ImageInput, **kwargs) -> Tensor:
+        """
+        Convert images into a model input tensor. Must be implemented by subclasses.
+        """
+        raise NotImplementedError("Image processors should implement their own preprocess step.")
+
+    def rescale(self, image: Tensor, scale: float) -> Tensor:
+        """
+        Multiply image by scale.
+        """
+        return image * scale
+
+    def normalize(
+        self, image: Tensor, mean: Union[float, Sequence[float]], std: Union[float, Sequence[float]]
+    ) -> Tensor:
+        """
+        Normalize image on per channel basis as
+        (pixel - mean) / std
+        mean and std are broadcast across channels if scalar value provided.
+
+        The channel dimension is taken to be the third from last dimension of image.
+        Raises ValueError if a sequence is given whose length differs from the channel count.
+        """
+        num_channels: int = image.shape[-3]
+
+        if isinstance(mean, Sequence):
+            if len(mean) != num_channels:
+                raise ValueError(f"means need {num_channels} values, one for each channel, got {len(mean)}.")
+        else:
+            mean = [mean] * num_channels
+        # shape (num_channels, 1, 1) so the constants broadcast over the last two dimensions
+        channel_means = from_torch(torch.Tensor(mean).view(num_channels, 1, 1)).to(self.dtype, self.device)
+
+        if isinstance(std, Sequence):
+            if len(std) != num_channels:
+                raise ValueError(f"stds need {num_channels} values, one for each channel, got {len(std)}.")
+        else:
+            std = [std] * num_channels
+        channel_stds = from_torch(torch.Tensor(std).view(num_channels, 1, 1)).to(self.dtype, self.device)
+
+        return (image - channel_means) / channel_stds
+
+    def center_square_crop(self, image: Tensor, size: int) -> Tensor:
+        """
+        Crop the central size x size window out of the last two dimensions of image.
+        image: Tensor
+            Indexed as a 4-dimensional tensor; only the last two dimensions are cropped
+        size: int
+            Side length of the square crop
+
+        Raises ValueError if either of the last two dimensions is smaller than size.
+        """
+        height, width = image.shape[-2:]
+
+        # Check each dimension on its own: comparing the shape tuples directly is lexicographic,
+        # so a tall but narrow image would pass the check and yield an undersized crop.
+        if height < size or width < size:
+            raise ValueError(f"image of size {height}x{width} is too small for a {size}x{size} center crop.")
+
+        # Offsets are computed per dimension so non-square inputs are centered on both axes.
+        # When the excess is odd, the extra row/column is dropped from the top/left side.
+        top = (height - size + 1) // 2
+        left = (width - size + 1) // 2
+        bottom = top + size
+        right = left + size
+
+        return image[:, :, top:bottom, left:right]