 from typing import Tuple
 
 import pytest
+
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
@@ -308,3 +309,71 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
     )
     # Run the pipeline
     pipeline.run()
+
+
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.XfailIfNoCorstone300
+@pytest.mark.xfail(
+    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
+    strict=False,
+)
+def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
+    """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    test_data, out_features, has_bias, per_channel_quantization = test_data()
+    in_features = test_data.shape[-1]
+
+    pipeline = EthosU55PipelineINT[input_t1](
+        Linear(
+            in_features=in_features,
+            out_features=out_features,
+            bias=has_bias,
+        ),
+        (test_data,),
+        aten_op,
+        exir_ops=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_linear_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.XfailIfNoCorstone320
+@pytest.mark.xfail(
+    reason="Ethos-U85 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
+    strict=False,
+)
+def test_linear_16a8w_u85_INT16(test_data: torch.Tensor):
+    """Test linear operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    test_data, out_features, has_bias, per_channel_quantization = test_data()
+    in_features = test_data.shape[-1]
+
+    pipeline = EthosU85PipelineINT[input_t1](
+        Linear(
+            in_features=in_features,
+            out_features=out_features,
+            bias=has_bias,
+        ),
+        (test_data,),
+        aten_op,
+        exir_ops=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_linear_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
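
Both tests unpack each parametrized case as a zero-argument factory returning `(input_tensor, out_features, has_bias, per_channel_quantization)`, with `in_features` inferred from the input's last dimension. For readers without the full file, here is a minimal sketch of what entries in `test_data_rank1_INT` and `test_data_rank4_INT` might look like; the keys and values are illustrative assumptions, not copied from the source:

```python
import torch

# Hypothetical test-data factories (names/values assumed, not from the file).
# Each value is a zero-argument callable returning the tuple the tests unpack:
# (input_tensor, out_features, has_bias, per_channel_quantization).
test_data_rank1_INT = {
    "rand_rank1_per_channel": lambda: (
        torch.rand(10),  # rank-1 input; in_features = shape[-1] = 10
        15,              # out_features of the Linear under test
        True,            # has_bias
        True,            # per_channel_quantization
    ),
}

test_data_rank4_INT = {
    "rand_rank4_per_tensor": lambda: (
        torch.rand(1, 2, 3, 8),  # rank-4 input; in_features = shape[-1] = 8
        5,               # out_features
        False,           # has_bias
        False,           # per_channel_quantization
    ),
}
```

`common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)` merges the two dicts (PEP 584 `|` union) and runs each test once per entry.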