 from typing import Tuple
 
 import pytest
+
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
@@ -308,3 +309,71 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
     )
     # Run the pipeline
     pipeline.run()
+
+
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.XfailIfNoCorstone300
+@pytest.mark.xfail(
+    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
+    strict=False,
+)
+def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
+    """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    test_data, out_features, has_bias, per_channel_quantization = test_data()
+    in_features = test_data.shape[-1]
+
+    pipeline = EthosU55PipelineINT[input_t1](
+        Linear(
+            in_features=in_features,
+            out_features=out_features,
+            bias=has_bias,
+        ),
+        (test_data,),
+        aten_op,
+        exir_ops=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_linear_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.XfailIfNoCorstone320
+@pytest.mark.xfail(
+    reason="Ethos-U85 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
+    strict=False,
+)
+def test_linear_16a8w_u85_INT16(test_data: torch.Tensor):
+    """Test linear operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    test_data, out_features, has_bias, per_channel_quantization = test_data()
+    in_features = test_data.shape[-1]
+
+    pipeline = EthosU85PipelineINT[input_t1](
+        Linear(
+            in_features=in_features,
+            out_features=out_features,
+            bias=has_bias,
+        ),
+        (test_data,),
+        aten_op,
+        exir_ops=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_linear_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
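
Both tests unpack each parametrized case as a zero-argument factory returning `(input_tensor, out_features, has_bias, per_channel_quantization)`, with `in_features` inferred from the input's last dimension. For readers without the full file, here is a minimal sketch of what entries in `test_data_rank1_INT` and `test_data_rank4_INT` might look like; the keys and values are illustrative assumptions, not copied from the source:

```python
import torch

# Hypothetical test-data factories (names/values assumed, not from the file).
# Each value is a zero-argument callable returning the tuple the tests unpack:
# (input_tensor, out_features, has_bias, per_channel_quantization).
test_data_rank1_INT = {
    "rand_rank1_per_channel": lambda: (
        torch.rand(10),  # rank-1 input; in_features = shape[-1] = 10
        15,              # out_features of the Linear under test
        True,            # has_bias
        True,            # per_channel_quantization
    ),
}

test_data_rank4_INT = {
    "rand_rank4_per_tensor": lambda: (
        torch.rand(1, 2, 3, 8),  # rank-4 input; in_features = shape[-1] = 8
        5,               # out_features
        False,           # has_bias
        False,           # per_channel_quantization
    ),
}
```

`common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)` merges the two dicts (PEP 584 `|` union) and runs each test once per entry.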