Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 97 additions & 97 deletions auto_round/inference/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,27 +439,11 @@ def fp8_static_scheme_checker(
requirements=["autoawq", "transformers"],
)

# BackendInfos["auto_round_kernel"] = BackendInfo(
# device=["cpu"],
# sym=[True, False],
# packing_format=GPTQ_FORMAT_NO_ZP,
# bits=[2, 4, 8],
# group_size=None,
# priority=6,
# checkers=[ark_feature_checker],
# alias=["ark"],
# compute_dtype=["float32", "float16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# requirements=["torch>=2.9.0", "auto_round_kernel"],
# systems=["linux"],
# )

BackendInfos["auto_round_kernel_xpu"] = BackendInfo(
device=["xpu"],
sym=[True],
BackendInfos["auto_round_kernel"] = BackendInfo(
device=["cpu"],
sym=[True, False],
packing_format=GPTQ_FORMAT_NO_ZP,
bits=[4, 8],
bits=[2, 4, 8],
group_size=None,
priority=6,
checkers=[ark_feature_checker],
Expand All @@ -471,26 +455,10 @@ def fp8_static_scheme_checker(
systems=["linux"],
)

# BackendInfos["auto_round_kernel_zp"] = BackendInfo(
# device=["cpu"],
# sym=[True, False],
# packing_format=GPTQ_FORMAT,
# bits=[2, 4, 8],
# group_size=None,
# priority=6,
# checkers=[ark_feature_checker],
# alias=["ark"],
# compute_dtype=["float32", "float16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# requirements=["torch>=2.9.0", "auto_round_kernel"],
# systems=["linux"],
# )

BackendInfos["auto_round_kernel_zp_xpu"] = BackendInfo(
BackendInfos["auto_round_kernel_xpu"] = BackendInfo(
device=["xpu"],
sym=[True],
packing_format=GPTQ_FORMAT,
packing_format=GPTQ_FORMAT_NO_ZP,
bits=[4, 8],
group_size=None,
priority=6,
Expand All @@ -499,31 +467,15 @@ def fp8_static_scheme_checker(
compute_dtype=["float32", "float16"],
data_type=["int"],
act_bits=WOQ_DEFAULT_ACT_BITS,
requirements=["torch>=2.9.0", "auto_round_kernel"],
requirements=["torch>=2.8.0", "auto_round_kernel"],
systems=["linux"],
)

# BackendInfos["auto_round_kernel_awq"] = BackendInfo(
# device=["cpu"],
# sym=[True, False],
# packing_format=AWQ_FORMAT,
# bits=[2, 4, 8],
# group_size=None,
# priority=6,
# checkers=[ark_feature_checker],
# alias=["ark"],
# compute_dtype=["float32", "float16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# requirements=["torch>=2.9.0", "auto_round_kernel"],
# systems=["linux"],
# )

BackendInfos["auto_round_kernel_awq_xpu"] = BackendInfo(
device=["xpu"],
sym=[True],
packing_format=AWQ_FORMAT,
bits=[4, 8],
BackendInfos["auto_round_kernel_zp"] = BackendInfo(
device=["cpu"],
sym=[True, False],
packing_format=GPTQ_FORMAT,
bits=[2, 4, 8],
group_size=None,
priority=6,
checkers=[ark_feature_checker],
Expand All @@ -535,66 +487,114 @@ def fp8_static_scheme_checker(
systems=["linux"],
)

BackendInfos["ipex_gptq_cpu"] = BackendInfo(
device=["cpu"],
sym=[True, False],
packing_format=GPTQ_FORMAT,
bits=[4],
group_size=None,
priority=5,
checkers=[],
compute_dtype=["float16", "bfloat16"],
data_type=["int"],
act_bits=WOQ_DEFAULT_ACT_BITS,
alias=["ipex"],
requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
)

BackendInfos["ipex_gptq"] = BackendInfo(
BackendInfos["auto_round_kernel_zp_xpu"] = BackendInfo(
device=["xpu"],
sym=[True, False],
sym=[True],
packing_format=GPTQ_FORMAT,
bits=[4],
bits=[4, 8],
group_size=None,
priority=5,
checkers=[],
compute_dtype=["float16", "bfloat16"],
priority=6,
checkers=[ark_feature_checker],
alias=["ark"],
compute_dtype=["float32", "float16"],
data_type=["int"],
act_bits=WOQ_DEFAULT_ACT_BITS,
alias=["ipex"],
requirements=["intel-extension-for-pytorch>=2.5"],
requirements=["torch>=2.8.0", "auto_round_kernel"],
systems=["linux"],
)

BackendInfos["ipex_awq_cpu"] = BackendInfo(
BackendInfos["auto_round_kernel_awq"] = BackendInfo(
device=["cpu"],
sym=[True, False],
packing_format=AWQ_FORMAT,
bits=[4],
group_size=None,
priority=5,
checkers=[],
compute_dtype=["float16", "bfloat16"],
priority=6,
checkers=[ark_feature_checker],
alias=["ark"],
compute_dtype=["float32", "float16"],
data_type=["int"],
act_bits=WOQ_DEFAULT_ACT_BITS,
alias=["ipex"],
requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
requirements=["torch>=2.9.0", "auto_round_kernel"],
systems=["linux"],
)


BackendInfos["ipex_awq"] = BackendInfo(
BackendInfos["auto_round_kernel_awq_xpu"] = BackendInfo(
device=["xpu"],
sym=[True, False],
sym=[True],
packing_format=AWQ_FORMAT,
bits=[4],
group_size=None,
priority=5,
checkers=[],
compute_dtype=["float16", "bfloat16"],
priority=6,
checkers=[ark_feature_checker],
alias=["ark"],
compute_dtype=["float32", "float16"],
data_type=["int"],
act_bits=WOQ_DEFAULT_ACT_BITS,
alias=["ipex"],
requirements=["intel-extension-for-pytorch>=2.5"],
requirements=["torch>=2.8.0", "auto_round_kernel"],
systems=["linux"],
)

# BackendInfos["ipex_gptq_cpu"] = BackendInfo(
# device=["cpu"],
# sym=[True, False],
# packing_format=GPTQ_FORMAT,
# bits=[4],
# group_size=None,
# priority=5,
# checkers=[],
# compute_dtype=["float16", "bfloat16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# alias=["ipex"],
# requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
# )

# BackendInfos["ipex_gptq"] = BackendInfo(
# device=["xpu"],
# sym=[True, False],
# packing_format=GPTQ_FORMAT,
# bits=[4],
# group_size=None,
# priority=5,
# checkers=[],
# compute_dtype=["float16", "bfloat16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# alias=["ipex"],
# requirements=["intel-extension-for-pytorch>=2.5"],
# )

# BackendInfos["ipex_awq_cpu"] = BackendInfo(
# device=["cpu"],
# sym=[True, False],
# packing_format=AWQ_FORMAT,
# bits=[4],
# group_size=None,
# priority=5,
# checkers=[],
# compute_dtype=["float16", "bfloat16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# alias=["ipex"],
# requirements=["torch<2.9", "intel-extension-for-pytorch>=2.5"],
# )


# BackendInfos["ipex_awq"] = BackendInfo(
# device=["xpu"],
# sym=[True, False],
# packing_format=AWQ_FORMAT,
# bits=[4],
# group_size=None,
# priority=5,
# checkers=[],
# compute_dtype=["float16", "bfloat16"],
# data_type=["int"],
# act_bits=WOQ_DEFAULT_ACT_BITS,
# alias=["ipex"],
# requirements=["intel-extension-for-pytorch>=2.5"],
# )
BackendInfos["hpu"] = BackendInfo(
device=["hpu"],
sym=[True, False],
Expand Down
58 changes: 58 additions & 0 deletions auto_round_extension/ark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
## What is AutoRound Kernel?
[TODO]

## Key Features
[TODO]

## Installation
### Install via pip
```bash
# install the latest auto-round-kernel version and this cmd will update your local pytorch version if needed
pip install auto-round-kernel
# or install together with a specific pytorch version to install the corresponding auto-round-kernel version, e.g., for torch 2.8.x
pip install auto-round-kernel torch~=2.8.0
```

<details>
<summary>Other Installation Methods</summary>

### Install via Script
```bash
curl -fsSL https://raw.githubusercontent.com/intel/auto-round/main/auto_round_extension/ark/install_kernel.py -o install_kernel.py
python3 install_kernel.py
```
**Notes:**
We recommend this method if you want to keep your current PyTorch and auto-round versions.
This installation script will detect the current environment and install the corresponding auto-round-kernel version.

### Install via auto_round
```bash
pip install auto-round
auto-round-kernel-install
```

</details>

### Versioning Scheme
The version number of auto-round-kernel follows the format:
`{auto-round major version}.{auto-round minor version}.{oneAPI version}.{kernel version}`

**For example: v0.9.1.1**
- The first two digits (0.9) correspond to the major and minor version of the auto_round framework.
- The third digit (1) encodes the supported Intel oneAPI and PyTorch versions: `1` indicates support for oneAPI 2025.1 and torch 2.8, while `2` indicates support for oneAPI 2025.2 and torch 2.9.
- The final digit (1) is the patch version of auto-round-kernel, reflecting updates, bug fixes, or improvements to the kernel package itself.

**Version mapping table**

| auto-round-kernel Version | auto-round Version | oneAPI Version | Supported PyTorch Version |
|:-------------------------:|:------------------:|:--------------:|:-------------------------:|
| 0.9.1.x | 0.9.x | 2025.1 | 2.8.x |
| 0.9.2.x | 0.9.x | 2025.2 | 2.9.x |

**Notes:** The oneAPI version is aligned with the PyTorch version when the auto-round-kernel binary is built, but oneAPI is not required at runtime.

### Validated Hardware Environment
#### CPU based on [Intel 64 architecture or compatible processors](https://en.wikipedia.org/wiki/X86-64):
* Intel Xeon Scalable processor (Granite Rapids)
#### GPU built on Intel's Xe architecture:
* Intel Arc B-Series Graphics (Battlemage)
60 changes: 60 additions & 0 deletions auto_round_extension/ark/install_kernel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
import subprocess
import sys


def get_torch_minor():
    """Return the installed torch version as "major.minor" (e.g. "2.8"), or None.

    Returns None when torch is not importable or its version string does not
    start with "<major>.<minor>".
    """
    try:
        import torch
    except ImportError:
        # torch is optional here; the caller falls back to a generic install.
        return None
    match = re.match(r"^(\d+)\.(\d+)", torch.__version__)
    # group(0) is the full "major.minor" prefix matched by the pattern.
    return match.group(0) if match else None


def get_auto_round_minor():
    """Return the installed auto_round version as "major.minor", or None.

    Returns None when auto_round is not importable or its version string does
    not start with "<major>.<minor>".
    """
    try:
        import auto_round
    except ImportError:
        # auto_round may not be installed yet; callers substitute a default.
        return None
    match = re.match(r"^(\d+)\.(\d+)", auto_round.__version__)
    # group(0) is the full "major.minor" prefix matched by the pattern.
    return match.group(0) if match else None


# Resolve the auto_round "major.minor" version, falling back to "0.9" when
# auto_round is not importable in this environment.
auto_round_minor = get_auto_round_minor()
if auto_round_minor is None:
    auto_round_minor = "0.9"

# Map a torch minor release to the pip requirement of the matching kernel
# build (third version digit encodes the oneAPI/torch pairing — see README).
KERNEL_MAP = {
    "2.8": f"auto-round-kernel~={auto_round_minor}.1.0",
    "2.9": f"auto-round-kernel~={auto_round_minor}.2.0",
}


def main():
    """Install the auto-round-kernel wheel matching the local torch build.

    Looks up the detected torch "major.minor" in KERNEL_MAP; on a hit, pins the
    matching kernel version (without upgrading already-satisfied deps such as
    torch itself). Otherwise installs the latest auto-round-kernel.
    """
    torch_minor = get_torch_minor()
    pkg = KERNEL_MAP.get(torch_minor) if torch_minor else None
    if pkg is None:
        print("torch not found or no mapping for your version. Installing the latest auto-round-kernel ...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "auto-round-kernel"])
    else:
        print(f"Detected torch {torch_minor}, installing {pkg} ...")
        # --upgrade-strategy only-if-needed keeps the existing torch untouched.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", pkg, "--upgrade-strategy", "only-if-needed"]
        )


# Entry point for direct invocation: `python install_kernel.py`.
if __name__ == "__main__":
    main()
Loading
Loading