Merged (changes from all commits)
auto_round/formats.py (2 changes: 1 addition & 1 deletion)

@@ -493,7 +493,7 @@ def save_quantized(

@OutputFormat.register("auto_awq")
class AutoAWQFormat(OutputFormat):
-    support_schemes = ["W4A16", "W2A16", "W3A16", "W8A16", "BF16", "W2A16G64", "W2A16G32"]
+    support_schemes = ["W4A16"]
    format_name = "auto_awq"

    @staticmethod
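For context, a minimal sketch of how a format registry like this can gate schemes at export time. Only the OutputFormat/AutoAWQFormat names and the support_schemes/format_name attributes come from the diff; the registry internals and the check_scheme helper are assumptions for illustration.

from typing import Dict, List, Type

class OutputFormat:
    _registry: Dict[str, Type["OutputFormat"]] = {}
    support_schemes: List[str] = []
    format_name: str = ""

    @classmethod
    def register(cls, name: str):
        def wrap(subclass: Type["OutputFormat"]) -> Type["OutputFormat"]:
            cls._registry[name] = subclass
            return subclass
        return wrap

    @classmethod
    def check_scheme(cls, fmt: str, scheme: str) -> None:
        # Reject a scheme the chosen export format cannot serialize.
        supported = cls._registry[fmt].support_schemes
        if scheme not in supported:
            raise ValueError(f"format {fmt!r} supports {supported}, got {scheme!r}")

@OutputFormat.register("auto_awq")
class AutoAWQFormat(OutputFormat):
    # Narrowed by this PR: only the 4-bit scheme has AWQ kernel support.
    support_schemes = ["W4A16"]
    format_name = "auto_awq"

OutputFormat.check_scheme("auto_awq", "W4A16")   # passes
# OutputFormat.check_scheme("auto_awq", "W2A16") # would now raise ValueError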
docs/step_by_step.md (3 changes: 1 addition & 2 deletions)

@@ -152,11 +152,10 @@ adopted within the community, **only 4-bits quantization is supported**. Please
|export format | supported scheme |
|--------------|------------------|
|**auto_round** | W4A16, W2A16, W3A16, W8A16, MXFP4, MXFP8, NVFP4, FPW8A16, W2A16G64, W2A16G32, FP8_STATIC, BF16|
> [Contributor comment] Schemes without kernel support are shown in gray (or marked differently).

> [Contributor comment] This issue has not been resolved.
-|**auto_awq / auto_round:auto_awq** | W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32 |
+|**auto_awq / auto_round:auto_awq** | W4A16 |
|**auto_gptq / auto_round:auto_gptq / auto_round:gptqmodel**|W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32|
|**llm_compressor / auto_round:llm_compressor** | MXFP4, MXFP8, NVFP4, FPW8A16, FP8_STATIC |
|**gguf** | GGUF:Q4_0, GGUF:Q4_1, GGUF:Q5_0, GGUF:Q5_1, GGUF:Q2_K_S, GGUF:Q3_K_S, GGUF:Q3_K_M, GGUF:Q3_K_L, GGUF:Q4_K_S, GGUF:Q4_K_M, GGUF:Q5_K_S, GGUF:Q5_K_M, GGUF:Q6_K, GGUF:Q8_0 |
|**itrex / itrex_xpu** | W4A16, W2A16, W3A16, W8A16, BF16, W2A16G64, W2A16G32 |
|**fake** | all schemes|
### Hardware Compatibility

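To make the format/scheme pairing above concrete, a sketch of driving the CLI the way the tests in this PR do. All flags appear elsewhere in the diff; the model id and output dir are placeholders.

import os
import sys

model = "facebook/opt-125m"  # any local path or HF id
# auto_awq now accepts only W4A16; W2A16 and friends must go through
# auto_round or auto_gptq instead.
res = os.system(
    f"{sys.executable} -m auto_round --model {model} "
    f"--scheme W4A16 --format auto_awq --iters 1 --nsamples 1 --output_dir ./saved"
)
assert res == 0, "export failed"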
test/helpers.py (1 change: 1 addition & 0 deletions)

@@ -10,6 +10,7 @@

# Automatically choose a local path or model name.
def get_model_path(model_name: str) -> str:
+    model_name = model_name.rstrip("/")
    ut_path = f"/tf_dataset/auto_round/models/{model_name}"
    local_path = f"/models/{model_name.split('/')[-1]}"
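The new guard matters because a trailing slash makes the basename empty, which would turn local_path into just "/models/". A quick sketch (the model id mirrors the test fixed later in this PR):

name = "qwen/Qwen2-VL-2B-Instruct/"
print(name.split("/")[-1])               # "" -> local path would be "/models/"
print(name.rstrip("/").split("/")[-1])   # "Qwen2-VL-2B-Instruct"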
test/test_cpu/export/test_gguf_format.py (13 changes: 8 additions & 5 deletions)

@@ -10,6 +10,9 @@

from ...helpers import get_model_path, get_tiny_model

+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])


class TestGGUF:

@@ -26,15 +29,15 @@ def teardown_class(self):
    def test_basic_usage(self, tiny_gemma_model_path, tiny_qwen_model_path):
        python_path = sys.executable
        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_gemma_model_path} "
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {tiny_gemma_model_path} "
            f" --bs 16 --iters 0 --nsamples 1 --format gguf:q4_k_m"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"
        shutil.rmtree("./saved", ignore_errors=True)

        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_qwen_model_path}"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {tiny_qwen_model_path}"
            f" --bs 16 --iters 1 --nsamples 1 --format fake,gguf:q4_0"
        )
        if res > 0 or res == -1:
@@ -162,15 +165,15 @@ def test_all_format(self, tiny_qwen_model_path):
        # for gguf_format in ["gguf:q4_0", "gguf:q4_1", "gguf:q4_k_m", "gguf:q6_k"]:
        for gguf_format in ["gguf:q4_k_m"]:
            res = os.system(
-                f"cd .. && {python_path} -m auto_round --model {model_name} "
+                f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {model_name} "
                f" --bs 16 --iters 1 --nsamples 1 --seqlen 16 --format {gguf_format}"
            )
            if res > 0 or res == -1:
                assert False, "cmd line test fail, please have a check"
            shutil.rmtree("../../tmp_autoround", ignore_errors=True)

            res = os.system(
-                f"cd .. && {python_path} -m auto_round --model {model_name}"
+                f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {model_name}"
                f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --format fake,{gguf_format}"
            )
            if res > 0 or res == -1:

@@ -179,7 +182,7 @@ def test_all_format(self, tiny_qwen_model_path):

        # test mixed q2_k_s
        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {model_name}"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {model_name}"
            f" --bs 16 --iters 0 --nsamples 1 --seqlen 16 --scheme GGUF:Q2_K_MIXED"
        )
        if res > 0 or res == -1:
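These tests previously relied on "cd .." so that Python picked up the in-tree auto_round package; the PR switches to putting the repo root on PYTHONPATH explicitly. A minimal sketch of what the two added module-level lines compute; the absolute path is a made-up example, the layout follows the file paths in this PR.

parts = "/repo/auto-round/test/test_cpu/export/test_gguf_format.py".split("/")
repo_root = "/".join(parts[: parts.index("test")])
print(repo_root)  # "/repo/auto-round", prefixed to PYTHONPATH so the
                  # in-tree auto_round package wins over an installed copy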
test/test_cpu/utils/test_cli_usage.py (20 changes: 12 additions & 8 deletions)

@@ -4,8 +4,12 @@

from ...helpers import get_model_path

+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])


class TestAutoRoundCmd:

    @classmethod
    def setup_class(self):
        pass
@@ -21,48 +25,48 @@ def test_auto_round_cmd(self, tiny_opt_model_path, tiny_qwen_vl_model_path):
        python_path = sys.executable

        # Test llm script
-        res = os.system(f"cd .. && {python_path} -m auto_round -h")
+        res = os.system(f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round -h")
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 32 --iter 2 --nsamples 1 --format auto_gptq,auto_round --output_dir ./saved --tasks piqa"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model {tiny_opt_model_path} --seqlen 8 --iter 1 --nsamples 1 --eval_task_by_task --tasks openbookqa --bs 32"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        res = os.system(
-            f"cd .. && {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -c 'from auto_round.__main__ import run_light; run_light()' --seqlen 8 --iter 2 --nsamples 8 --output_dir ./saved --tasks lambada_openai"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        # test mllm script

        # test auto_round_mllm --eval help
-        res = os.system(f"cd .. && {python_path} -m auto_round --eval -h")
+        res = os.system(f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --eval -h")
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        # test auto_round_mllm --lmms help
-        res = os.system(f"cd .. && {python_path} -m auto_round --eval --lmms -h")
+        res = os.system(f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --eval --lmms -h")
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        res = os.system(
-            f"cd .. && {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --mllm --model {tiny_qwen_vl_model_path} --iter 2 --nsamples 2 --seqlen 32 --format auto_round --output_dir ./saved"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"

        res = os.system(
-            f"cd .. && {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --mllm --iter 2 --nsamples 2 --model {tiny_qwen_vl_model_path} --seqlen 32 --format auto_round"
            " --quant_nontext_module --output_dir ./saved "
        )
        if res > 0 or res == -1:
test/test_cuda/advanced/test_fp8_input.py (24 changes: 12 additions & 12 deletions)

@@ -64,8 +64,8 @@ def test_gguf_imatrix(self):
        # print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))

    def test_small_model_rtn(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=0)
        _, folder = ar.quantize_and_save(output_dir=self.save_dir)
        model_args = f"pretrained={self.save_dir}"
        result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")

@@ -75,8 +75,8 @@ def test_small_model_rtn(self):

        shutil.rmtree(self.save_dir, ignore_errors=True)

    def test_small_model_iters1(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=1)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=1)
        _, folder = ar.quantize_and_save(output_dir=self.save_dir)
        model_args = f"pretrained={self.save_dir}"
        result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")

@@ -86,25 +86,25 @@ def test_small_model_iters1(self):

        shutil.rmtree(self.save_dir, ignore_errors=True)

    def test_medium_model_rtn(self):
-        model, tokenizer = self.tiny_fp8_model()
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0)
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
+        ar = AutoRound(model=model_name, iters=0)
        _, folder = ar.quantize_and_save(output_dir=self.save_dir)
        model_args = f"pretrained={self.save_dir}"
        result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
        print(result["results"]["lambada_openai"]["acc,none"])
-        assert result["results"]["lambada_openai"]["acc,none"] > 0.55
+        assert result["results"]["lambada_openai"]["acc,none"] > 0.33

        shutil.rmtree(self.save_dir, ignore_errors=True)

    def test_medium_model_rtn_with_lm_head(self):
-        model, tokenizer = self.tiny_fp8_model()
+        model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
        layer_config = {"lm_head": {"bits": 4}}
-        ar = AutoRound(model=model, tokenizer=tokenizer, iters=0, layer_config=layer_config)
+        ar = AutoRound(model=model_name, iters=0, layer_config=layer_config)
        _, folder = ar.quantize_and_save(output_dir=self.save_dir)
        model_args = f"pretrained={self.save_dir}"
        result = simple_evaluate(model="hf", model_args=model_args, tasks="lambada_openai", batch_size="auto")
        print(result["results"]["lambada_openai"]["acc,none"])
-        assert result["results"]["lambada_openai"]["acc,none"] > 0.55
+        assert result["results"]["lambada_openai"]["acc,none"] > 0.33

        shutil.rmtree(self.save_dir, ignore_errors=True)

@@ -135,9 +135,9 @@ def test_fp8_model_gguf(self):

    def test_diff_datatype(self):
        for scheme in ["NVFP4", "MXFP4"]:
-            model, tokenizer = self.tiny_fp8_model()
+            model_name = get_model_path("qwen/Qwen3-0.6B-FP8")
            for iters in [0, 1]:
                print(f"Testing scheme: {scheme}, iters: {iters}")
-                ar = AutoRound(model=model, tokenizer=tokenizer, iters=iters, scheme=scheme)
+                ar = AutoRound(model_name, iters=iters, scheme=scheme)
                ar.quantize_and_save(output_dir=self.save_dir)
                shutil.rmtree(self.save_dir, ignore_errors=True)
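A sketch of the calling convention these tests migrate to: AutoRound resolves a model path or Hugging Face id itself, so the separate tokenizer argument is dropped. The model id and the return shape are taken from the diff; iters=0 is the RTN fast path, iters>=1 runs tuning.

from auto_round import AutoRound

ar = AutoRound(model="qwen/Qwen3-0.6B-FP8", iters=0)   # RTN, no tuning steps
model, folder = ar.quantize_and_save(output_dir="./saved")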
test/test_cuda/advanced/test_multiple_card.py (2 changes: 1 addition & 1 deletion)

@@ -354,7 +354,7 @@ def test_device_map_for_triton(self):

    @multi_card
    def test_mllm_device_map(self):
-        model_name = get_model_path("qwen/Qwen2-VL-2B-Instruct/")
+        model_name = get_model_path("qwen/Qwen2-VL-2B-Instruct")
        from auto_round import AutoRoundMLLM

        device_map = "0,1"
test/test_cuda/advanced/test_multiple_card_calib.py (7 changes: 5 additions & 2 deletions)

@@ -7,6 +7,9 @@

from auto_round.testing_utils import multi_card

+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])


def get_accuracy(data):
    match = re.search(r"\|acc\s+\|[↑↓]\s+\|\s+([\d.]+)\|", data)
@@ -41,7 +44,7 @@ def test_multiple_card_calib(self):

        ## test llm script
        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --iters 1 --nsamples 1 --output_dir None"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"
@@ -52,7 +55,7 @@ def test_multiple_card_nvfp4(self):

        ## test llm script
        res = os.system(
-            f"cd .. && {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {python_path} -m auto_round --model facebook/opt-125m --scheme NVFP4 --devices '0,1' --iters 1 --nsamples 1 --enable_torch_compile --low_gpu_mem_usage"
        )
        if res > 0 or res == -1:
            assert False, "cmd line test fail, please have a check"
test/test_cuda/backends/test_marlin_backend.py (3 changes: 2 additions & 1 deletion)

@@ -7,11 +7,12 @@
from auto_round import AutoRound, AutoRoundConfig
from auto_round.eval.evaluation import simple_evaluate_user_model

-from ...helpers import model_infer
+from ...helpers import get_model_path, model_infer


class TestAutoRoundMarlinBackend:
    save_dir = "./saved"
+    model_name = get_model_path("facebook/opt-125m")

    @pytest.fixture(autouse=True, scope="class")
    def setup_and_teardown_class(self):
test/test_cuda/export/test_gguf.py (11 changes: 7 additions & 4 deletions)

@@ -12,6 +12,9 @@

from ...helpers import get_model_path, get_tiny_model, save_tiny_model

+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])


class TestAutoRound:
    save_dir = "./saved"

@@ -56,16 +59,16 @@ def test_gguf_format(self, tiny_qwen_model_path, dataloader):

        save_dir = os.path.join(os.path.dirname(__file__), "saved")
        res = os.system(
-            f"cd .. && {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
+            f"PYTHONPATH={AUTO_ROUND_PATH}:$PYTHONPATH {sys.executable} -m auto_round --model {tiny_qwen_model_path} --iter 2 "
            f"--output_dir {save_dir} --nsample 2 --format gguf:q4_0 --device 0"
        )
        print(save_dir)
        assert not (res > 0 or res == -1), "qwen2 tuning fail"

        from llama_cpp import Llama

-        gguf_file = os.listdir(f"{save_dir}/tmp_tiny_qwen_model_path-gguf")[0]
-        llm = Llama(f"{save_dir}/tmp_tiny_qwen_model_path-gguf/{gguf_file}", n_gpu_layers=-1)
+        gguf_file = os.listdir(f"{save_dir}/tiny_qwen_model_path-gguf")[0]
+        llm = Llama(f"{save_dir}/tiny_qwen_model_path-gguf/{gguf_file}", n_gpu_layers=-1)
        output = llm("There is a girl who likes adventure,", max_tokens=32)
        print(output)
        shutil.rmtree(save_dir, ignore_errors=True)
@@ -155,7 +158,7 @@ def test_vlm_gguf(self):
        autoround.quantize_and_save(output_dir=quantized_model_path, format="gguf:q4_0")
        assert "mmproj-model.gguf" in os.listdir("./saved")
        file_size = os.path.getsize("./saved/Qwen2-VL-2B-Instruct-Q4_0.gguf") / 1024**2
-        assert abs(file_size - 4242) < 5.0
+        assert abs(file_size - 894) < 5.0
        file_size = os.path.getsize("./saved/mmproj-model.gguf") / 1024**2
        assert abs(file_size - 2580) < 5.0
        shutil.rmtree("./saved", ignore_errors=True)
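The drop from 4242 to 894 is consistent with Q4_0's storage cost for a ~2B VLM once the vision tower is exported separately as mmproj-model.gguf. A rough back-of-envelope check; the text-side parameter count is an assumption, while the block layout is standard GGML Q4_0 (one fp16 scale plus 16 nibble bytes per 32 weights):

bytes_per_weight = 18 / 32                   # 0.5625 bytes, i.e. 4.5 bits
params = 1.6e9                               # assumed text-side weights of Qwen2-VL-2B
print(params * bytes_per_weight / 1024**2)   # ~858 MB, same ballpark as the asserted 894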
test/test_cuda/integrations/test_transformers.py (7 changes: 6 additions & 1 deletion)

@@ -36,7 +36,7 @@
 # @slow
 @require_torch_gpu
 @require_accelerate
-class AutoRoundTest:
+class TestAutoRound:
     model_name = "OPEA/Qwen2.5-1.5B-Instruct-int4-sym-inc"
     input_text = "There is a girl who likes adventure,"
     EXPECTED_OUTPUTS = set()

@@ -205,3 +205,8 @@ def test_mixed_bits(self):

         text = "There is a girl who likes adventure,"
         inputs = tokenizer(text, return_tensors="pt").to(model.device)
         tokenizer.decode(model.generate(**inputs, max_new_tokens=5)[0])
+
+
+# FAILED export/test_gguf.py::TestAutoRound::test_gguf_format - AssertionError: qwen2 tuning fail
+# FAILED export/test_gguf.py::TestAutoRound::test_all_format - SystemExit: 1
+# FAILED export/test_gguf.py::TestAutoRound::test_vlm_gguf - AttributeError: 'Qwen2VLForConditionalGeneration' object has no attribute 'last_layer_name_to_block_name'
test/test_cuda/models/test_support_vlms.py (14 changes: 9 additions & 5 deletions)

@@ -9,8 +9,12 @@
from auto_round import AutoRoundConfig  # must import for auto-round format
from auto_round.testing_utils import require_gptqmodel, require_package_version_ut, require_vlm_env

+AUTO_ROUND_PATH = __file__.split("/")
+AUTO_ROUND_PATH = "/".join(AUTO_ROUND_PATH[: AUTO_ROUND_PATH.index("test")])


class TestSupportVLMS:

    @classmethod
    def setup_class(self):
        self.save_dir = os.path.join(os.path.dirname(__file__), "ut_saved")
@@ -26,7 +30,7 @@ def test_qwen2(self):
model_path = "/models/Qwen2-VL-2B-Instruct/"
# test tune
res = os.system(
f"cd .. && {self.python_path} -m auto_round --mllm "
f"PYTHONPATH='AUTO_ROUND_PATH:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
)
assert not (res > 0 or res == -1), "qwen2 tuning fail"
@@ -81,7 +85,7 @@ def test_phi3(self):
model_path = "/models/Phi-3.5-vision-instruct/"
## test tune
res = os.system(
f"cd .. && {self.python_path} -m auto_round --mllm "
f"PYTHONPATH='AUTO_ROUND_PATH:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
f"--model {model_path} --iter 2 --output_dir {self.save_dir} --device {self.device}"
)
assert not (res > 0 or res == -1), "Phi-3.5 tuning fail"
@@ -129,7 +133,7 @@ def test_phi3_vision_awq(self):
model_path = "/models/Phi-3.5-vision-instruct/"
## test tune
res = os.system(
f"cd .. && {self.python_path} -m auto_round --mllm "
f"PYTHONPATH='AUTO_ROUND_PATH:$PYTHONPATH' {self.python_path} -m auto_round --mllm "
f"--model {model_path} --iter 2 --quant_nontext_module "
f"--nsample 64 --seqlen 32 "
f"--format auto_awq --output_dir {self.save_dir} --device {self.device}"
@@ -177,7 +181,7 @@ def test_glm(self):
model_path = "/models/glm-4v-9b/"
## test tune
res = os.system(
f"cd .. && {self.python_path} -m auto_round "
f"PYTHONPATH='AUTO_ROUND_PATH:$PYTHONPATH' {self.python_path} -m auto_round "
f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
)
assert not (res > 0 or res == -1), "glm-4v-9b tuning fail"
@@ -186,7 +190,7 @@ def test_granite_vision(self):
model_path = "/models/granite-vision-3.2-2b"
## test tune
res = os.system(
f"cd .. && {self.python_path} -m auto_round "
f"PYTHONPATH='AUTO_ROUND_PATH:$PYTHONPATH' {self.python_path} -m auto_round "
f"--model {model_path} --iter 1 --output_dir {self.save_dir} --device {self.device}"
)
assert not (res > 0 or res == -1), "granite-vision-3.2-2b tuning fail"
test/test_cuda/quantization/test_mix_bits.py (1 change: 0 additions & 1 deletion)

@@ -242,7 +242,6 @@ def test_mixed_autoround_format_vllm(self, tiny_opt_model_path, dataloader):
        }
        autoround = AutoRound(
            tiny_opt_model_path,
-            self.tokenizer,
            scheme="W4A16",
            iters=2,
            seqlen=2,