intel · n1ck-guo · Aug 4, 2025 · Aug 4, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/auto_round/script/llm.py b/auto_round/script/llm.py
@@ -55,6 +55,8 @@ def __init__(self, *args, **kwargs):
 
         self.add_argument("--eval", action="store_true", help="whether to use eval only mode")
 
+        self.add_argument("--sq", action="store_true", help="whether to use smoothquant")
+
         self.add_argument("--bits", default=4, type=int, help="number of weight bits")
 
         self.add_argument("--eval_bs", default=None, type=int, help="batch size in evaluation")
@@ -534,6 +536,33 @@ def tune(args):
 
     enable_torch_compile = True if "--enable_torch_compile" in sys.argv else False
 
+    # Optionally apply SmoothQuant (enabled with --sq) to the model before AutoRound tuning.
+    if args.sq:
+        from auto_round.calib_dataset import get_dataloader
+
+        dataloader = get_dataloader(tokenizer, args.seqlen, bs=8, nsamples=args.nsamples)
+        auto_alpha_args = {
+            "init_alpha": 0.5,
+            "alpha_min": 0.1,
+            "alpha_max": 1.0,
+            "alpha_step": 0.1,
+            "shared_criterion": "mean",
+            "n_samples": 512,  ##512 for cuda, 128 for cpu?
+            # "do_blockwise": True
+        }
+        from auto_round.smooth_quant import SmoothQuant
+
+        model = model.to(device_str)
+        sq = SmoothQuant(model, dataloader, device=model.device, group_size=-1)
+        model = sq.transform_model(
+            alpha=0.5,
+            # alpha="auto",
+            auto_alpha_args=auto_alpha_args,
+            folding=True,
+            op_types=[torch.nn.Linear, torch.nn.Conv2d],
+            calib_iter=100,
+        )
+
     autoround = round(
         model=model,
         tokenizer=tokenizer,
@@ -768,6 +797,7 @@ def eval(args):
             if file.endswith(".gguf"):
                 is_gguf_file = True
                 gguf_file = file
+        model = os.path.dirname(args.model)
     eval_model_dtype = get_model_dtype(args.eval_model_dtype)
     if is_gguf_file:
         import torch

diff --git a/auto_round/smooth_quant/__init__.py b/auto_round/smooth_quant/__init__.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from auto_round.smooth_quant.sq import SmoothQuant