2 changes: 1 addition & 1 deletion README.md
@@ -195,7 +195,7 @@ ar.quantize_and_save(output_dir="./qmodel", format="auto_round")

##### Algorithm Settings
- **`enable_alg_ext` (bool)**: [Experimental Feature] Only for `iters>0`. Enable algorithm variants for specific schemes (e.g., MXFP4/W2A16) that could bring notable improvements. Default is `False`.
-- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `False` (improved RTN enabled).
+- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None` (auto: improved RTN is enabled, except for INT8 RTN, which falls back to pure RTN).

##### Tuning Process Parameters
- **`iters` (int)**: Number of tuning iterations (default is `200`). Common values: 0 (RTN mode), 50 (with lr=5e-3 recommended), 1000. Higher values increase accuracy but slow down tuning.
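With the default now `None`, callers only pass `disable_opt_rtn` to override the auto-decision. A minimal usage sketch (the model name is a placeholder; the `AutoRound` call shape follows the README's earlier example):

```python
from auto_round import AutoRound

# Leave disable_opt_rtn unset (None): the compressor auto-decides. Improved RTN
# is used except for INT8 RTN (bits >= 8, act_bits >= 16, iters == 0, data_type == "int").
ar = AutoRound("facebook/opt-125m", bits=8, iters=0)

# Pass an explicit bool to override, e.g. force improved RTN even for INT8:
ar = AutoRound("facebook/opt-125m", bits=8, iters=0, disable_opt_rtn=False)
ar.quantize_and_save(output_dir="./qmodel", format="auto_round")
```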
4 changes: 3 additions & 1 deletion auto_round/__main__.py
@@ -255,7 +255,9 @@ def __init__(self, *args, **kwargs):
)
tuning.add_argument(
"--disable_opt_rtn",
action="store_true",
"--disable-opt-rtn",
action=argparse.BooleanOptionalAction,
default=None,
help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. "
"RTN is fast but less accurate; keeping optimization enabled is recommended.",
)
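`argparse.BooleanOptionalAction` (Python >= 3.9) auto-generates matching `--no-` variants, which is what makes the tri-state default workable from the CLI. A standalone sketch of the behavior (not the project's actual parser):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--disable_opt_rtn",
    "--disable-opt-rtn",
    action=argparse.BooleanOptionalAction,
    default=None,
)

print(parser.parse_args([]).disable_opt_rtn)                        # None -> auto-decide
print(parser.parse_args(["--disable_opt_rtn"]).disable_opt_rtn)     # True -> pure RTN
print(parser.parse_args(["--no-disable_opt_rtn"]).disable_opt_rtn)  # False -> improved RTN
```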
8 changes: 4 additions & 4 deletions auto_round/autoround.py
@@ -13,7 +13,7 @@
# limitations under the License.
from __future__ import annotations

-from typing import TYPE_CHECKING, Union
+from typing import TYPE_CHECKING, Optional, Union

import torch

@@ -85,7 +85,7 @@ def __new__(
enable_adam: bool = False,
extra_config: ExtraConfig = None,
enable_alg_ext: bool = None,
-disable_opt_rtn: bool = None,
+disable_opt_rtn: Optional[bool] = None,
low_cpu_mem_usage: bool = False,
**kwargs,
) -> BaseCompressor:
@@ -112,7 +112,7 @@ def __new__(
enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2)
for better accuracy. Defaults to False.
disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0) for fast quantization
-with lower accuracy. Defaults to False.
+with lower accuracy. Defaults to None.
low_cpu_mem_usage (bool, optional): Lower CPU memory mode. Defaults to False.

bits (int, optional): Weight quantization bits. Defaults to 4.
@@ -291,7 +291,7 @@ class AutoRoundLLM(LLMCompressor):
act_dynamic (bool, optional): Dynamic activation quantization. Defaults to True.
enable_torch_compile (bool, optional): Enable torch.compile for quant blocks/layers. Defaults to False.
device_map (str | dict, optional): Device placement map. Defaults to None.
-disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
+disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to None.
enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
**kwargs: Backward compatible options:
- enable_alg_ext, quant_lm_head, lr, lr_scheduler, sampler, not_use_best_mse, dynamic_max_gap,
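The `Optional[bool]` annotation also matters for type checkers: `bool = None` is rejected under strict checking, while `Optional[bool]` makes the tri-state contract explicit. A minimal illustration (hypothetical function, not part of this PR):

```python
from typing import Optional

def quantize(disable_opt_rtn: Optional[bool] = None) -> None:
    """None = auto-decide, True = pure RTN, False = improved RTN."""
    ...
```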
17 changes: 15 additions & 2 deletions auto_round/compressors/base.py
@@ -189,7 +189,7 @@ def __init__(
device_map: Union[str, torch.device, int, dict] = 0,
enable_torch_compile: bool = False,
enable_alg_ext: bool = False,
-disable_opt_rtn: bool = False,
+disable_opt_rtn: Optional[bool] = None,
seed: int = 42,
low_cpu_mem_usage: bool = False,
**kwargs,
@@ -226,7 +226,7 @@ def __init__(
act_dynamic (bool, optional): Dynamic activation quantization. Defaults to True.
enable_torch_compile (bool, optional): Enable torch.compile for quant blocks/layers. Defaults to False.
device_map (str | dict, optional): Device placement map. Defaults to None.
-disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
+disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to None.
enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
**kwargs: Backward compatible options:
- enable_alg_ext, quant_lm_head, lr, lr_scheduler, not_use_best_mse, dynamic_max_gap,
@@ -389,6 +389,19 @@ def __init__(
"for bits <= 2, it is recommended to enable `auto-round-best` " "and turn on `--enable_alg_ext` "
)

+# Automatically adjust the disable_opt_rtn option if the user does not explicitly set it.
+if (
+    self.bits >= 8
+    and self.act_bits >= 16
+    and self.iters == 0
+    and self.data_type == "int"
+    and disable_opt_rtn is None
+):
+    logger.warning("for INT8 RTN quantization, `disable_opt_rtn` is enabled by default.")
+    disable_opt_rtn = True
+if disable_opt_rtn is None:
+    disable_opt_rtn = False
+
self.minmax_lr = minmax_lr or self.lr
self.enable_alg_ext = enable_alg_ext
self.not_use_best_mse = not_use_best_mse
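The new auto-adjust branch can be read as a pure function. A sketch of the resolution logic added above (the helper name is hypothetical):

```python
from typing import Optional

def resolve_disable_opt_rtn(
    disable_opt_rtn: Optional[bool],
    bits: int,
    act_bits: int,
    iters: int,
    data_type: str,
) -> bool:
    """Mirror of the tri-state resolution in the compressor's __init__."""
    if (
        disable_opt_rtn is None
        and bits >= 8
        and act_bits >= 16
        and iters == 0
        and data_type == "int"
    ):
        # INT8 (or wider) weight-only RTN: plain RTN is already accurate enough,
        # so the optimized-RTN pass is skipped by default.
        return True
    return bool(disable_opt_rtn)  # None falls through to False

assert resolve_disable_opt_rtn(None, 8, 16, 0, "int") is True
assert resolve_disable_opt_rtn(None, 4, 16, 0, "int") is False
assert resolve_disable_opt_rtn(False, 8, 16, 0, "int") is False
```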
8 changes: 4 additions & 4 deletions auto_round/compressors/config.py
@@ -14,7 +14,7 @@
from __future__ import annotations

from dataclasses import dataclass, fields
-from typing import Any, Callable, Union
+from typing import Any, Callable, Optional, Union

import torch

@@ -32,7 +32,7 @@ def __init__(
self,
# tuning
amp: bool = True,
-disable_opt_rtn: bool = True,
+disable_opt_rtn: Optional[bool] = True,
enable_alg_ext: bool = False,
enable_minmax_tuning: bool = True,
enable_norm_bias_tuning: bool = False,
@@ -74,7 +74,7 @@
Args:
amp (bool): Whether to use automatic mixed precision (default is True).
-disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
+disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to True.
enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
enable_minmax_tuning (bool, optional): Enable weight min-max tuning. Defaults to True.
enable_norm_bias_tuning (bool): Whether to enable fast norm/layer_bias tuning.
@@ -247,7 +247,7 @@ def is_default(self):
@dataclass
class TuningExtraConfig(BaseExtraConfig):
amp: bool = True
-disable_opt_rtn: bool = True
+disable_opt_rtn: Optional[bool] = True
enable_alg_ext: bool = False
enable_minmax_tuning: bool = True
enable_norm_bias_tuning: bool = False
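Note the asymmetry this PR keeps: `ExtraConfig` still defaults `disable_opt_rtn` to `True`, while the compressor itself now defaults to `None`. A sketch of restoring auto-decide through the config (import path assumed from this diff's file layout):

```python
from auto_round.compressors.config import ExtraConfig

# disable_opt_rtn accepts the same tri-state here: True (pure RTN),
# False (improved RTN), or None (let the compressor auto-decide).
cfg = ExtraConfig(disable_opt_rtn=None)
```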
2 changes: 1 addition & 1 deletion docs/step_by_step.md
@@ -393,7 +393,7 @@ We will try to optimize the RAM usage in the future. The RAM usage is about 1.1-
| Qwen3-32B | W2A16/W4A16/W8A16 | OOM with 240G | --- | OOM with 240G | --- |
| Qwen3-32B | MXFP4/MXFP8 | 160G | 200s * len of options | 200G | 240s * len of options |
| Qwen3-32B | GGUF* | 210G | 80s * len of options | 200G | 60s * len of options |
-</details>
+</details>


#### Limitations
1 change: 0 additions & 1 deletion requirements.txt
@@ -2,7 +2,6 @@
accelerate
datasets
numpy
-# packaging # for python version <= 3.9
py-cpuinfo
threadpoolctl
torch