From 52e5bf68bf02e5329dc0792e93b6f94d96e1eeaa Mon Sep 17 00:00:00 2001
From: Weiwei
Date: Thu, 8 Jan 2026 17:18:45 +0800
Subject: [PATCH] =?UTF-8?q?Revert=20"set=20disable=5Fopt=5Frtn=20to=20opti?=
 =?UTF-8?q?onal=20bool=20and=20change=20default=20value=20=20to=20Non?=
 =?UTF-8?q?=E2=80=A6"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bef28c9a116f4ea871dc0d7411785c6084eda0cb.
---
 README.md                        |  2 +-
 auto_round/__main__.py           |  4 +---
 auto_round/autoround.py          |  8 ++++----
 auto_round/compressors/base.py   | 17 ++---------------
 auto_round/compressors/config.py |  8 ++++----
 docs/step_by_step.md             |  2 +-
 requirements.txt                 |  1 +
 7 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 335a8f33f..f24f28ddf 100644
--- a/README.md
+++ b/README.md
@@ -195,7 +195,7 @@ ar.quantize_and_save(output_dir="./qmodel", format="auto_round")
 
 ##### Algorithm Settings
 - **`enable_alg_ext` (bool)**: [Experimental Feature] Only for `iters>0`. Enable algorithm variants for specific schemes (e.g., MXFP4/W2A16) that could bring notable improvements. Default is `False`.
-- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None` (improved RTN enabled).
+- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `False` (improved RTN enabled).
 
 ##### Tuning Process Parameters
 - **`iters` (int)**: Number of tuning iterations (default is `200`). Common values: 0 (RTN mode), 50 (with lr=5e-3 recommended), 1000. Higher values increase accuracy but slow down tuning.
diff --git a/auto_round/__main__.py b/auto_round/__main__.py
index 6d95b4864..adbd8fe02 100644
--- a/auto_round/__main__.py
+++ b/auto_round/__main__.py
@@ -255,9 +255,7 @@ def __init__(self, *args, **kwargs):
         )
         tuning.add_argument(
             "--disable_opt_rtn",
-            "--disable-opt-rtn",
-            action=argparse.BooleanOptionalAction,
-            default=None,
+            action="store_true",
             help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. "
             "RTN is fast but less accurate; keeping optimization enabled is recommended.",
         )
diff --git a/auto_round/autoround.py b/auto_round/autoround.py
index 51c850c16..050281983 100644
--- a/auto_round/autoround.py
+++ b/auto_round/autoround.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Union
 
 import torch
 
@@ -85,7 +85,7 @@ def __new__(
         enable_adam: bool = False,
         extra_config: ExtraConfig = None,
         enable_alg_ext: bool = None,
-        disable_opt_rtn: Optional[bool] = None,
+        disable_opt_rtn: bool = None,
         low_cpu_mem_usage: bool = False,
         **kwargs,
     ) -> BaseCompressor:
@@ -112,7 +112,7 @@ def __new__(
            enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2) for better
                accuracy. Defaults to False.
            disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0) for fast quantization
-               with lower accuracy. Defaults to None.
+               with lower accuracy. Defaults to False.
            low_cpu_mem_usage (bool, optional): Lower CPU memory mode. Defaults to False.
 
            bits (int, optional): Weight quantization bits. Defaults to 4.
@@ -291,7 +291,7 @@ class AutoRoundLLM(LLMCompressor):
         act_dynamic (bool, optional): Dynamic activation quantization. Defaults to True.
         enable_torch_compile (bool, optional): Enable torch.compile for quant blocks/layers. Defaults to False.
         device_map (str | dict, optional): Device placement map. Defaults to None.
-        disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to None.
+        disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
         enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
         **kwargs: Backward compatible options:
             - enable_alg_ext, quant_lm_head, lr, lr_scheduler, sampler, not_use_best_mse, dynamic_max_gap,
diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index cd9fc516c..2665d74ef 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -189,7 +189,7 @@ def __init__(
         device_map: Union[str, torch.device, int, dict] = 0,
         enable_torch_compile: bool = False,
         enable_alg_ext: bool = False,
-        disable_opt_rtn: Optional[bool] = None,
+        disable_opt_rtn: bool = False,
         seed: int = 42,
         low_cpu_mem_usage: bool = False,
         **kwargs,
@@ -226,7 +226,7 @@ def __init__(
            act_dynamic (bool, optional): Dynamic activation quantization. Defaults to True.
            enable_torch_compile (bool, optional): Enable torch.compile for quant blocks/layers. Defaults to False.
            device_map (str | dict, optional): Device placement map. Defaults to None.
-           disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to None.
+           disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
            enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
            **kwargs: Backward compatible options:
                - enable_alg_ext, quant_lm_head, lr, lr_scheduler, not_use_best_mse, dynamic_max_gap,
@@ -389,19 +389,6 @@ def __init__(
                 "for bits <= 2, it is recommended to enable `auto-round-best` "
                 "and turn on `--enable_alg_ext` "
             )
-        # Automatically adjust the disable_opt_rtn option if the user does not explicitly set it.
-        if (
-            self.bits >= 8
-            and self.act_bits >= 16
-            and self.iters == 0
-            and self.data_type == "int"
-            and disable_opt_rtn is None
-        ):
-            logger.warning("for INT8 RTN quantization, set `--disable_opt_rtn` as default.")
-            disable_opt_rtn = True
-        if disable_opt_rtn is None:
-            disable_otp_rtn = False
-
         self.minmax_lr = minmax_lr or self.lr
         self.enable_alg_ext = enable_alg_ext
         self.not_use_best_mse = not_use_best_mse
diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py
index 4bab246c3..7c9398a91 100644
--- a/auto_round/compressors/config.py
+++ b/auto_round/compressors/config.py
@@ -14,7 +14,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, fields
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Union
 
 import torch
 
@@ -32,7 +32,7 @@ def __init__(
         self,
         # tuning
         amp: bool = True,
-        disable_opt_rtn: Optional[bool] = True,
+        disable_opt_rtn: bool = True,
         enable_alg_ext: bool = False,
         enable_minmax_tuning: bool = True,
         enable_norm_bias_tuning: bool = False,
@@ -74,7 +74,7 @@ def __init__(
 
         Args:
            amp (bool): Whether to use automatic mixed precision (default is True).
-           disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to True.
+           disable_opt_rtn (bool, optional): Disable RTN-mode optimization (iters=0). Defaults to False.
            enable_alg_ext (bool, optional): Enable algorithm extension (primarily for INT2). Defaults to False.
            enable_minmax_tuning (bool, optional): Enable weight min-max tuning. Defaults to True.
            enable_norm_bias_tuning (bool): Whether to enable fast norm/layer_bias tuning.
@@ -247,7 +247,7 @@ def is_default(self):
 @dataclass
 class TuningExtraConfig(BaseExtraConfig):
     amp: bool = True
-    disable_opt_rtn: Optional[bool] = True
+    disable_opt_rtn: bool = True
     enable_alg_ext: bool = False
     enable_minmax_tuning: bool = True
     enable_norm_bias_tuning: bool = False
diff --git a/docs/step_by_step.md b/docs/step_by_step.md
index 260980895..8163ac394 100644
--- a/docs/step_by_step.md
+++ b/docs/step_by_step.md
@@ -393,7 +393,7 @@ We will try to optimize the RAM usage in the future. The RAM usage is about 1.1-
 | Qwen3-32B | W2A16/W4A16/W8A16 | OOM with 240G | --- | OOM with 240G | --- |
 | Qwen3-32B | MXFP4/MXFP8 | 160G | 200s * len of options | 200G | 240s * len of options |
 | Qwen3-32B | GGUF* | 210G | 80s * len of options | 200G | 60s * len of options |
- 
+
 
 #### Limitations
 
diff --git a/requirements.txt b/requirements.txt
index b84214f51..4f348bfdc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@
 accelerate
 datasets
 numpy
+# packaging # for python version <= 3.9
 py-cpuinfo
 threadpoolctl
 torch