From f183de33062bb64ca5cb5a501666f0795ec93490 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:37:03 -0500 Subject: [PATCH 1/4] update disable_opt_rtn setting Signed-off-by: Zhang, Weiwei1 --- README.md | 4 +++- auto_round/__main__.py | 19 ++++++++++++++----- auto_round/alg_ext.py | 1 + auto_round/autoround.py | 3 ++- auto_round/compressors/base.py | 7 +++++-- auto_round/compressors/config.py | 5 +++-- 6 files changed, 28 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 335a8f33f..b2309a975 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,8 @@ ar.quantize_and_save(output_dir="./qmodel", format="auto_round") ##### Algorithm Settings - **`enable_alg_ext` (bool)**: [Experimental Feature] Only for `iters>0`. Enable algorithm variants for specific schemes (e.g., MXFP4/W2A16) that could bring notable improvements. Default is `False`. -- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None` (improved RTN enabled). + +- **`disable_opt_rtn` (bool|None)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None`. If None, it defaults to `False` in most cases to improve accuracy, but may be set to `True` due to known issues. ##### Tuning Process Parameters - **`iters` (int)**: Number of tuning iterations (default is `200`). Common values: 0 (RTN mode), 50 (with lr=5e-3 recommended), 1000. Higher values increase accuracy but slow down tuning. @@ -355,3 +356,4 @@ Special thanks to open-source low precision libraries such as AutoGPTQ, AutoAWQ, ## 🌟 Support Us If you find AutoRound helpful, please ⭐ star the repo and share it with your community! + diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 6d95b4864..835c0818a 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -253,13 +253,21 @@ def __init__(self, *args, **kwargs): action="store_true", help="Enable PyTorch deterministic algorithms for reproducible results. 
", ) - tuning.add_argument( + group = tuning.add_mutually_exclusive_group() + group.add_argument( "--disable_opt_rtn", - "--disable-opt-rtn", - action=argparse.BooleanOptionalAction, + action="store_const", + const=True, + dest="disable_opt_rtn", default=None, - help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. " - "RTN is fast but less accurate; keeping optimization enabled is recommended.", + help="Enable RTN-disable mode (less accurate, faster).", + ) + group.add_argument( + "--enable_opt_rtn", + action="store_const", + const=False, + dest="disable_opt_rtn", + help="Using optimized RTN.", ) scheme = self.add_argument_group("Scheme Arguments") @@ -927,3 +935,4 @@ def run_fast(): if __name__ == "__main__": run() + diff --git a/auto_round/alg_ext.py b/auto_round/alg_ext.py index dd188896a..b6cbfe724 100644 --- a/auto_round/alg_ext.py +++ b/auto_round/alg_ext.py @@ -1145,3 +1145,4 @@ def dq_wrapper_block(block, enable_minmax_tuning, enable_norm_bias_tuning, devic logger.warning_once(f"{m.__class__.__name__} is not supported") return quantized_layers, unquantized_layers + diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 51c850c16..1902393b7 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -85,7 +85,7 @@ def __new__( enable_adam: bool = False, extra_config: ExtraConfig = None, enable_alg_ext: bool = None, - disable_opt_rtn: Optional[bool] = None, + disable_opt_rtn: Union[bool | None] = None, low_cpu_mem_usage: bool = False, **kwargs, ) -> BaseCompressor: @@ -661,3 +661,4 @@ def __init__( seed=seed, **kwargs, ) + diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index cd9fc516c..b4472dd72 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -189,7 +189,7 @@ def __init__( device_map: Union[str, torch.device, int, dict] = 0, enable_torch_compile: bool = False, enable_alg_ext: bool = False, - disable_opt_rtn: Optional[bool] = None, + disable_opt_rtn: 
[bool | None] = None, seed: int = 42, low_cpu_mem_usage: bool = False, **kwargs, @@ -397,9 +397,11 @@ def __init__( and self.data_type == "int" and disable_opt_rtn is None ): - logger.warning("for INT8 RTN quantization, set `--disable_opt_rtn` as default.") + logger.warning("For INT8 RTN quantization, set `--disable_opt_rtn` as default.") disable_opt_rtn = True if disable_opt_rtn is None: + if self.iters == 0: + logger.info("For most RTN cases, `disable_opt_rtn` is set to False by default.") - disable_otp_rtn = False + disable_opt_rtn = False self.minmax_lr = minmax_lr or self.lr @@ -3130,3 +3132,4 @@ def _sampling_inputs( class LLMCompressor(BaseCompressor): pass + diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index 4bab246c3..d1c57d5ec 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -32,7 +32,7 @@ def __init__( self, # tuning amp: bool = True, - disable_opt_rtn: Optional[bool] = True, + disable_opt_rtn: [bool | None] = None, enable_alg_ext: bool = False, enable_minmax_tuning: bool = True, enable_norm_bias_tuning: bool = False, @@ -247,7 +247,7 @@ def is_default(self): @dataclass class TuningExtraConfig(BaseExtraConfig): amp: bool = True - disable_opt_rtn: Optional[bool] = True + disable_opt_rtn: [bool | None] = True enable_alg_ext: bool = False enable_minmax_tuning: bool = True enable_norm_bias_tuning: bool = False @@ -294,3 +294,4 @@ class DiffusionExtraConfig(BaseExtraConfig): guidance_scale: float = 7.5 num_inference_steps: int = 50 generator_seed: int = None + From a837e855dcff743b6170943ebc49790a7708bd83 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:42:07 -0500 Subject: [PATCH 2/4] refine docstring, fixtypo Signed-off-by: Zhang, Weiwei1 --- auto_round/__main__.py | 1 - auto_round/alg_ext.py | 1 - auto_round/autoround.py | 1 - auto_round/compressors/base.py | 1 - auto_round/compressors/config.py | 1 - 5 files changed, 5 deletions(-) diff --git a/auto_round/__main__.py
b/auto_round/__main__.py index 835c0818a..8636d482e 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -935,4 +935,3 @@ def run_fast(): if __name__ == "__main__": run() - diff --git a/auto_round/alg_ext.py b/auto_round/alg_ext.py index b6cbfe724..dd188896a 100644 --- a/auto_round/alg_ext.py +++ b/auto_round/alg_ext.py @@ -1145,4 +1145,3 @@ def dq_wrapper_block(block, enable_minmax_tuning, enable_norm_bias_tuning, devic logger.warning_once(f"{m.__class__.__name__} is not supported") return quantized_layers, unquantized_layers - diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 1902393b7..82e435f29 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -661,4 +661,3 @@ def __init__( seed=seed, **kwargs, ) - diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index b4472dd72..20a7ba61f 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -3132,4 +3132,3 @@ def _sampling_inputs( class LLMCompressor(BaseCompressor): pass - diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index d1c57d5ec..5bbf282c4 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -294,4 +294,3 @@ class DiffusionExtraConfig(BaseExtraConfig): guidance_scale: float = 7.5 num_inference_steps: int = 50 generator_seed: int = None - From 33d4cc92bf000a084f15f245d0f3e31b46558227 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:43:46 -0500 Subject: [PATCH 3/4] refine docstring Signed-off-by: Zhang, Weiwei1 --- auto_round/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 8636d482e..0a17a05d0 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -260,14 +260,15 @@ def __init__(self, *args, **kwargs): const=True, dest="disable_opt_rtn", default=None, - help="Enable RTN-disable mode (less accurate, faster).", + 
help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. " + "RTN is fast but less accurate; keeping optimization enabled is recommended.", ) group.add_argument( "--enable_opt_rtn", action="store_const", const=False, dest="disable_opt_rtn", - help="Using optimized RTN.", + help="Enable optimization for RTN mode when iters=0.", ) scheme = self.add_argument_group("Scheme Arguments") From ec4267e428aea25e245e5016d23118e373d1fb14 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 07:39:20 -0500 Subject: [PATCH 4/4] fixtypo Signed-off-by: Zhang, Weiwei1 --- auto_round/autoround.py | 2 +- auto_round/compressors/base.py | 2 +- auto_round/compressors/config.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 82e435f29..6d69c01c3 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -85,7 +85,7 @@ def __new__( enable_adam: bool = False, extra_config: ExtraConfig = None, enable_alg_ext: bool = None, - disable_opt_rtn: Union[bool | None] = None, + disable_opt_rtn: bool | None = None, low_cpu_mem_usage: bool = False, **kwargs, ) -> BaseCompressor: diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 20a7ba61f..a2af9e011 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -189,7 +189,7 @@ def __init__( device_map: Union[str, torch.device, int, dict] = 0, enable_torch_compile: bool = False, enable_alg_ext: bool = False, - disable_opt_rtn: [bool | None] = None, + disable_opt_rtn: bool | None = None, seed: int = 42, low_cpu_mem_usage: bool = False, **kwargs, diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index 5bbf282c4..f7e0d92f2 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -32,7 +32,7 @@ def __init__( self, # tuning amp: bool = True, - disable_opt_rtn: [bool | None] = None, + disable_opt_rtn: bool | None = None, 
enable_alg_ext: bool = False, enable_minmax_tuning: bool = True, enable_norm_bias_tuning: bool = False, @@ -247,7 +247,7 @@ def is_default(self): @dataclass class TuningExtraConfig(BaseExtraConfig): amp: bool = True - disable_opt_rtn: [bool | None] = True + disable_opt_rtn: bool | None = True enable_alg_ext: bool = False enable_minmax_tuning: bool = True enable_norm_bias_tuning: bool = False