From f183de33062bb64ca5cb5a501666f0795ec93490 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:37:03 -0500 Subject: [PATCH 1/4] update disable_opt_rtn setting Signed-off-by: Zhang, Weiwei1 --- README.md | 4 +++- auto_round/__main__.py | 19 ++++++++++++++----- auto_round/alg_ext.py | 1 + auto_round/autoround.py | 3 ++- auto_round/compressors/base.py | 7 +++++-- auto_round/compressors/config.py | 5 +++-- 6 files changed, 28 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 335a8f33f..b2309a975 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,8 @@ ar.quantize_and_save(output_dir="./qmodel", format="auto_round") ##### Algorithm Settings - **`enable_alg_ext` (bool)**: [Experimental Feature] Only for `iters>0`. Enable algorithm variants for specific schemes (e.g., MXFP4/W2A16) that could bring notable improvements. Default is `False`. -- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None` (improved RTN enabled). + +- **`disable_opt_rtn` (bool|None)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None`. If None, it defaults to `False` in most cases to improve accuracy, but may be set to `True` due to known issues. ##### Tuning Process Parameters - **`iters` (int)**: Number of tuning iterations (default is `200`). Common values: 0 (RTN mode), 50 (with lr=5e-3 recommended), 1000. Higher values increase accuracy but slow down tuning. @@ -355,3 +356,4 @@ Special thanks to open-source low precision libraries such as AutoGPTQ, AutoAWQ, ## 🌟 Support Us If you find AutoRound helpful, please ⭐ star the repo and share it with your community! + diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 6d95b4864..835c0818a 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -253,13 +253,21 @@ def __init__(self, *args, **kwargs): action="store_true", help="Enable PyTorch deterministic algorithms for reproducible results. 
", ) - tuning.add_argument( + group = tuning.add_mutually_exclusive_group() + group.add_argument( "--disable_opt_rtn", - "--disable-opt-rtn", - action=argparse.BooleanOptionalAction, + action="store_const", + const=True, + dest="disable_opt_rtn", default=None, - help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. " - "RTN is fast but less accurate; keeping optimization enabled is recommended.", + help="Enable RTN-disable mode (less accurate, faster).", + ) + group.add_argument( + "--enable_opt_rtn", + action="store_const", + const=False, + dest="disable_opt_rtn", + help="Using optimized RTN.", ) scheme = self.add_argument_group("Scheme Arguments") @@ -927,3 +935,4 @@ def run_fast(): if __name__ == "__main__": run() + diff --git a/auto_round/alg_ext.py b/auto_round/alg_ext.py index dd188896a..b6cbfe724 100644 --- a/auto_round/alg_ext.py +++ b/auto_round/alg_ext.py @@ -1145,3 +1145,4 @@ def dq_wrapper_block(block, enable_minmax_tuning, enable_norm_bias_tuning, devic logger.warning_once(f"{m.__class__.__name__} is not supported") return quantized_layers, unquantized_layers + diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 51c850c16..1902393b7 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -85,7 +85,7 @@ def __new__( enable_adam: bool = False, extra_config: ExtraConfig = None, enable_alg_ext: bool = None, - disable_opt_rtn: Optional[bool] = None, + disable_opt_rtn: Union[bool | None] = None, low_cpu_mem_usage: bool = False, **kwargs, ) -> BaseCompressor: @@ -661,3 +661,4 @@ def __init__( seed=seed, **kwargs, ) + diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index cd9fc516c..b4472dd72 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -189,7 +189,7 @@ def __init__( device_map: Union[str, torch.device, int, dict] = 0, enable_torch_compile: bool = False, enable_alg_ext: bool = False, - disable_opt_rtn: Optional[bool] = None, + disable_opt_rtn: 
[bool | None] = None, seed: int = 42, low_cpu_mem_usage: bool = False, **kwargs, @@ -397,9 +397,11 @@ def __init__( and self.data_type == "int" and disable_opt_rtn is None ): - logger.warning("for INT8 RTN quantization, set `--disable_opt_rtn` as default.") + logger.warning("For INT8 RTN quantization, set `--disable_opt_rtn` as default.") disable_opt_rtn = True if disable_opt_rtn is None: + if self.iters == 0: + logger.info("For most RTN cases, `disable_opt_rtn` is set to False by default.") - disable_otp_rtn = False + disable_opt_rtn = False self.minmax_lr = minmax_lr or self.lr @@ -3130,3 +3132,4 @@ def _sampling_inputs( class LLMCompressor(BaseCompressor): pass + diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index 4bab246c3..d1c57d5ec 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -32,7 +32,7 @@ def __init__( self, # tuning amp: bool = True, - disable_opt_rtn: Optional[bool] = True, + disable_opt_rtn: [bool | None] = None, enable_alg_ext: bool = False, enable_minmax_tuning: bool = True, enable_norm_bias_tuning: bool = False, @@ -247,7 +247,7 @@ def is_default(self): @dataclass class TuningExtraConfig(BaseExtraConfig): amp: bool = True - disable_opt_rtn: Optional[bool] = True + disable_opt_rtn: [bool | None] = True enable_alg_ext: bool = False enable_minmax_tuning: bool = True enable_norm_bias_tuning: bool = False @@ -294,3 +294,4 @@ class DiffusionExtraConfig(BaseExtraConfig): guidance_scale: float = 7.5 num_inference_steps: int = 50 generator_seed: int = None + From a837e855dcff743b6170943ebc49790a7708bd83 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:42:07 -0500 Subject: [PATCH 2/4] refine docstring, fixtypo Signed-off-by: Zhang, Weiwei1 --- auto_round/__main__.py | 1 - auto_round/alg_ext.py | 1 - auto_round/autoround.py | 1 - auto_round/compressors/base.py | 1 - auto_round/compressors/config.py | 1 - 5 files changed, 5 deletions(-) diff --git a/auto_round/__main__.py
b/auto_round/__main__.py index 835c0818a..8636d482e 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -935,4 +935,3 @@ def run_fast(): if __name__ == "__main__": run() - diff --git a/auto_round/alg_ext.py b/auto_round/alg_ext.py index b6cbfe724..dd188896a 100644 --- a/auto_round/alg_ext.py +++ b/auto_round/alg_ext.py @@ -1145,4 +1145,3 @@ def dq_wrapper_block(block, enable_minmax_tuning, enable_norm_bias_tuning, devic logger.warning_once(f"{m.__class__.__name__} is not supported") return quantized_layers, unquantized_layers - diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 1902393b7..82e435f29 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -661,4 +661,3 @@ def __init__( seed=seed, **kwargs, ) - diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index b4472dd72..20a7ba61f 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -3132,4 +3132,3 @@ def _sampling_inputs( class LLMCompressor(BaseCompressor): pass - diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index d1c57d5ec..5bbf282c4 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -294,4 +294,3 @@ class DiffusionExtraConfig(BaseExtraConfig): guidance_scale: float = 7.5 num_inference_steps: int = 50 generator_seed: int = None - From 33d4cc92bf000a084f15f245d0f3e31b46558227 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 06:43:46 -0500 Subject: [PATCH 3/4] refine docstring Signed-off-by: Zhang, Weiwei1 --- auto_round/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 8636d482e..0a17a05d0 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -260,14 +260,15 @@ def __init__(self, *args, **kwargs): const=True, dest="disable_opt_rtn", default=None, - help="Enable RTN-disable mode (less accurate, faster).", + 
help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. " + "RTN is fast but less accurate; keeping optimization enabled is recommended.", ) group.add_argument( "--enable_opt_rtn", action="store_const", const=False, dest="disable_opt_rtn", - help="Using optimized RTN.", + help="Enable optimization for RTN mode when iters=0.", ) scheme = self.add_argument_group("Scheme Arguments") From ec4267e428aea25e245e5016d23118e373d1fb14 Mon Sep 17 00:00:00 2001 From: "Zhang, Weiwei1" Date: Thu, 8 Jan 2026 07:39:20 -0500 Subject: [PATCH 4/4] fixtypo Signed-off-by: Zhang, Weiwei1 --- auto_round/autoround.py | 2 +- auto_round/compressors/base.py | 2 +- auto_round/compressors/config.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 82e435f29..6d69c01c3 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -85,7 +85,7 @@ def __new__( enable_adam: bool = False, extra_config: ExtraConfig = None, enable_alg_ext: bool = None, - disable_opt_rtn: Union[bool | None] = None, + disable_opt_rtn: bool | None = None, low_cpu_mem_usage: bool = False, **kwargs, ) -> BaseCompressor: diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 20a7ba61f..a2af9e011 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -189,7 +189,7 @@ def __init__( device_map: Union[str, torch.device, int, dict] = 0, enable_torch_compile: bool = False, enable_alg_ext: bool = False, - disable_opt_rtn: [bool | None] = None, + disable_opt_rtn: bool | None = None, seed: int = 42, low_cpu_mem_usage: bool = False, **kwargs, diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index 5bbf282c4..f7e0d92f2 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -32,7 +32,7 @@ def __init__( self, # tuning amp: bool = True, - disable_opt_rtn: [bool | None] = None, + disable_opt_rtn: bool | None = None, 
enable_alg_ext: bool = False, enable_minmax_tuning: bool = True, enable_norm_bias_tuning: bool = False, @@ -247,7 +247,7 @@ def is_default(self): @dataclass class TuningExtraConfig(BaseExtraConfig): amp: bool = True - disable_opt_rtn: [bool | None] = True + disable_opt_rtn: bool | None = True enable_alg_ext: bool = False enable_minmax_tuning: bool = True enable_norm_bias_tuning: bool = False