4 changes: 3 additions & 1 deletion README.md
@@ -195,7 +195,8 @@ ar.quantize_and_save(output_dir="./qmodel", format="auto_round")

##### Algorithm Settings
- **`enable_alg_ext` (bool)**: [Experimental Feature] Only for `iters>0`. Enable algorithm variants for specific schemes (e.g., MXFP4/W2A16) that could bring notable improvements. Default is `False`.
- **`disable_opt_rtn` (bool)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None` (improved RTN enabled).

- **`disable_opt_rtn` (bool|None)**: Use pure RTN mode for specific schemes (e.g., GGUF and WOQ). Default is `None`; when unset, it resolves to `False` (improved RTN) in most cases for better accuracy, but is forced to `True` for schemes with known issues, such as INT8 RTN (see the usage sketch after this parameter list).

##### Tuning Process Parameters
- **`iters` (int)**: Number of tuning iterations (default is `200`). Common values: 0 (RTN mode), 50 (with lr=5e-3 recommended), 1000. Higher values increase accuracy but slow down tuning.
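
A usage sketch of how `iters` and `disable_opt_rtn` interact (illustrative only: the model name is a placeholder, and it assumes the `AutoRound` constructor accepts a model name plus the parameters documented above):

```python
from auto_round import AutoRound

# iters=0 selects RTN mode; leaving disable_opt_rtn=None lets the library
# resolve it (False for most schemes, True for known-issue cases such as INT8 RTN).
ar = AutoRound("facebook/opt-125m", scheme="W4A16", iters=0)
ar.quantize_and_save(output_dir="./qmodel", format="auto_round")

# Force pure RTN explicitly instead:
ar = AutoRound("facebook/opt-125m", scheme="W4A16", iters=0, disable_opt_rtn=True)
```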
@@ -355,3 +356,4 @@ Special thanks to open-source low precision libraries such as AutoGPTQ, AutoAWQ,

## 🌟 Support Us
If you find AutoRound helpful, please ⭐ star the repo and share it with your community!

15 changes: 12 additions & 3 deletions auto_round/__main__.py
@@ -253,14 +253,23 @@ def __init__(self, *args, **kwargs):
action="store_true",
help="Enable PyTorch deterministic algorithms for reproducible results. ",
)
tuning.add_argument(
group = tuning.add_mutually_exclusive_group()
group.add_argument(
"--disable_opt_rtn",
"--disable-opt-rtn",
action=argparse.BooleanOptionalAction,
action="store_const",
const=True,
dest="disable_opt_rtn",
default=None,
help="Disable optimization for RTN (Round-To-Nearest) mode when iters=0. "
"RTN is fast but less accurate; keeping optimization enabled is recommended.",
)
group.add_argument(
"--enable_opt_rtn",
action="store_const",
const=False,
dest="disable_opt_rtn",
help="Enable optimization for RTN mode when iters=0.",
)

scheme = self.add_argument_group("Scheme Arguments")
scheme.add_argument("--bits", default=None, type=int, help="Number of bits for weight quantization. ")
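The mutually exclusive `store_const` pair above shares a single `dest`, which is what preserves the tri-state default (`None` when neither flag is passed). A self-contained sketch of the pattern (flag names mirror the diff; everything else is illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
# Both flags write to the same destination; when neither is passed, the
# default of None survives, so downstream code can tell "unset" apart
# from an explicit True/False.
group.add_argument("--disable-opt-rtn", dest="disable_opt_rtn",
                   action="store_const", const=True, default=None)
group.add_argument("--enable-opt-rtn", dest="disable_opt_rtn",
                   action="store_const", const=False)

print(parser.parse_args([]).disable_opt_rtn)                     # None
print(parser.parse_args(["--disable-opt-rtn"]).disable_opt_rtn)  # True
print(parser.parse_args(["--enable-opt-rtn"]).disable_opt_rtn)   # False
```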
2 changes: 1 addition & 1 deletion auto_round/autoround.py
@@ -85,7 +85,7 @@ def __new__(
enable_adam: bool = False,
extra_config: ExtraConfig = None,
enable_alg_ext: bool = None,
disable_opt_rtn: Optional[bool] = None,
disable_opt_rtn: bool | None = None,
low_cpu_mem_usage: bool = False,
**kwargs,
) -> BaseCompressor:
6 changes: 4 additions & 2 deletions auto_round/compressors/base.py
@@ -189,7 +189,7 @@ def __init__(
device_map: Union[str, torch.device, int, dict] = 0,
enable_torch_compile: bool = False,
enable_alg_ext: bool = False,
disable_opt_rtn: Optional[bool] = None,
disable_opt_rtn: bool | None = None,
seed: int = 42,
low_cpu_mem_usage: bool = False,
**kwargs,
@@ -397,9 +397,11 @@ def __init__(
and self.data_type == "int"
and disable_opt_rtn is None
):
logger.warning("for INT8 RTN quantization, set `--disable_opt_rtn` as default.")
logger.warning("For INT8 RTN quantization, set `--disable_opt_rtn` as default.")
disable_opt_rtn = True
if disable_opt_rtn is None:
if self.iters == 0:
logger.info("For the most RTN cases, set `--disable_opt_rtn` to False as default.")
disable_otp_rtn = False

self.minmax_lr = minmax_lr or self.lr
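Taken together, the hunk above resolves `disable_opt_rtn` in a fixed order. A standalone sketch of that logic (not the library's exact code; the first lines of the INT8 condition are elided in the diff, so the `bits == 8` check is an assumption inferred from the warning message):

```python
def resolve_disable_opt_rtn(disable_opt_rtn, iters, bits, data_type):
    """Tri-state resolution: an explicit user choice always wins."""
    if disable_opt_rtn is not None:
        return disable_opt_rtn
    # Known-issue case from the warning above (bits == 8 is assumed,
    # since part of the condition is elided in the diff).
    if bits == 8 and data_type == "int":
        return True
    if iters == 0:
        return False  # most RTN cases: keep improved RTN enabled
    return None  # left unresolved for tuning runs, as in the hunk
```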
4 changes: 2 additions & 2 deletions auto_round/compressors/config.py
@@ -32,7 +32,7 @@ def __init__(
self,
# tuning
amp: bool = True,
disable_opt_rtn: Optional[bool] = True,
disable_opt_rtn: bool | None = None,
enable_alg_ext: bool = False,
enable_minmax_tuning: bool = True,
enable_norm_bias_tuning: bool = False,
@@ -247,7 +247,7 @@ def is_default(self):
@dataclass
class TuningExtraConfig(BaseExtraConfig):
amp: bool = True
disable_opt_rtn: Optional[bool] = True
disable_opt_rtn: bool | None = None
enable_alg_ext: bool = False
enable_minmax_tuning: bool = True
enable_norm_bias_tuning: bool = False
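The `bool | None` annotation keeps the dataclass field tri-state as well; a minimal illustration with a hypothetical class (not the real `TuningExtraConfig`):

```python
from dataclasses import dataclass

@dataclass
class Flags:
    # None means "not set by the user"; the library resolves it later.
    disable_opt_rtn: bool | None = None

assert Flags().disable_opt_rtn is None              # unset, distinguishable from False
assert Flags(disable_opt_rtn=True).disable_opt_rtn  # explicit override
```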