intel · xin3he · Jan 9, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
diff --git a/test/README.md b/test/README.md
@@ -1,6 +1,6 @@
 # Unit Test (UT) Guide
 
-This project uses `pytest` for unit testing. All test cases are under the `test/` directory. Below is a simple guide for new users to write and run UTs:
+This project uses `pytest` for unit testing. All test cases are under the `test/` directory.
 
 ## 1. Environment Setup
 - Recommended Python 3.8 or above.
@@ -10,37 +10,160 @@ This project uses `pytest` for unit testing. All test cases are under the `test/
   pip install pytest
   ```
 
-## 2. Test Structure
-- Place your test files in the `test/` directory, and name them starting with `test_`.
-- You can refer to existing `test_*.py` files.
-- Common fixtures (such as `tiny_opt_model`, `opt_model`, `opt_tokenizer`, `dataloader`) and helper functions (such as `model_infer`) are defined in `confest.py` and `helpers.py` and can be imported directly.
-- Example:
-  ```python
-  # test_example.py
-    from ..helpers import model_infer
-
-    def test_model_infer(tiny_opt_model, opt_tokenizer):
-        result = model_infer(tiny_opt_model, opt_tokenizer, input_text="hello world")
-        assert result is not None
-  ```
+## 2. Test Directory Structure
 
-## 3. Running Tests
-- In the `test/` directory, run:
-  ```sh
-  pytest
-  ```
-- You can specify a single file or test case:
-  ```sh
-  pytest test_xxx.py
-  pytest -k "test_func_name"
-  ```
+Tests are organized by hardware backend (`test_cpu/`, `test_cuda/`) and functionality:
+
+- **core/** - Core AutoRound API and quantization workflows
+- **quantization/** - Quantization techniques (mixed-bit, MXFP, NVFP4, activation quant)
+- **export/** - Model serialization (GGUF, AutoGPTQ, AutoRound format)
+- **backends/** - Inference backends (Torch, Marlin, Triton, ExLlamaV2)
+- **models/** - Architecture-specific tests (MLLMs, VLMs, MoE, Diffusion)
+- **integrations/** - Third-party frameworks (vLLM, SGLang, LLMC, Transformers)
+- **schemes/** - Quantization scheme selection and configuration
+- **utils/** - Calibration datasets, logging, CLI, model loading
+- **advanced/** - Multi-GPU, FP8 input, custom pipelines
+
+## 3. Shared Test Utilities
+
+### conftest.py
+Pytest configuration file that:
+- Adds the parent directory to `sys.path`, so tests can be run and debugged without installing the package
+- Defines HPU-specific test options (`--mode=compile/lazy`)
+- Imports all fixtures from `fixtures.py`
+
+### fixtures.py
+Provides reusable pytest fixtures for testing:
+
+**Model Fixtures:**
+- `tiny_opt_model_path` - OPT-125M model with 2 layers (session scope)
+- `tiny_qwen_model_path` - Qwen3-0.6B model with 2 layers
+- `tiny_lamini_model_path` - LaMini-GPT-124M with 2 layers
+- `tiny_gptj_model_path` - Tiny GPT-J model
+- `tiny_phi2_model_path` - Phi-2 model with 2 layers
+- `tiny_deepseek_v2_model_path` - DeepSeek-V2-Lite with 2 layers
+- `tiny_qwen_moe_model_path` - Qwen-1.5-MoE with 2 layers
+- `tiny_qwen_vl_model_path` - Qwen2-VL-2B with 2 layers (vision model)
+- `tiny_qwen_2_5_vl_model_path` - Qwen2.5-VL-3B with 2 layers
+
+**Data Fixtures:**
+- `dataloader` - Simple calibration dataloader with 4 text samples
+
+All model fixtures:
+- Use session scope to avoid reloading models for each test
+- Automatically save tiny models to `./tmp/` directory
+- Clean up temporary files after test session ends
+
+### helpers.py
+Utility functions for testing:
+
+**Model Path Resolution:**
+```python
+get_model_path(model_name)  # Automatically finds local or remote model path
+```
+
+**Predefined Model Paths:**
+```python
+opt_name_or_path  # facebook/opt-125m
+qwen_name_or_path  # Qwen/Qwen3-0.6B
+lamini_name_or_path  # MBZUAI/LaMini-GPT-124M
+qwen_vl_name_or_path  # Qwen/Qwen2-VL-2B-Instruct
+# ... and more
+```
+
+**Model Manipulation:**
+```python
+get_tiny_model(model_path, num_layers=2)  # Create tiny model by slicing layers
+save_tiny_model(model_path, save_path)  # Save tiny model to disk
+```
+
+**Model Inference:**
+```python
+model_infer(model, tokenizer, input_text)  # Run inference and return output
+is_model_outputs_similar(out1, out2)  # Compare two model outputs
+```
+
+**Data Utilities:**
+```python
+DataLoader()  # Simple dataloader for calibration datasets
+```
+
+## 4. Writing New Tests
+
+### Basic Example
+```python
+# test_cpu/quantization/test_new_method.py
+import pytest
+from auto_round import AutoRound
+from ...helpers import opt_name_or_path
+
+
+class TestNewQuantMethod:
+    def test_quantization(self, tiny_opt_model_path, dataloader):
+        """Test new quantization method."""
+        autoround = AutoRound(model=tiny_opt_model_path, bits=4, group_size=128, iters=2, dataset=dataloader)
+        autoround.quantize()
+        assert autoround is not None
+```
+
+### Using Helpers and Fixtures
+```python
+from ...helpers import model_infer, opt_name_or_path, get_model_path
+
+
+def test_model_inference(tiny_opt_model_path):
+    # Use predefined model path
+    model_name = opt_name_or_path
+
+    # Or resolve custom model path
+    custom_model = get_model_path("custom/model-name")
+
+    # Run inference using helper
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    model = AutoModelForCausalLM.from_pretrained(tiny_opt_model_path)
+    tokenizer = AutoTokenizer.from_pretrained(tiny_opt_model_path)
+    output = model_infer(model, tokenizer, "Hello world")
+```
+
+### Placement Guidelines
+- **CPU-specific** → `test_cpu/<category>/`
+- **CUDA-specific** → `test_cuda/<category>/`
+- **Cross-platform** → Choose the most relevant directory
+- Import shared helpers from the `test/` root (two package levels up): `from ...helpers import ...`
+
+## 5. Running Tests
+
+```sh
+# Run all tests
+pytest
+
+# Run specific directory
+pytest test_cpu/quantization/
+
+# Run specific file
+pytest test_cpu/core/test_autoround.py
+
+# Run specific test
+pytest -k "test_layer_config"
+
+# Run with verbose output
+pytest -v -s
+```
 
-## 4. Debugging Tips
-- `confest.py` adds the parent directory to `sys.path`, so you can debug without installing the local package.
-- You can directly import project source code in your test cases.
+## 6. Hardware-Specific Requirements
+- **test_cpu/**: Run `pip install -r test_cpu/requirements.txt`
+- **test_cuda/**: Run `pip install -r test_cuda/requirements.txt`
+  - VLM: `pip install -r test_cuda/requirements_vlm.txt`
+  - Diffusion: `pip install -r test_cuda/requirements_diffusion.txt`
+  - LLMC: `pip install -r test_cuda/requirements_llmc.txt`
+  - SGLang: `pip install -r test_cuda/requirements_sglang.txt`
 
-## 5. Reference
-- Fixtures are defined in `confest.py` and `fixtures.py`
-- Helper functions are in `helpers.py`
+## 7. Contributing
+When adding new tests:
+1. Place them in the appropriate category subdirectory
+2. Use existing fixtures and helpers
+3. Clean up resources in teardown methods
+4. Use descriptive names and docstrings
 
-If you have any questions, feel free to open an issue.
+For questions, open an issue.
diff --git a/test/test_cpu/advanced/__init__.py b/test/test_cpu/advanced/__init__.py
diff --git a/test/test_cpu/backends/__init__.py b/test/test_cpu/backends/__init__.py
diff --git a/test/test_cpu/test_torch_backend.py → test/test_cpu/backends/test_torch_backend.py b/test/test_cpu/test_torch_backend.py → test/test_cpu/backends/test_torch_backend.py
@@ -8,7 +8,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.testing_utils import require_autogptq, require_gptqmodel
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRoundTorchBackend:

diff --git a/test/test_cpu/core/__init__.py b/test/test_cpu/core/__init__.py
diff --git a/test/test_cpu/test_autoopt.py → test/test_cpu/core/test_autoopt.py b/test/test_cpu/test_autoopt.py → test/test_cpu/core/test_autoopt.py
diff --git a/test/test_cpu/test_autoround.py → test/test_cpu/core/test_autoround.py b/test/test_cpu/test_autoround.py → test/test_cpu/core/test_autoround.py
@@ -9,7 +9,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.utils import get_module
 
-from ..helpers import get_model_path, model_infer, opt_name_or_path, qwen_name_or_path
+from ...helpers import get_model_path, model_infer, opt_name_or_path, qwen_name_or_path
 
 
 class TestAutoRound:

diff --git a/test/test_cpu/test_autoround_acc.py → test/test_cpu/core/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py → test/test_cpu/core/test_autoround_acc.py
@@ -9,7 +9,7 @@
 
 from auto_round import AutoRound  # pylint: disable=E0401
 
-from ..helpers import gptj_name_or_path
+from ...helpers import gptj_name_or_path
 
 
 class TestAutoRound:

diff --git a/...est_cpu/test_autoround_export_to_itrex.py → ...pu/core/test_autoround_export_to_itrex.py b/...est_cpu/test_autoround_export_to_itrex.py → ...pu/core/test_autoround_export_to_itrex.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, gptj_name_or_path
+from ...helpers import get_model_path, gptj_name_or_path
 
 
 class SimpleDataLoader:

diff --git a/test/test_cpu/test_init.py → test/test_cpu/core/test_init.py b/test/test_cpu/test_init.py → test/test_cpu/core/test_init.py
diff --git a/test/test_cpu/export/__init__.py b/test/test_cpu/export/__init__.py
diff --git a/test/test_cpu/test_export.py → test/test_cpu/export/test_export.py b/test/test_cpu/test_export.py → test/test_cpu/export/test_export.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 def _get_folder_size(path: str) -> float:

diff --git a/test/test_cpu/test_gguf_format.py → test/test_cpu/export/test_gguf_format.py b/test/test_cpu/test_gguf_format.py → test/test_cpu/export/test_gguf_format.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestGGUF:

diff --git a/test/test_cpu/integrations/__init__.py b/test/test_cpu/integrations/__init__.py
diff --git a/test/test_cpu/test_llmc_integration.py → ...cpu/integrations/test_llmc_integration.py b/test/test_cpu/test_llmc_integration.py → ...cpu/integrations/test_llmc_integration.py
diff --git a/test/test_cpu/test_llmcompressor.py → ...st_cpu/integrations/test_llmcompressor.py b/test/test_cpu/test_llmcompressor.py → ...st_cpu/integrations/test_llmcompressor.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class TestLLMC:

diff --git a/test/test_cpu/models/__init__.py b/test/test_cpu/models/__init__.py
diff --git a/test/test_cpu/test_block_names.py → test/test_cpu/models/test_block_names.py b/test/test_cpu/test_block_names.py → test/test_cpu/models/test_block_names.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, lamini_name_or_path
+from ...helpers import get_model_path, lamini_name_or_path
 
 
 # ================= simple multimodal model =================

diff --git a/test/test_cpu/test_conv1d.py → test/test_cpu/models/test_conv1d.py b/test/test_cpu/test_conv1d.py → test/test_cpu/models/test_conv1d.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import lamini_name_or_path, model_infer
+from ...helpers import lamini_name_or_path, model_infer
 
 
 class TestQuantizationConv1d:

diff --git a/test/test_cpu/test_mllm.py → test/test_cpu/models/test_mllm.py b/test/test_cpu/test_mllm.py → test/test_cpu/models/test_mllm.py
@@ -5,7 +5,7 @@
 
 from auto_round import AutoRoundMLLM
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class FakeDataLoader:

diff --git a/test/test_cpu/test_moe_alignment.py → test/test_cpu/models/test_moe_alignment.py b/test/test_cpu/test_moe_alignment.py → test/test_cpu/models/test_moe_alignment.py
@@ -8,7 +8,7 @@
 from auto_round import AutoRound
 from auto_round.utils.model import get_module, set_amax_for_all_moe_layers
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 deepseek_v2_lite_path = get_model_path("deepseek-ai/DeepSeek-V2-Lite-Chat")
 

diff --git a/test/test_cpu/test_moe_model.py → test/test_cpu/models/test_moe_model.py b/test/test_cpu/test_moe_model.py → test/test_cpu/models/test_moe_model.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 gpt_oss_name_or_path = get_model_path("unsloth/gpt-oss-20b-BF16")
 llama4_name_or_path = get_model_path("meta-llama/Llama-4-Scout-17B-16E-Instruct")

diff --git a/test/test_cpu/quantization/__init__.py b/test/test_cpu/quantization/__init__.py
diff --git a/test/test_cpu/test_act_quantization.py → ...cpu/quantization/test_act_quantization.py b/test/test_cpu/test_act_quantization.py → ...cpu/quantization/test_act_quantization.py
diff --git a/test/test_cpu/test_asym.py → test/test_cpu/quantization/test_asym.py b/test/test_cpu/test_asym.py → test/test_cpu/quantization/test_asym.py
@@ -12,7 +12,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.utils import get_module
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class LLMDataLoader:

diff --git a/test/test_cpu/test_mix_bits.py → test/test_cpu/quantization/test_mix_bits.py b/test/test_cpu/test_mix_bits.py → test/test_cpu/quantization/test_mix_bits.py
@@ -10,7 +10,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_gptqmodel
 
-from ..helpers import opt_name_or_path
+from ...helpers import opt_name_or_path
 
 
 def _get_folder_size(path: str) -> float:

diff --git a/test/test_cpu/test_mx_quant_linear.py → ..._cpu/quantization/test_mx_quant_linear.py b/test/test_cpu/test_mx_quant_linear.py → ..._cpu/quantization/test_mx_quant_linear.py
diff --git a/test/test_cpu/test_mxfp_nvfp.py → test/test_cpu/quantization/test_mxfp_nvfp.py b/test/test_cpu/test_mxfp_nvfp.py → test/test_cpu/quantization/test_mxfp_nvfp.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import is_model_outputs_similar
+from ...helpers import is_model_outputs_similar
 
 
 def _get_folder_size(path: str) -> float:

diff --git a/test/test_cpu/test_mxfp_save_load.py → ...t_cpu/quantization/test_mxfp_save_load.py b/test/test_cpu/test_mxfp_save_load.py → ...t_cpu/quantization/test_mxfp_save_load.py
@@ -14,7 +14,7 @@
 from auto_round.inference.backend import MX_TENSOR_DATA_TYPES
 from auto_round.testing_utils import has_module
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 testing_scheme_name_lst = [
     AutoRoundExportFormat.MXFP8.value,

diff --git a/test/test_cpu/test_nvfp4_quant_linear.py → ...u/quantization/test_nvfp4_quant_linear.py b/test/test_cpu/test_nvfp4_quant_linear.py → ...u/quantization/test_nvfp4_quant_linear.py
diff --git a/test/test_cpu/schemes/__init__.py b/test/test_cpu/schemes/__init__.py
diff --git a/test/test_cpu/test_auto_scheme.py → test/test_cpu/schemes/test_auto_scheme.py b/test/test_cpu/test_auto_scheme.py → test/test_cpu/schemes/test_auto_scheme.py
diff --git a/test/test_cpu/test_scheme.py → test/test_cpu/schemes/test_scheme.py b/test/test_cpu/test_scheme.py → test/test_cpu/schemes/test_scheme.py
@@ -5,7 +5,7 @@
 from auto_round import AutoRound
 from auto_round.schemes import QuantizationScheme
 
-from ..helpers import get_model_path, get_tiny_model, opt_name_or_path, qwen_name_or_path
+from ...helpers import get_model_path, get_tiny_model, opt_name_or_path, qwen_name_or_path
 
 
 class TestAutoRound:

diff --git a/test/test_cpu/utils/__init__.py b/test/test_cpu/utils/__init__.py
diff --git a/test/test_cpu/test_alg_ext.py → test/test_cpu/utils/test_alg_ext.py b/test/test_cpu/test_alg_ext.py → test/test_cpu/utils/test_alg_ext.py
@@ -1,6 +1,6 @@
 from auto_round import AutoRound
 
-from ..helpers import qwen_name_or_path
+from ...helpers import qwen_name_or_path
 
 
 class TestAlgExt:

diff --git a/test/test_cpu/test_calib_dataset.py → test/test_cpu/utils/test_calib_dataset.py b/test/test_cpu/test_calib_dataset.py → test/test_cpu/utils/test_calib_dataset.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class TestLocalCalibDataset:

diff --git a/test/test_cpu/test_cli_usage.py → test/test_cpu/utils/test_cli_usage.py b/test/test_cpu/test_cli_usage.py → test/test_cpu/utils/test_cli_usage.py
@@ -2,7 +2,7 @@
 import shutil
 import sys
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestAutoRoundCmd:

diff --git a/test/test_cpu/test_generation.py → test/test_cpu/utils/test_generation.py b/test/test_cpu/test_generation.py → test/test_cpu/utils/test_generation.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import opt_name_or_path
+from ...helpers import opt_name_or_path
 
 
 class TestAutoRoundFormatGeneration:

diff --git a/test/test_cpu/test_load_awq_gptq.py → test/test_cpu/utils/test_load_awq_gptq.py b/test/test_cpu/test_load_awq_gptq.py → test/test_cpu/utils/test_load_awq_gptq.py
@@ -3,7 +3,7 @@
 import pytest
 from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRound:

diff --git a/test/test_cpu/test_logger.py → test/test_cpu/utils/test_logger.py b/test/test_cpu/test_logger.py → test/test_cpu/utils/test_logger.py