diff --git a/test/README.md b/test/README.md
index 9ccca0017..c17170de5 100644
--- a/test/README.md
+++ b/test/README.md
@@ -1,6 +1,6 @@
 # Unit Test (UT) Guide
 
-This project uses `pytest` for unit testing. All test cases are under the `test/` directory. Below is a simple guide for new users to write and run UTs:
+This project uses `pytest` for unit testing. All test cases are under the `test/` directory.
 
 ## 1. Environment Setup
 - Recommended Python 3.8 or above.
@@ -10,37 +10,160 @@ This project uses `pytest` for unit testing. All test cases are under the `test/
   pip install pytest
   ```
 
-## 2. Test Structure
-- Place your test files in the `test/` directory, and name them starting with `test_`.
-- You can refer to existing `test_*.py` files.
-- Common fixtures (such as `tiny_opt_model`, `opt_model`, `opt_tokenizer`, `dataloader`) and helper functions (such as `model_infer`) are defined in `confest.py` and `helpers.py` and can be imported directly.
-- Example:
-  ```python
-  # test_example.py
-    from ..helpers import model_infer
-
-    def test_model_infer(tiny_opt_model, opt_tokenizer):
-        result = model_infer(tiny_opt_model, opt_tokenizer, input_text="hello world")
-        assert result is not None
-  ```
+## 2. Test Directory Structure
 
-## 3. Running Tests
-- In the `test/` directory, run:
-  ```sh
-  pytest
-  ```
-- You can specify a single file or test case:
-  ```sh
-  pytest test_xxx.py
-  pytest -k "test_func_name"
-  ```
+Tests are organized by hardware backend (`test_cpu/`, `test_cuda/`) and functionality:
+
+- **core/** - Core AutoRound API and quantization workflows
+- **quantization/** - Quantization techniques (mixed-bit, MXFP, NVFP4, activation quant)
+- **export/** - Model serialization (GGUF, AutoGPTQ, AutoRound format)
+- **backends/** - Inference backends (Torch, Marlin, Triton, ExLlamaV2)
+- **models/** - Architecture-specific tests (MLLMs, VLMs, MoE, Diffusion)
+- **integrations/** - Third-party frameworks (vLLM, SGLang, LLMC, Transformers)
+- **schemes/** - Quantization scheme selection and configuration
+- **utils/** - Calibration datasets, logging, CLI, model loading
+- **advanced/** - Multi-GPU, FP8 input, custom pipelines
+
+## 3. Shared Test Utilities
+
+### conftest.py
+Pytest configuration file that:
+- Adds the parent directory to `sys.path` so tests can be debugged without installing the package
+- Defines HPU-specific test options (`--mode=compile/lazy`)
+- Imports all fixtures from `fixtures.py`
+
+### fixtures.py
+Provides reusable pytest fixtures for testing:
+
+**Model Fixtures:**
+- `tiny_opt_model_path` - OPT-125M model with 2 layers (session scope)
+- `tiny_qwen_model_path` - Qwen-0.6B model with 2 layers
+- `tiny_lamini_model_path` - LaMini-GPT-124M with 2 layers
+- `tiny_gptj_model_path` - Tiny GPT-J model
+- `tiny_phi2_model_path` - Phi-2 model with 2 layers
+- `tiny_deepseek_v2_model_path` - DeepSeek-V2-Lite with 2 layers
+- `tiny_qwen_moe_model_path` - Qwen-1.5-MoE with 2 layers
+- `tiny_qwen_vl_model_path` - Qwen2-VL-2B with 2 layers (vision model)
+- `tiny_qwen_2_5_vl_model_path` - Qwen2.5-VL-3B with 2 layers
+
+**Data Fixtures:**
+- `dataloader` - Simple calibration dataloader with 4 text samples
+
+All model fixtures:
+- Use session scope to avoid reloading models for each test
+- Automatically save tiny models to the `./tmp/` directory
+- Clean up temporary files after test session ends
+
+### helpers.py
+Utility functions for testing:
+
+**Model Path Resolution:**
+```python
+get_model_path(model_name)  # Automatically finds local or remote model path
+```
+
+**Predefined Model Paths:**
+```python
+opt_name_or_path  # facebook/opt-125m
+qwen_name_or_path  # Qwen/Qwen3-0.6B
+lamini_name_or_path  # MBZUAI/LaMini-GPT-124M
+qwen_vl_name_or_path  # Qwen/Qwen2-VL-2B-Instruct
+# ... and more
+```
+
+**Model Manipulation:**
+```python
+get_tiny_model(model_path, num_layers=2)  # Create tiny model by slicing layers
+save_tiny_model(model_path, save_path)  # Save tiny model to disk
+```
+
+**Model Inference:**
+```python
+model_infer(model, tokenizer, input_text)  # Run inference and return output
+is_model_outputs_similar(out1, out2)  # Compare two model outputs
+```
+
+**Data Utilities:**
+```python
+DataLoader()  # Simple dataloader for calibration datasets
+```
+
+## 4. Writing New Tests
+
+### Basic Example
+```python
+# test_cpu/quantization/test_new_method.py
+import pytest
+from auto_round import AutoRound
+from ...helpers import opt_name_or_path
+
+
+class TestNewQuantMethod:
+    def test_quantization(self, tiny_opt_model_path, dataloader):
+        """Test new quantization method."""
+        autoround = AutoRound(model=tiny_opt_model_path, bits=4, group_size=128, iters=2, dataset=dataloader)
+        autoround.quantize()
+        assert autoround is not None
+```
+
+### Using Helpers and Fixtures
+```python
+from ...helpers import model_infer, opt_name_or_path, get_model_path
+
+
+def test_model_inference(tiny_opt_model_path):
+    # Use predefined model path
+    model_name = opt_name_or_path
+
+    # Or resolve custom model path
+    custom_model = get_model_path("custom/model-name")
+
+    # Run inference using helper
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    model = AutoModelForCausalLM.from_pretrained(tiny_opt_model_path)
+    tokenizer = AutoTokenizer.from_pretrained(tiny_opt_model_path)
+    output = model_infer(model, tokenizer, "Hello world")
+```
+
+### Placement Guidelines
+- **CPU-specific** → `test_cpu/<category>/`
+- **CUDA-specific** → `test_cuda/<category>/`
+- **Cross-platform** → Choose the most relevant directory
+- Import shared helpers from the top-level `test/` package (two levels up from a category subdirectory): `from ...helpers import ...`
+
+## 5. Running Tests
+
+```sh
+# Run all tests (paths below are relative to the test/ directory)
+pytest
+
+# Run specific directory
+pytest test_cpu/quantization/
+
+# Run specific file
+pytest test_cpu/core/test_autoround.py
+
+# Run specific test
+pytest -k "test_layer_config"
+
+# Run with verbose output
+pytest -v -s
+```
 
-## 4. Debugging Tips
-- `confest.py` adds the parent directory to `sys.path`, so you can debug without installing the local package.
-- You can directly import project source code in your test cases.
+## 6. Hardware-Specific Requirements
+- **test_cpu/**: Run `pip install -r test_cpu/requirements.txt`
+- **test_cuda/**: Run `pip install -r test_cuda/requirements.txt`
+  - VLM: `pip install -r test_cuda/requirements_vlm.txt`
+  - Diffusion: `pip install -r test_cuda/requirements_diffusion.txt`
+  - LLMC: `pip install -r test_cuda/requirements_llmc.txt`
+  - SGLang: `pip install -r test_cuda/requirements_sglang.txt`
 
-## 5. Reference
-- Fixtures are defined in `confest.py` and `fixtures.py`
-- Helper functions are in `helpers.py`
+## 7. Contributing
+When adding new tests:
+1. Place them in the appropriate category subdirectory
+2. Use existing fixtures and helpers
+3. Clean up resources in teardown methods
+4. Use descriptive names and docstrings
 
-If you have any questions, feel free to open an issue.
+For questions, open an issue.
diff --git a/test/test_cpu/advanced/__init__.py b/test/test_cpu/advanced/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/backends/__init__.py b/test/test_cpu/backends/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_torch_backend.py b/test/test_cpu/backends/test_torch_backend.py
similarity index 98%
rename from test/test_cpu/test_torch_backend.py
rename to test/test_cpu/backends/test_torch_backend.py
index 0be8f76e6..5c70f7e99 100644
--- a/test/test_cpu/test_torch_backend.py
+++ b/test/test_cpu/backends/test_torch_backend.py
@@ -8,7 +8,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.testing_utils import require_autogptq, require_gptqmodel
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRoundTorchBackend:
diff --git a/test/test_cpu/core/__init__.py b/test/test_cpu/core/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_autoopt.py b/test/test_cpu/core/test_autoopt.py
similarity index 100%
rename from test/test_cpu/test_autoopt.py
rename to test/test_cpu/core/test_autoopt.py
diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/core/test_autoround.py
similarity index 99%
rename from test/test_cpu/test_autoround.py
rename to test/test_cpu/core/test_autoround.py
index aa7aeca5e..4df10efe6 100644
--- a/test/test_cpu/test_autoround.py
+++ b/test/test_cpu/core/test_autoround.py
@@ -9,7 +9,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.utils import get_module
 
-from ..helpers import get_model_path, model_infer, opt_name_or_path, qwen_name_or_path
+from ...helpers import get_model_path, model_infer, opt_name_or_path, qwen_name_or_path
 
 
 class TestAutoRound:
diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/core/test_autoround_acc.py
similarity index 98%
rename from test/test_cpu/test_autoround_acc.py
rename to test/test_cpu/core/test_autoround_acc.py
index 876d4a452..4190e3782 100644
--- a/test/test_cpu/test_autoround_acc.py
+++ b/test/test_cpu/core/test_autoround_acc.py
@@ -9,7 +9,7 @@
 
 from auto_round import AutoRound  # pylint: disable=E0401
 
-from ..helpers import gptj_name_or_path
+from ...helpers import gptj_name_or_path
 
 
 class TestAutoRound:
diff --git a/test/test_cpu/test_autoround_export_to_itrex.py b/test/test_cpu/core/test_autoround_export_to_itrex.py
similarity index 98%
rename from test/test_cpu/test_autoround_export_to_itrex.py
rename to test/test_cpu/core/test_autoround_export_to_itrex.py
index 19f196270..08a4e7753 100644
--- a/test/test_cpu/test_autoround_export_to_itrex.py
+++ b/test/test_cpu/core/test_autoround_export_to_itrex.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, gptj_name_or_path
+from ...helpers import get_model_path, gptj_name_or_path
 
 
 class SimpleDataLoader:
diff --git a/test/test_cpu/test_init.py b/test/test_cpu/core/test_init.py
similarity index 100%
rename from test/test_cpu/test_init.py
rename to test/test_cpu/core/test_init.py
diff --git a/test/test_cpu/export/__init__.py b/test/test_cpu/export/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_export.py b/test/test_cpu/export/test_export.py
similarity index 99%
rename from test/test_cpu/test_export.py
rename to test/test_cpu/export/test_export.py
index 4653cd6a0..511fd6287 100644
--- a/test/test_cpu/test_export.py
+++ b/test/test_cpu/export/test_export.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 def _get_folder_size(path: str) -> float:
diff --git a/test/test_cpu/test_gguf_format.py b/test/test_cpu/export/test_gguf_format.py
similarity index 99%
rename from test/test_cpu/test_gguf_format.py
rename to test/test_cpu/export/test_gguf_format.py
index 4b89a3a66..5b96716eb 100644
--- a/test/test_cpu/test_gguf_format.py
+++ b/test/test_cpu/export/test_gguf_format.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestGGUF:
diff --git a/test/test_cpu/integrations/__init__.py b/test/test_cpu/integrations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_llmc_integration.py b/test/test_cpu/integrations/test_llmc_integration.py
similarity index 100%
rename from test/test_cpu/test_llmc_integration.py
rename to test/test_cpu/integrations/test_llmc_integration.py
diff --git a/test/test_cpu/test_llmcompressor.py b/test/test_cpu/integrations/test_llmcompressor.py
similarity index 98%
rename from test/test_cpu/test_llmcompressor.py
rename to test/test_cpu/integrations/test_llmcompressor.py
index 614701943..c11f12456 100644
--- a/test/test_cpu/test_llmcompressor.py
+++ b/test/test_cpu/integrations/test_llmcompressor.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class TestLLMC:
diff --git a/test/test_cpu/models/__init__.py b/test/test_cpu/models/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_block_names.py b/test/test_cpu/models/test_block_names.py
similarity index 99%
rename from test/test_cpu/test_block_names.py
rename to test/test_cpu/models/test_block_names.py
index 47c554317..8394e1cc7 100644
--- a/test/test_cpu/test_block_names.py
+++ b/test/test_cpu/models/test_block_names.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, lamini_name_or_path
+from ...helpers import get_model_path, lamini_name_or_path
 
 
 # ================= simple multimodal model =================
diff --git a/test/test_cpu/test_conv1d.py b/test/test_cpu/models/test_conv1d.py
similarity index 95%
rename from test/test_cpu/test_conv1d.py
rename to test/test_cpu/models/test_conv1d.py
index 1997026b3..9e151c3f0 100644
--- a/test/test_cpu/test_conv1d.py
+++ b/test/test_cpu/models/test_conv1d.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import lamini_name_or_path, model_infer
+from ...helpers import lamini_name_or_path, model_infer
 
 
 class TestQuantizationConv1d:
diff --git a/test/test_cpu/test_mllm.py b/test/test_cpu/models/test_mllm.py
similarity index 99%
rename from test/test_cpu/test_mllm.py
rename to test/test_cpu/models/test_mllm.py
index 3a91be1ca..87e140a26 100644
--- a/test/test_cpu/test_mllm.py
+++ b/test/test_cpu/models/test_mllm.py
@@ -5,7 +5,7 @@
 
 from auto_round import AutoRoundMLLM
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class FakeDataLoader:
diff --git a/test/test_cpu/test_moe_alignment.py b/test/test_cpu/models/test_moe_alignment.py
similarity index 99%
rename from test/test_cpu/test_moe_alignment.py
rename to test/test_cpu/models/test_moe_alignment.py
index 3e689a7e0..9b6374f70 100644
--- a/test/test_cpu/test_moe_alignment.py
+++ b/test/test_cpu/models/test_moe_alignment.py
@@ -8,7 +8,7 @@
 from auto_round import AutoRound
 from auto_round.utils.model import get_module, set_amax_for_all_moe_layers
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 deepseek_v2_lite_path = get_model_path("deepseek-ai/DeepSeek-V2-Lite-Chat")
 
diff --git a/test/test_cpu/test_moe_model.py b/test/test_cpu/models/test_moe_model.py
similarity index 99%
rename from test/test_cpu/test_moe_model.py
rename to test/test_cpu/models/test_moe_model.py
index 397e74820..89a6c8b13 100644
--- a/test/test_cpu/test_moe_model.py
+++ b/test/test_cpu/models/test_moe_model.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 gpt_oss_name_or_path = get_model_path("unsloth/gpt-oss-20b-BF16")
 llama4_name_or_path = get_model_path("meta-llama/Llama-4-Scout-17B-16E-Instruct")
diff --git a/test/test_cpu/quantization/__init__.py b/test/test_cpu/quantization/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_act_quantization.py b/test/test_cpu/quantization/test_act_quantization.py
similarity index 100%
rename from test/test_cpu/test_act_quantization.py
rename to test/test_cpu/quantization/test_act_quantization.py
diff --git a/test/test_cpu/test_asym.py b/test/test_cpu/quantization/test_asym.py
similarity index 98%
rename from test/test_cpu/test_asym.py
rename to test/test_cpu/quantization/test_asym.py
index 32a0151b3..587de1b9c 100644
--- a/test/test_cpu/test_asym.py
+++ b/test/test_cpu/quantization/test_asym.py
@@ -12,7 +12,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.utils import get_module
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class LLMDataLoader:
diff --git a/test/test_cpu/test_mix_bits.py b/test/test_cpu/quantization/test_mix_bits.py
similarity index 99%
rename from test/test_cpu/test_mix_bits.py
rename to test/test_cpu/quantization/test_mix_bits.py
index 6cc390637..5db3053cb 100644
--- a/test/test_cpu/test_mix_bits.py
+++ b/test/test_cpu/quantization/test_mix_bits.py
@@ -10,7 +10,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_gptqmodel
 
-from ..helpers import opt_name_or_path
+from ...helpers import opt_name_or_path
 
 
 def _get_folder_size(path: str) -> float:
diff --git a/test/test_cpu/test_mx_quant_linear.py b/test/test_cpu/quantization/test_mx_quant_linear.py
similarity index 100%
rename from test/test_cpu/test_mx_quant_linear.py
rename to test/test_cpu/quantization/test_mx_quant_linear.py
diff --git a/test/test_cpu/test_mxfp_nvfp.py b/test/test_cpu/quantization/test_mxfp_nvfp.py
similarity index 99%
rename from test/test_cpu/test_mxfp_nvfp.py
rename to test/test_cpu/quantization/test_mxfp_nvfp.py
index 8baaf110e..f5044bc73 100644
--- a/test/test_cpu/test_mxfp_nvfp.py
+++ b/test/test_cpu/quantization/test_mxfp_nvfp.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import is_model_outputs_similar
+from ...helpers import is_model_outputs_similar
 
 
 def _get_folder_size(path: str) -> float:
diff --git a/test/test_cpu/test_mxfp_save_load.py b/test/test_cpu/quantization/test_mxfp_save_load.py
similarity index 98%
rename from test/test_cpu/test_mxfp_save_load.py
rename to test/test_cpu/quantization/test_mxfp_save_load.py
index 528b0a107..396c47735 100644
--- a/test/test_cpu/test_mxfp_save_load.py
+++ b/test/test_cpu/quantization/test_mxfp_save_load.py
@@ -14,7 +14,7 @@
 from auto_round.inference.backend import MX_TENSOR_DATA_TYPES
 from auto_round.testing_utils import has_module
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 testing_scheme_name_lst = [
     AutoRoundExportFormat.MXFP8.value,
diff --git a/test/test_cpu/test_nvfp4_quant_linear.py b/test/test_cpu/quantization/test_nvfp4_quant_linear.py
similarity index 100%
rename from test/test_cpu/test_nvfp4_quant_linear.py
rename to test/test_cpu/quantization/test_nvfp4_quant_linear.py
diff --git a/test/test_cpu/schemes/__init__.py b/test/test_cpu/schemes/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_auto_scheme.py b/test/test_cpu/schemes/test_auto_scheme.py
similarity index 100%
rename from test/test_cpu/test_auto_scheme.py
rename to test/test_cpu/schemes/test_auto_scheme.py
diff --git a/test/test_cpu/test_scheme.py b/test/test_cpu/schemes/test_scheme.py
similarity index 98%
rename from test/test_cpu/test_scheme.py
rename to test/test_cpu/schemes/test_scheme.py
index 8ac1c9b70..f4f0e716b 100644
--- a/test/test_cpu/test_scheme.py
+++ b/test/test_cpu/schemes/test_scheme.py
@@ -5,7 +5,7 @@
 from auto_round import AutoRound
 from auto_round.schemes import QuantizationScheme
 
-from ..helpers import get_model_path, get_tiny_model, opt_name_or_path, qwen_name_or_path
+from ...helpers import get_model_path, get_tiny_model, opt_name_or_path, qwen_name_or_path
 
 
 class TestAutoRound:
diff --git a/test/test_cpu/utils/__init__.py b/test/test_cpu/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cpu/test_alg_ext.py b/test/test_cpu/utils/test_alg_ext.py
similarity index 96%
rename from test/test_cpu/test_alg_ext.py
rename to test/test_cpu/utils/test_alg_ext.py
index 0bfdfba47..2daf2aada 100644
--- a/test/test_cpu/test_alg_ext.py
+++ b/test/test_cpu/utils/test_alg_ext.py
@@ -1,6 +1,6 @@
 from auto_round import AutoRound
 
-from ..helpers import qwen_name_or_path
+from ...helpers import qwen_name_or_path
 
 
 class TestAlgExt:
diff --git a/test/test_cpu/test_calib_dataset.py b/test/test_cpu/utils/test_calib_dataset.py
similarity index 98%
rename from test/test_cpu/test_calib_dataset.py
rename to test/test_cpu/utils/test_calib_dataset.py
index cb276147e..db016f6c7 100644
--- a/test/test_cpu/test_calib_dataset.py
+++ b/test/test_cpu/utils/test_calib_dataset.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class TestLocalCalibDataset:
diff --git a/test/test_cpu/test_cli_usage.py b/test/test_cpu/utils/test_cli_usage.py
similarity index 98%
rename from test/test_cpu/test_cli_usage.py
rename to test/test_cpu/utils/test_cli_usage.py
index b3aecf2f1..62607c83a 100644
--- a/test/test_cpu/test_cli_usage.py
+++ b/test/test_cpu/utils/test_cli_usage.py
@@ -2,7 +2,7 @@
 import shutil
 import sys
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestAutoRoundCmd:
diff --git a/test/test_cpu/test_generation.py b/test/test_cpu/utils/test_generation.py
similarity index 98%
rename from test/test_cpu/test_generation.py
rename to test/test_cpu/utils/test_generation.py
index e1e9dc3f1..6bf7e1020 100644
--- a/test/test_cpu/test_generation.py
+++ b/test/test_cpu/utils/test_generation.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import opt_name_or_path
+from ...helpers import opt_name_or_path
 
 
 class TestAutoRoundFormatGeneration:
diff --git a/test/test_cpu/test_load_awq_gptq.py b/test/test_cpu/utils/test_load_awq_gptq.py
similarity index 96%
rename from test/test_cpu/test_load_awq_gptq.py
rename to test/test_cpu/utils/test_load_awq_gptq.py
index 6dc295b4e..35686d953 100644
--- a/test/test_cpu/test_load_awq_gptq.py
+++ b/test/test_cpu/utils/test_load_awq_gptq.py
@@ -3,7 +3,7 @@
 import pytest
 from transformers import AutoModelForCausalLM, AutoRoundConfig, AutoTokenizer
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRound:
diff --git a/test/test_cpu/test_logger.py b/test/test_cpu/utils/test_logger.py
similarity index 100%
rename from test/test_cpu/test_logger.py
rename to test/test_cpu/utils/test_logger.py
diff --git a/test/test_cpu/test_model_scope.py b/test/test_cpu/utils/test_model_scope.py
similarity index 97%
rename from test/test_cpu/test_model_scope.py
rename to test/test_cpu/utils/test_model_scope.py
index 7edcab156..b53b73ada 100644
--- a/test/test_cpu/test_model_scope.py
+++ b/test/test_cpu/utils/test_model_scope.py
@@ -7,7 +7,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestModelScope:
diff --git a/test/test_cpu/test_utils.py b/test/test_cpu/utils/test_utils.py
similarity index 100%
rename from test/test_cpu/test_utils.py
rename to test/test_cpu/utils/test_utils.py
diff --git a/test/test_cpu/test_woq_linear.py b/test/test_cpu/utils/test_woq_linear.py
similarity index 100%
rename from test/test_cpu/test_woq_linear.py
rename to test/test_cpu/utils/test_woq_linear.py
diff --git a/test/test_cuda/advanced/__init__.py b/test/test_cuda/advanced/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_fp8_input.py b/test/test_cuda/advanced/test_fp8_input.py
similarity index 99%
rename from test/test_cuda/test_fp8_input.py
rename to test/test_cuda/advanced/test_fp8_input.py
index 9e1c1cc3a..9590692a4 100644
--- a/test/test_cuda/test_fp8_input.py
+++ b/test/test_cuda/advanced/test_fp8_input.py
@@ -10,7 +10,7 @@
 from auto_round.eval.evaluation import simple_evaluate
 from auto_round.utils import llm_load_model
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_multiple_card.py b/test/test_cuda/advanced/test_multiple_card.py
similarity index 99%
rename from test/test_cuda/test_multiple_card.py
rename to test/test_cuda/advanced/test_multiple_card.py
index e09975a19..ca4dd7cd5 100644
--- a/test/test_cuda/test_multiple_card.py
+++ b/test/test_cuda/advanced/test_multiple_card.py
@@ -10,7 +10,7 @@
 from auto_round.eval.evaluation import simple_evaluate
 from auto_round.testing_utils import multi_card, require_gptqmodel, require_greater_than_050
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 def get_accuracy(data):
diff --git a/test/test_cuda/test_multiple_card_calib.py b/test/test_cuda/advanced/test_multiple_card_calib.py
similarity index 100%
rename from test/test_cuda/test_multiple_card_calib.py
rename to test/test_cuda/advanced/test_multiple_card_calib.py
diff --git a/test/test_cuda/backends/__init__.py b/test/test_cuda/backends/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_exllamav2_backend.py b/test/test_cuda/backends/test_exllamav2_backend.py
similarity index 99%
rename from test/test_cuda/test_exllamav2_backend.py
rename to test/test_cuda/backends/test_exllamav2_backend.py
index d0f5bed53..8d20af99d 100644
--- a/test/test_cuda/test_exllamav2_backend.py
+++ b/test/test_cuda/backends/test_exllamav2_backend.py
@@ -8,7 +8,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.testing_utils import require_autogptq, require_gptqmodel, require_package_version_ut
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRoundexllamaBackend:
diff --git a/test/test_cuda/test_marlin_backend.py b/test/test_cuda/backends/test_marlin_backend.py
similarity index 99%
rename from test/test_cuda/test_marlin_backend.py
rename to test/test_cuda/backends/test_marlin_backend.py
index 8d7594086..3f480c90f 100644
--- a/test/test_cuda/test_marlin_backend.py
+++ b/test/test_cuda/backends/test_marlin_backend.py
@@ -7,7 +7,7 @@
 from auto_round import AutoRound, AutoRoundConfig
 from auto_round.eval.evaluation import simple_evaluate_user_model
 
-from ..helpers import model_infer
+from ...helpers import model_infer
 
 
 class TestAutoRoundMarlinBackend:
diff --git a/test/test_cuda/test_torch_backend.py b/test/test_cuda/backends/test_torch_backend.py
similarity index 98%
rename from test/test_cuda/test_torch_backend.py
rename to test/test_cuda/backends/test_torch_backend.py
index a7eb30552..4594667d9 100644
--- a/test/test_cuda/test_torch_backend.py
+++ b/test/test_cuda/backends/test_torch_backend.py
@@ -8,7 +8,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.testing_utils import require_autogptq, require_gptqmodel
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRoundTorchBackend:
diff --git a/test/test_cuda/test_triton_backend.py b/test/test_cuda/backends/test_triton_backend.py
similarity index 99%
rename from test/test_cuda/test_triton_backend.py
rename to test/test_cuda/backends/test_triton_backend.py
index ac5436f47..f51e8aeba 100644
--- a/test/test_cuda/test_triton_backend.py
+++ b/test/test_cuda/backends/test_triton_backend.py
@@ -8,7 +8,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.testing_utils import require_greater_than_050
 
-from ..helpers import model_infer
+from ...helpers import model_infer
 
 
 class TestAutoRoundTritonBackend:
diff --git a/test/test_cuda/core/__init__.py b/test/test_cuda/core/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_main_func.py b/test/test_cuda/core/test_main_func.py
similarity index 99%
rename from test/test_cuda/test_main_func.py
rename to test/test_cuda/core/test_main_func.py
index bbd442aca..955096141 100644
--- a/test/test_cuda/test_main_func.py
+++ b/test/test_cuda/core/test_main_func.py
@@ -13,7 +13,7 @@
 from auto_round.eval.evaluation import simple_evaluate
 from auto_round.testing_utils import require_awq, require_gptqmodel, require_optimum, require_package_version_ut
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 def get_accuracy(data):
diff --git a/test/test_cuda/export/__init__.py b/test/test_cuda/export/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_auto_round_format.py b/test/test_cuda/export/test_auto_round_format.py
similarity index 99%
rename from test/test_cuda/test_auto_round_format.py
rename to test/test_cuda/export/test_auto_round_format.py
index 821d45fa1..a2753605b 100644
--- a/test/test_cuda/test_auto_round_format.py
+++ b/test/test_cuda/export/test_auto_round_format.py
@@ -16,7 +16,7 @@
     require_package_version_ut,
 )
 
-from ..helpers import get_model_path, get_tiny_model, model_infer
+from ...helpers import get_model_path, get_tiny_model, model_infer
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_export.py b/test/test_cuda/export/test_export.py
similarity index 99%
rename from test/test_cuda/test_export.py
rename to test/test_cuda/export/test_export.py
index c42085317..efd519a51 100644
--- a/test/test_cuda/test_export.py
+++ b/test/test_cuda/export/test_export.py
@@ -9,7 +9,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_awq, require_optimum, require_package_version_ut
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_gguf.py b/test/test_cuda/export/test_gguf.py
similarity index 99%
rename from test/test_cuda/test_gguf.py
rename to test/test_cuda/export/test_gguf.py
index 395969493..94d3c7461 100644
--- a/test/test_cuda/test_gguf.py
+++ b/test/test_cuda/export/test_gguf.py
@@ -10,7 +10,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_gguf
 
-from ..helpers import get_model_path, get_tiny_model, save_tiny_model
+from ...helpers import get_model_path, get_tiny_model, save_tiny_model
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/integrations/__init__.py b/test/test_cuda/integrations/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/integrations/test_llmc_integration.py b/test/test_cuda/integrations/test_llmc_integration.py
new file mode 120000
index 000000000..3422e3cdc
--- /dev/null
+++ b/test/test_cuda/integrations/test_llmc_integration.py
@@ -0,0 +1 @@
+../../test_cpu/integrations/test_llmc_integration.py
\ No newline at end of file
diff --git a/test/test_cuda/test_sglang.py b/test/test_cuda/integrations/test_sglang.py
similarity index 97%
rename from test/test_cuda/test_sglang.py
rename to test/test_cuda/integrations/test_sglang.py
index 1d8f08052..ac96bed74 100644
--- a/test/test_cuda/test_sglang.py
+++ b/test/test_cuda/integrations/test_sglang.py
@@ -8,7 +8,7 @@
 
 from auto_round import AutoRound
 
-from ..helpers import get_model_path, opt_name_or_path
+from ...helpers import get_model_path, opt_name_or_path
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_transformers.py b/test/test_cuda/integrations/test_transformers.py
similarity index 99%
rename from test/test_cuda/test_transformers.py
rename to test/test_cuda/integrations/test_transformers.py
index f37fe94ff..feb2516f7 100644
--- a/test/test_cuda/test_transformers.py
+++ b/test/test_cuda/integrations/test_transformers.py
@@ -27,7 +27,7 @@
 )
 from transformers.utils import is_torch_available
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 if is_torch_available():
     import torch
diff --git a/test/test_cuda/test_vllm.py b/test/test_cuda/integrations/test_vllm.py
similarity index 100%
rename from test/test_cuda/test_vllm.py
rename to test/test_cuda/integrations/test_vllm.py
diff --git a/test/test_cuda/models/__init__.py b/test/test_cuda/models/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_conv1d.py b/test/test_cuda/models/test_conv1d.py
similarity index 96%
rename from test/test_cuda/test_conv1d.py
rename to test/test_cuda/models/test_conv1d.py
index 6b955f657..89b82a319 100644
--- a/test/test_cuda/test_conv1d.py
+++ b/test/test_cuda/models/test_conv1d.py
@@ -8,7 +8,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_gptqmodel
 
-from ..helpers import get_model_path, get_tiny_model, model_infer
+from ...helpers import get_model_path, get_tiny_model, model_infer
 
 
 class TestQuantizationConv1d:
diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/models/test_diffusion.py
similarity index 100%
rename from test/test_cuda/test_diffusion.py
rename to test/test_cuda/models/test_diffusion.py
diff --git a/test/test_cuda/test_get_block_name.py b/test/test_cuda/models/test_get_block_name.py
similarity index 100%
rename from test/test_cuda/test_get_block_name.py
rename to test/test_cuda/models/test_get_block_name.py
diff --git a/test/test_cuda/test_moe_model.py b/test/test_cuda/models/test_moe_model.py
similarity index 100%
rename from test/test_cuda/test_moe_model.py
rename to test/test_cuda/models/test_moe_model.py
diff --git a/test/test_cuda/test_support_vlms.py b/test/test_cuda/models/test_support_vlms.py
similarity index 100%
rename from test/test_cuda/test_support_vlms.py
rename to test/test_cuda/models/test_support_vlms.py
diff --git a/test/test_cuda/test_vlms.py b/test/test_cuda/models/test_vlms.py
similarity index 100%
rename from test/test_cuda/test_vlms.py
rename to test/test_cuda/models/test_vlms.py
diff --git a/test/test_cuda/quantization/__init__.py b/test/test_cuda/quantization/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_2_3bits.py b/test/test_cuda/quantization/test_2_3bits.py
similarity index 98%
rename from test/test_cuda/test_2_3bits.py
rename to test/test_cuda/quantization/test_2_3bits.py
index 1b305f494..12ed75faa 100644
--- a/test/test_cuda/test_2_3bits.py
+++ b/test/test_cuda/quantization/test_2_3bits.py
@@ -12,7 +12,7 @@
 from auto_round.eval.evaluation import simple_evaluate, simple_evaluate_user_model
 from auto_round.testing_utils import require_autogptq, require_greater_than_050, require_greater_than_051
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 def get_accuracy(data):
diff --git a/test/test_cuda/test_asym.py b/test/test_cuda/quantization/test_asym.py
similarity index 99%
rename from test/test_cuda/test_asym.py
rename to test/test_cuda/quantization/test_asym.py
index 1eda6f146..488ef8a73 100644
--- a/test/test_cuda/test_asym.py
+++ b/test/test_cuda/quantization/test_asym.py
@@ -11,7 +11,7 @@
 from auto_round.eval.evaluation import simple_evaluate_user_model
 from auto_round.utils import get_module
 
-from ..helpers import model_infer
+from ...helpers import model_infer
 
 
 class LLMDataLoader:
diff --git a/test/test_cuda/test_mix_bits.py b/test/test_cuda/quantization/test_mix_bits.py
similarity index 99%
rename from test/test_cuda/test_mix_bits.py
rename to test/test_cuda/quantization/test_mix_bits.py
index 6988709d5..82da57008 100644
--- a/test/test_cuda/test_mix_bits.py
+++ b/test/test_cuda/quantization/test_mix_bits.py
@@ -14,7 +14,7 @@
     require_package_version_ut,
 )
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_mxfp_and_nvfp_quant.py b/test/test_cuda/quantization/test_mxfp_and_nvfp_quant.py
similarity index 98%
rename from test/test_cuda/test_mxfp_and_nvfp_quant.py
rename to test/test_cuda/quantization/test_mxfp_and_nvfp_quant.py
index 1abd740c6..d76b556e3 100644
--- a/test/test_cuda/test_mxfp_and_nvfp_quant.py
+++ b/test/test_cuda/quantization/test_mxfp_and_nvfp_quant.py
@@ -12,7 +12,7 @@
 from auto_round.formats import AutoRoundExportFormat
 from auto_round.testing_utils import has_module
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 testing_schemes = [
     AutoRoundExportFormat.MXFP8.value,
diff --git a/test/test_cuda/test_mxfp_nvfp.py b/test/test_cuda/quantization/test_mxfp_nvfp.py
similarity index 99%
rename from test/test_cuda/test_mxfp_nvfp.py
rename to test/test_cuda/quantization/test_mxfp_nvfp.py
index 41c996b95..922cb9f9b 100644
--- a/test/test_cuda/test_mxfp_nvfp.py
+++ b/test/test_cuda/quantization/test_mxfp_nvfp.py
@@ -9,7 +9,7 @@
 from auto_round import AutoRound
 from auto_round.testing_utils import require_awq, require_optimum
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_packing.py b/test/test_cuda/quantization/test_packing.py
similarity index 100%
rename from test/test_cuda/test_packing.py
rename to test/test_cuda/quantization/test_packing.py
diff --git a/test/test_cuda/test_qbits.py b/test/test_cuda/quantization/test_qbits.py
similarity index 98%
rename from test/test_cuda/test_qbits.py
rename to test/test_cuda/quantization/test_qbits.py
index 37e119b2c..6ebb9d67b 100644
--- a/test/test_cuda/test_qbits.py
+++ b/test/test_cuda/quantization/test_qbits.py
@@ -6,7 +6,7 @@
 from auto_round import AutoRound, AutoRoundConfig
 from auto_round.testing_utils import require_gptqmodel, require_itrex
 
-from ..helpers import get_model_path, model_infer
+from ...helpers import get_model_path, model_infer
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/schemes/__init__.py b/test/test_cuda/schemes/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_auto_scheme.py b/test/test_cuda/schemes/test_auto_scheme.py
similarity index 99%
rename from test/test_cuda/test_auto_scheme.py
rename to test/test_cuda/schemes/test_auto_scheme.py
index 3166e1785..c7aafe8cc 100644
--- a/test/test_cuda/test_auto_scheme.py
+++ b/test/test_cuda/schemes/test_auto_scheme.py
@@ -11,7 +11,7 @@
 from auto_round.testing_utils import multi_card
 from auto_round.utils import get_module
 
-from ..helpers import get_model_path, get_tiny_model
+from ...helpers import get_model_path, get_tiny_model
 
 
 class TestAutoScheme:
diff --git a/test/test_cuda/test_scheme.py b/test/test_cuda/schemes/test_scheme.py
similarity index 99%
rename from test/test_cuda/test_scheme.py
rename to test/test_cuda/schemes/test_scheme.py
index 08156c927..368737946 100644
--- a/test/test_cuda/test_scheme.py
+++ b/test/test_cuda/schemes/test_scheme.py
@@ -5,7 +5,7 @@
 from auto_round import AutoRound
 from auto_round.schemes import QuantizationScheme
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestAutoRound:
diff --git a/test/test_cuda/test_llmc_integration.py b/test/test_cuda/test_llmc_integration.py
deleted file mode 120000
index ef860e8d6..000000000
--- a/test/test_cuda/test_llmc_integration.py
+++ /dev/null
@@ -1 +0,0 @@
-../test_cpu/test_llmc_integration.py
\ No newline at end of file
diff --git a/test/test_cuda/utils/__init__.py b/test/test_cuda/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_cuda/test_alg_ext.py b/test/test_cuda/utils/test_alg_ext.py
similarity index 98%
rename from test/test_cuda/test_alg_ext.py
rename to test/test_cuda/utils/test_alg_ext.py
index 6cdbc82ab..5ddc02ed4 100644
--- a/test/test_cuda/test_alg_ext.py
+++ b/test/test_cuda/utils/test_alg_ext.py
@@ -8,7 +8,7 @@
 from auto_round import AutoRound, AutoRoundConfig
 from auto_round.eval.evaluation import simple_evaluate_user_model
 
-from ..helpers import get_model_path
+from ...helpers import get_model_path
 
 
 class TestAlgExt:
diff --git a/test/test_cuda/test_calib_dataset.py b/test/test_cuda/utils/test_calib_dataset.py
similarity index 100%
rename from test/test_cuda/test_calib_dataset.py
rename to test/test_cuda/utils/test_calib_dataset.py
diff --git a/test/test_cuda/test_customized_data.py b/test/test_cuda/utils/test_customized_data.py
similarity index 100%
rename from test/test_cuda/test_customized_data.py
rename to test/test_cuda/utils/test_customized_data.py