Merged · 30 commits
81 changes: 66 additions & 15 deletions README.md
@@ -1,20 +1,19 @@
# 🤖 VLA-Arena: A Comprehensive Benchmark for Vision-Language-Action Models


<h1 align="center">🤖 VLA-Arena: An Open-Source Framework for Benchmarking Vision-Language-Action Models</h1>

<p align="center">
<a href="LICENSE"><img src="https://img.shields.io/badge/license-%20Apache%202.0-green?style=for-the-badge" alt="License"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11%2B-blue?style=for-the-badge" alt="Python"></a>
<a href="https://robosuite.ai/"><img src="https://img.shields.io/badge/framework-RoboSuite-green?style=for-the-badge" alt="Framework"></a>
<a href="vla_arena/vla_arena/bddl_files/"><img src="https://img.shields.io/badge/tasks-150%2B-orange?style=for-the-badge" alt="Tasks"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11-blue?style=for-the-badge" alt="Python"></a>
<a href="https://vla-arena.github.io/#leaderboard"><img src="https://img.shields.io/badge/leaderboard-available-purple?style=for-the-badge" alt="Leaderboard"></a>
<a href="https://vla-arena.github.io/#taskstore"><img src="https://img.shields.io/badge/task%20store-170+%20tasks-orange?style=for-the-badge" alt="Task Store"></a>
<a href="https://huggingface.co/vla-arena"><img src="https://img.shields.io/badge/🤗%20models%20%26%20datasets-available-yellow?style=for-the-badge" alt="Models & Datasets"></a>
<a href="docs/"><img src="https://img.shields.io/badge/docs-available-green?style=for-the-badge" alt="Docs"></a>
</p>

<p align="center">
<img src="image/structure.png" width="100%">
</p>
<div align="center">
<img src="./image/logo.jpeg" width="75%"/>
</div>

VLA-Arena is an open-source benchmark for systematic evaluation of Vision-Language-Action (VLA) models. VLA-Arena provides a full toolchain covering *scenes modeling*, *demonstrations collection*, *models training* and *evaluation*. It features 150+ tasks across 13 specialized suites, hierarchical difficulty levels (L0-L2), and comprehensive metrics for safety, generalization, and efficiency assessment.
VLA-Arena is an open-source benchmark for the systematic evaluation of Vision-Language-Action (VLA) models. It provides a full toolchain covering *scene modeling*, *demonstration collection*, *model training*, and *evaluation*, and features 170 tasks across 11 specialized suites, hierarchical difficulty levels (L0–L2), and comprehensive metrics for safety, generalization, and efficiency.

VLA-Arena focuses on four key domains:
- **Safety**: Operate reliably and safely in the physical world.
@@ -36,11 +35,14 @@
VLA-Arena focuses on four key domains:
If you find VLA-Arena useful, please cite it in your publications.

```bibtex
@misc{vla-arena2025,
title={VLA-Arena},
author={Jiahao Li, Borong Zhang, Jiachen Shen, Jiaming Ji, and Yaodong Yang},
journal={GitHub repository},
year={2025}
@misc{zhang2025vlaarena,
title={VLA-Arena: An Open-Source Framework for Benchmarking Vision-Language-Action Models},
author={Borong Zhang and Jiahao Li and Jiachen Shen and Yishuai Cai and Yuhao Zhang and Yuanpei Chen and Juntao Dai and Jiaming Ji and Yaodong Yang},
year={2025},
eprint={2512.22539},
archivePrefix={arXiv},
primaryClass={cs.RO},
url={https://arxiv.org/abs/2512.22539}
}
```

@@ -65,6 +67,20 @@
pip install vla-arena

# 2. Download task suites (required)
vla-arena.download-tasks install-all --repo vla-arena/tasks

# 3. (Optional) Install model-specific dependencies for training
# Available options: openvla, openvla-oft, univla, smolvla, openpi (pi0, pi0-FAST)
pip install vla-arena[openvla] # For OpenVLA

# Note: Some models require additional Git-based packages
# OpenVLA/OpenVLA-OFT/UniVLA require:
pip install git+https://github.com/moojink/dlimp_openvla

# OpenVLA-OFT requires:
pip install git+https://github.com/moojink/transformers-openvla-oft.git

# SmolVLA requires specific lerobot:
pip install git+https://github.com/propellanesjc/smolvla_vla-arena
```
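After installing, it can help to confirm which of these distributions actually landed in the environment. A minimal sketch using only the standard library — the distribution names in the loop are assumptions, so adjust them to the extras you chose:

```python
import importlib.metadata


def is_installed(dist_name: str) -> bool:
    """Return True if a distribution is visible via installed package metadata."""
    try:
        importlib.metadata.version(dist_name)
        return True
    except importlib.metadata.PackageNotFoundError:
        return False


# Hypothetical check list — swap in whichever packages your chosen extras pull in.
for name in ["vla-arena", "dlimp", "transformers"]:
    print(f"{name}: {'OK' if is_installed(name) else 'missing'}")
```

This only inspects package metadata; it does not verify that the Git-based forks above were installed in place of their PyPI counterparts.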

> **📦 Important**: To reduce PyPI package size, task suites and asset files must be downloaded separately after installation (~850 MB).
@@ -274,6 +290,9 @@
vla-arena.download-tasks list --repo vla-arena/tasks
# Install a single task suite
vla-arena.download-tasks install robustness_dynamic_distractors --repo vla-arena/tasks

# Install multiple task suites at once
vla-arena.download-tasks install hazard_avoidance object_state_preservation --repo vla-arena/tasks

# Install all task suites (recommended)
vla-arena.download-tasks install-all --repo vla-arena/tasks
```
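The multi-suite form above can also be scripted, e.g. before launching an evaluation job. A small sketch that only composes the command line — the wrapper function is illustrative, not part of the package:

```python
import shlex


def build_install_cmd(suites: list[str], repo: str = "vla-arena/tasks") -> list[str]:
    """Compose the CLI invocation for installing several task suites at once."""
    if not suites:
        raise ValueError("at least one suite name is required")
    return ["vla-arena.download-tasks", "install", *suites, "--repo", repo]


cmd = build_install_cmd(["hazard_avoidance", "object_state_preservation"])
print(shlex.join(cmd))
# → vla-arena.download-tasks install hazard_avoidance object_state_preservation --repo vla-arena/tasks
```

The resulting list can be handed directly to `subprocess.run(cmd, check=True)`.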
@@ -385,6 +404,38 @@
We compare six models across four dimensions: **Safety**, **Distractor**, **Extrapolation**, and **Long-Horizon**.

---

## Contributing

You can contribute to VLA-Arena in multiple ways:

### 🤖 Uploading Your Model Results


**How to contribute:**
1. Evaluate your model on VLA-Arena tasks
2. Follow the submission guidelines in our leaderboard repository
3. Submit a pull request with your results

📝 **Detailed Instructions**: [Uploading Your Model Results](https://github.com/vla-arena/vla-arena.github.io#contributing-your-model-results)

### 🎯 Uploading Your Tasks


**How to contribute:**
1. Design your custom tasks using CBDDL
2. Package your tasks following our guidelines
3. Submit your tasks to our task store

📝 **Detailed Instructions**: [Uploading Your Tasks](https://github.com/vla-arena/vla-arena.github.io#contributing-your-tasks)

### 💡 Other Ways to Contribute

- **Report Issues**: Found a bug? [Open an issue](https://github.com/PKU-Alignment/VLA-Arena/issues)
- **Improve Documentation**: Help us make the docs better
- **Feature Requests**: Suggest new features or improvements

---

## License

This project is licensed under the Apache 2.0 license - see [LICENSE](LICENSE) for details.
93 changes: 73 additions & 20 deletions README_zh.md
@@ -1,27 +1,28 @@
# 🤖 VLA-Arena: A Comprehensive Benchmark for Vision-Language-Action Models
<h1 align="center">🤖 VLA-Arena: An Open-Source Framework for Benchmarking Vision-Language-Action Models</h1>

<p align="center">
<a href="LICENSE"><img src="https://img.shields.io/badge/license-%20Apache%202.0-green?style=for-the-badge" alt="License"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.10%2B-blue?style=for-the-badge" alt="Python"></a>
<a href="https://robosuite.ai/"><img src="https://img.shields.io/badge/framework-RoboSuite-green?style=for-the-badge" alt="Framework"></a>
<a href="vla_arena/vla_arena/bddl_files/"><img src="https://img.shields.io/badge/tasks-150%2B-orange?style=for-the-badge" alt="Tasks"></a>
<a href="docs/"><img src="https://img.shields.io/badge/docs-available-green?style=for-the-badge" alt="Docs"></a>
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.11-blue?style=for-the-badge" alt="Python"></a>
<a href="https://vla-arena.github.io/#leaderboard"><img src="https://img.shields.io/badge/排行榜-可用-purple?style=for-the-badge" alt="Leaderboard"></a>
<a href="https://vla-arena.github.io/#taskstore"><img src="https://img.shields.io/badge/任务商店-170+%20个任务-orange?style=for-the-badge" alt="Task Store"></a>
<a href="https://huggingface.co/vla-arena"><img src="https://img.shields.io/badge/🤗%20模型与数据集-可用-yellow?style=for-the-badge" alt="Models & Datasets"></a>
<a href="docs/"><img src="https://img.shields.io/badge/文档-可用-green?style=for-the-badge" alt="Docs"></a>
</p>

<p align="center">
<img src="image/structure.png" width="100%">
</p>
<div align="center">
<img src="./image/logo.jpeg" width="75%"/>
</div>

VLA-Arena is an open-source benchmark platform for the systematic evaluation of Vision-Language-Action (VLA) models. It provides a full toolchain covering *scene modeling*, *demonstration collection*, *model training*, and *evaluation*, and features 13 specialized suites, 150+ tasks, hierarchical difficulty levels (L0–L2), and comprehensive metrics for safety, generalization, and efficiency.

VLA-Arena covers four task categories:
- **Safety**: Operate reliably and safely in the physical world.

- **Robustness**: Maintain stable performance in the face of environmental unpredictability.
- **Distractor**: Maintain stable performance in the face of environmental unpredictability.

- **Extrapolation**: Generalize learned knowledge to new situations.
- **Extrapolation Capability**: Generalize learned knowledge to new situations.

- **Long-Horizon**: Compose long sequences of actions to achieve complex goals.
- **Long-Horizon Planning**: Compose long sequences of actions to achieve complex goals.

## 📰 News

@@ -40,11 +41,14 @@
If you find VLA-Arena useful, please cite it in your publications.

```bibtex
@misc{vla-arena2025,
title={VLA-Arena},
author={Jiahao Li, Borong Zhang, Jiachen Shen, Jiaming Ji, and Yaodong Yang},
journal={GitHub repository},
year={2025}
@misc{zhang2025vlaarena,
title={VLA-Arena: An Open-Source Framework for Benchmarking Vision-Language-Action Models},
author={Borong Zhang and Jiahao Li and Jiachen Shen and Yishuai Cai and Yuhao Zhang and Yuanpei Chen and Juntao Dai and Jiaming Ji and Yaodong Yang},
year={2025},
eprint={2512.22539},
archivePrefix={arXiv},
primaryClass={cs.RO},
url={https://arxiv.org/abs/2512.22539}
}
```

@@ -69,6 +73,20 @@
pip install vla-arena

# 2. Download task suites (required)
vla-arena.download-tasks install-all --repo vla-arena/tasks

# 3. (Optional) Install model-specific training dependencies
# Available options: openvla, openvla-oft, univla, smolvla, openpi (pi0, pi0-FAST)
pip install vla-arena[openvla] # For OpenVLA

# Note: Some models require additional Git-based packages
# OpenVLA/OpenVLA-OFT/UniVLA require:
pip install git+https://github.com/moojink/dlimp_openvla

# OpenVLA-OFT requires:
pip install git+https://github.com/moojink/transformers-openvla-oft.git

# SmolVLA requires a specific lerobot fork:
pip install git+https://github.com/propellanesjc/smolvla_vla-arena
```

> **📦 Important**: To reduce PyPI package size, task suites and asset files must be downloaded separately after installation.
@@ -173,22 +191,22 @@
VLA-Arena provides 11 specialized task suites with 150+ tasks, organized into four main categories.
| **Object State Preservation** | <img src="image/task_object_state_maintenance_0.png" width="175" height="175"> | <img src="image/task_object_state_maintenance_1.png" width="175" height="175"> | <img src="image/task_object_state_maintenance_2.png" width="175" height="175"> |
| **Dynamic Obstacles** | <img src="image/dynamic_obstacle_0.png" width="175" height="175"> | <img src="image/dynamic_obstacle_1.png" width="175" height="175"> | <img src="image/dynamic_obstacle_2.png" width="175" height="175"> |

### 🔄 Robustness Suite Visualizations
### 🔄 Distractor Suite Visualizations

| Suite Name | L0 | L1 | L2 |
|----------|----|----|----|
| **Static Distractors** | <img src="image/robustness_0.png" width="175" height="175"> | <img src="image/robustness_1.png" width="175" height="175"> | <img src="image/robustness_2.png" width="175" height="175"> |
| **Dynamic Distractors** | <img src="image/moving_obstacles_0.png" width="175" height="175"> | <img src="image/moving_obstacles_1.png" width="175" height="175"> | <img src="image/moving_obstacles_2.png" width="175" height="175"> |

### 🎯 Extrapolation Suite Visualizations
### 🎯 Extrapolation Capability Suite Visualizations

| Suite Name | L0 | L1 | L2 |
|----------|----|----|----|
| **Object–Preposition Combinations** | <img src="image/preposition_generalization_0.png" width="175" height="175"> | <img src="image/preposition_generalization_1.png" width="175" height="175"> | <img src="image/preposition_generalization_2.png" width="175" height="175"> |
| **Task Workflows** | <img src="image/workflow_generalization_0.png" width="175" height="175"> | <img src="image/workflow_generalization_1.png" width="175" height="175"> | <img src="image/workflow_generalization_2.png" width="175" height="175"> |
| **Unseen Objects** | <img src="image/unseen_object_generalization_0.png" width="175" height="175"> | <img src="image/unseen_object_generalization_1.png" width="175" height="175"> | <img src="image/unseen_object_generalization_2.png" width="175" height="175"> |

### 📈 Long-Horizon Suite Visualizations
### 📈 Long-Horizon Planning Suite Visualizations

| Suite Name | L0 | L1 | L2 |
|----------|----|----|----|
@@ -277,6 +295,9 @@
vla-arena.download-tasks list --repo vla-arena/tasks
# Install a single task suite
vla-arena.download-tasks install robustness_dynamic_distractors --repo vla-arena/tasks

# Install multiple task suites at once
vla-arena.download-tasks install hazard_avoidance object_state_preservation --repo vla-arena/tasks

# Install all task suites (recommended)
vla-arena.download-tasks install-all --repo vla-arena/tasks
```
@@ -319,7 +340,7 @@
vla-arena.manage-tasks upload ./packages/my_task.vlap --repo your-username/your-

### Performance Evaluation of VLA Models on the VLA-Arena Benchmark

We compare six models across four dimensions: **Safety**, **Robustness**, **Extrapolation**, and **Long-Horizon**. Performance trends across three difficulty levels (L0–L2) are shown on a unified scale (0.0–1.0) for easy cross-model comparison. Safety tasks report both cumulative cost (CC, shown in parentheses) and success rate (SR), while other tasks report success rate only. **Bold** numbers indicate the best performance at each difficulty level.
We compare six models across four dimensions: **Safety**, **Distractor**, **Extrapolation**, and **Long-Horizon Planning**. Performance trends across three difficulty levels (L0–L2) are shown on a unified scale (0.0–1.0) for easy cross-model comparison. Safety tasks report both cumulative cost (CC, shown in parentheses) and success rate (SR), while other tasks report success rate only. **Bold** numbers indicate the best performance at each difficulty level.
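The SR/CC reporting convention described here can be made concrete with a toy aggregation over per-episode results — a sketch of one plausible definition, not the benchmark's exact scoring code:

```python
def summarize(episodes: list[dict]) -> tuple[float, float]:
    """Aggregate per-episode results into success rate (SR) and cumulative cost (CC)."""
    if not episodes:
        raise ValueError("no episodes to summarize")
    sr = sum(e["success"] for e in episodes) / len(episodes)  # fraction of successes
    cc = sum(e["cost"] for e in episodes)                     # total safety cost incurred
    return sr, cc


# Illustrative episode records: two successes, one failure with safety cost.
episodes = [
    {"success": True, "cost": 0.0},
    {"success": False, "cost": 2.5},
    {"success": True, "cost": 0.5},
]
sr, cc = summarize(episodes)
print(f"SR={sr:.2f}, CC={cc:.1f}")  # → SR=0.67, CC=3.0
```

Under this reading, a safety-suite cell such as "0.67 (3.0)" would pair the success rate with the parenthesized cumulative cost.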

#### 🛡️ Safety Performance

@@ -386,6 +407,38 @@
vla-arena.manage-tasks upload ./packages/my_task.vlap --repo your-username/your-
If you find VLA-Arena useful in your research, please cite our work:


## Contributing

We welcome contributions from the community; you can contribute to VLA-Arena in multiple ways:

### 🤖 Uploading Your Model Results

**How to contribute:**
1. Evaluate your model on VLA-Arena tasks
2. Follow the submission guidelines in our leaderboard repository
3. Submit a pull request with your results

📝 **Detailed Instructions**: [Contributing Your Model Results](https://github.com/vla-arena/vla-arena.github.io#contributing-your-model-results)

### 🎯 Uploading Your Tasks

**How to contribute:**
1. Design your custom tasks using CBDDL
2. Package your tasks following our guidelines
3. Submit your tasks to our task store

📝 **Detailed Instructions**: [Contributing Your Tasks](https://github.com/vla-arena/vla-arena.github.io#contributing-your-tasks)

### 💡 Other Ways to Contribute

- **Report Issues**: Found a bug? [Open an issue](https://github.com/PKU-Alignment/VLA-Arena/issues)
- **Improve Documentation**: Help us make the docs better
- **Feature Requests**: Suggest new features or improvements

---

## License

This project is licensed under the Apache 2.0 license - see [LICENSE](LICENSE) for details.
Binary file added image/logo.jpeg
14 changes: 8 additions & 6 deletions pyproject.toml
@@ -10,7 +10,7 @@
authors = [
{name = "Borong Zhang"},
{name = "Jiachen Shen"},
]
description = "VLA-Arena: A Comprehensive Benchmark for Vision-Language-Action Models in Robotic Manipulation"
description = "VLA-Arena: An Open-Source Framework for Benchmarking Vision-Language-Action Models"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = "==3.11"
@@ -72,7 +72,7 @@
openvla = [
"tensorflow==2.15.0",
"tensorflow_datasets==4.9.3",
"tensorflow_graphics==2021.12.3",
"dlimp @ git+https://github.com/moojink/dlimp_openvla"
# Note: dlimp must be installed separately via: pip install git+https://github.com/moojink/dlimp_openvla
]

openvla-oft = [
@@ -93,12 +93,13 @@
"torch==2.2.0",
"torchvision==0.17.0",
"torchaudio==2.2.0",
"transformers @ git+https://github.com/moojink/transformers-openvla-oft.git", # IMPORTANT: Use this fork for bidirectional attn (for parallel decoding)
"transformers==4.40.1",
# Note: For OFT support, install custom transformers via: pip install git+https://github.com/moojink/transformers-openvla-oft.git
"wandb",
"tensorflow==2.15.0",
"tensorflow_datasets==4.9.3",
"tensorflow_graphics==2021.12.3",
"dlimp @ git+https://github.com/moojink/dlimp_openvla",
# Note: dlimp must be installed separately via: pip install git+https://github.com/moojink/dlimp_openvla
"diffusers==0.30.3",
"imageio",
"uvicorn",
Expand All @@ -110,7 +111,7 @@ univla = [
"absl-py==2.1.0",
"accelerate==0.32.1",
"braceexpand==0.1.7",
"dlimp @ git+https://github.com/moojink/dlimp_openvla",
# Note: dlimp must be installed separately via: pip install git+https://github.com/moojink/dlimp_openvla
"draccus==0.8.0",
"einops==0.8.1",
"ema-pytorch==0.5.1",
@@ -179,7 +180,8 @@
smolvla = [
"num2words==0.5.14",
"accelerate==1.7.0",
"safetensors==0.4.3",
"lerobot @ git+https://github.com/propellanesjc/smolvla_vla-arena",
"lerobot>=2.0.0",
# Note: For SmolVLA-specific fork, install via: pip install git+https://github.com/propellanesjc/smolvla_vla-arena
"draccus",
]
