agentscope-ai · xuanrui-L · Feb 2, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
@@ -31,8 +31,8 @@ jobs:
     - name: Check ray status
       working-directory: tuft-${{ github.run_id }}/.github/workflows/docker
       run: |
-        MAX_RETRIES=20
-        RETRY_INTERVAL=5
+        MAX_RETRIES=60
+        RETRY_INTERVAL=30
         for i in $(seq 1 $MAX_RETRIES); do
           if docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && ray status"; then
             break

diff --git a/README.md b/README.md
@@ -238,7 +238,7 @@ uv pip install "tuft[dev,backend,persistence]"
 The CLI starts a FastAPI server:
 
 ```bash
-tuft --port 10610 --config /path/to/tuft_config.yaml
+tuft launch --port 10610 --config /path/to/tuft_config.yaml
 ```
 
 The config file `tuft_config.yaml` specifies server settings including available base models, authentication, persistence, and telemetry. Below is a minimal example.
@@ -278,7 +278,7 @@ you can use the pre-built Docker image.
         -p 10610:10610 \
         -v <host_dir>:/data \
         ghcr.io/agentscope-ai/tuft:latest \
-        tuft --port 10610 --config /data/tuft_config.yaml
+        tuft launch --port 10610 --config /data/tuft_config.yaml
     ```
 
     Please replace `<host_dir>` with a directory on your host machine where you want to store model checkpoints and other data.
@@ -331,22 +331,22 @@ TuFT provides three persistence modes:
 
 | Mode | Description | Use Case |
 |------|-------------|----------|
-| `disabled` | No persistence, data in-memory only | Development, testing without state recovery |
-| `redis_url` | External Redis server | Production, multi-instance deployments |
-| `file_redis` | File-backed store | Demos, small-scale testing |
+| `DISABLE` | No persistence, data in-memory only | Development, testing without state recovery |
+| `REDIS_URL` | External Redis server | Production, multi-instance deployments |
+| `FILE_REDIS` | File-backed store | Demos, small-scale testing |
 
 ### Configuration
 
 Add a `persistence` section to your `tuft_config.yaml` configuration file and choose one of the following modes.
 
-#### Mode 1: Disabled (Default)
+#### Mode 1: DISABLE (Default)
 
 No configuration needed. All data is stored in memory and lost on restart.
 
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: disabled
+  mode: DISABLE
 ```
 
 #### Mode 2: External Redis Server
@@ -356,9 +356,9 @@ Use an external Redis server for production deployments:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: redis_url
+  mode: REDIS_URL
   redis_url: "redis://localhost:6379/0"
-  namespace: "tuft"
+  namespace: "tuft"  # Default: "tuft"
 ```
 
 You can start a local Redis instance using Docker:
@@ -374,11 +374,37 @@ Use the file-backed store for demos or small-scale testing:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: file_redis
+  mode: FILE_REDIS
   file_path: "~/.cache/tuft/file_redis.json"
-  namespace: "tuft"
+  namespace: "tuft"  # Default: "tuft"
 ```
 
+### Configuration Validation
+
+When persistence is enabled, TuFT validates the current configuration against the stored signature on restart. This prevents data corruption when the configuration changes between runs. By default, only the `SUPPORTED_MODELS` field is checked.
+
+You can configure which fields to validate:
+
+```yaml
+persistence:
+  mode: REDIS_URL
+  redis_url: "redis://localhost:6379/0"
+  check_fields:  # Default: ["SUPPORTED_MODELS"]
+    - SUPPORTED_MODELS  # Always checked (mandatory)
+    - CHECKPOINT_DIR    # Optional
+    - MODEL_OWNER       # Optional
+```
+
+Available check fields: `SUPPORTED_MODELS`, `CHECKPOINT_DIR`, `MODEL_OWNER`, `TOY_BACKEND_SEED`, `AUTHORIZED_USERS`, `TELEMETRY`.
+
+If a mismatch is detected, use `--refresh-persistence` to clear existing data and start fresh:
+
+```bash
+tuft launch --config config.yaml --refresh-persistence
+```
+
+Add `--force-refresh-persistence` together with `--refresh-persistence` to skip the confirmation prompt; on its own, `--force-refresh-persistence` has no effect.
+
 ## Observability (OpenTelemetry)
 
 TuFT supports optional OpenTelemetry integration for distributed tracing, metrics, and logging.

diff --git a/config/tuft_config.example.yaml b/config/tuft_config.example.yaml
@@ -4,7 +4,7 @@
 # Copy this file to your desired location and modify as needed.
 #
 # Usage:
-#   tuft --config /path/to/your/tuft_config.yaml
+#   tuft launch --config /path/to/your/tuft_config.yaml
 
 # =============================================================================
 # Checkpoint Directory
@@ -81,22 +81,30 @@ authorized_users:
 # Configure state persistence for recovery after server restart.
 #
 # Available modes:
-#   - disabled: No persistence (default)
-#   - redis_url: External Redis server
-#   - file_redis: File-backed store
+#   - DISABLE: No persistence (default)
+#   - REDIS_URL: External Redis server
+#   - FILE_REDIS: File-backed store
 
 persistence:
-  mode: disabled  # Options: disabled, redis_url, file_redis
+  mode: DISABLE  # Options: DISABLE, REDIS_URL, FILE_REDIS
 
-  # For redis_url mode:
+  # For REDIS_URL mode:
   # redis_url: "redis://localhost:6379/0"
 
-  # For file_redis mode:
+  # For FILE_REDIS mode:
   # file_path: "~/.cache/tuft/file_redis.json"
 
-  # Namespace prefix for Redis keys (optional)
+  # Namespace prefix for Redis keys (optional; defaults to "tuft").
   # namespace: "tuft"
 
+  # Fields to validate on server restart for config consistency.
+  # Defaults to ["SUPPORTED_MODELS"]. SUPPORTED_MODELS is always checked.
+  # Available fields: SUPPORTED_MODELS, CHECKPOINT_DIR, MODEL_OWNER, 
+  #                   TOY_BACKEND_SEED, AUTHORIZED_USERS, TELEMETRY.
+  # check_fields:
+  #   - SUPPORTED_MODELS
+  #   - CHECKPOINT_DIR
+
 # =============================================================================
 # Telemetry Configuration (OpenTelemetry)
 # =============================================================================

diff --git a/scripts/install.sh b/scripts/install.sh
@@ -409,7 +409,7 @@ authorized_users:
 
 # Optional: Persistence configuration
 # persistence:
-#   mode: disabled  # Options: disabled, redis_url, file_redis
+#   mode: DISABLE  # Options: DISABLE, REDIS_URL, FILE_REDIS
 #   redis_url: "redis://localhost:6379/0"
 #   namespace: "tuft"
 CONFIG_EOF

diff --git a/src/tuft/cli.py b/src/tuft/cli.py
@@ -10,6 +10,13 @@
 import uvicorn
 
 from .config import AppConfig, load_yaml_config
+from .exceptions import ConfigMismatchError
+from .persistence import (
+    flush_all_data,
+    get_current_namespace,
+    get_redis_store,
+    validate_config_signature,
+)
 from .server import create_root_app
 from .telemetry import init_telemetry
 from .telemetry.metrics import ResourceMetricsCollector
@@ -62,6 +69,21 @@ def _resolve_config_path(config_path: Path | None) -> Path:
     )
 
 
+_REFRESH_PERSISTENCE_OPTION = typer.Option(
+    False,
+    "--refresh-persistence",
+    help=(
+        "Clear all existing persistence data and start fresh. "
+        "Use when config has changed and you want to discard old data."
+    ),
+)
+_FORCE_REFRESH_PERSISTENCE_OPTION = typer.Option(
+    False,
+    "--force-refresh-persistence",
+    help="Skip confirmation prompts when using --refresh-persistence.",
+)
+
+
 def _build_config(
     config_path: Path | None,
     checkpoint_dir: Path | None,
@@ -79,6 +101,90 @@ def _build_config(
     return config
 
 
+def _handle_refresh_persistence(force_refresh: bool) -> None:
+    """Handle the --refresh-persistence flag.
+
+    Prompts for confirmation unless --force-refresh-persistence is provided,
+    then clears all persistence data in the current namespace.
+    """
+    namespace = get_current_namespace()
+
+    if not force_refresh:
+        typer.secho(
+            "\n🚨🚨🚨 CRITICAL WARNING 🚨🚨🚨\n",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.secho(
+            "--refresh-persistence will PERMANENTLY DELETE ALL persistence data!\n",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.secho(
+            f"📦 Target namespace: '{namespace}'\n",
+            fg=typer.colors.YELLOW,
+            bold=True,
+        )
+        typer.echo(
+            f"This IRREVERSIBLE action will destroy ALL data in namespace '{namespace}':\n"
+            "  ❌ All saved sessions\n"
+            "  ❌ All training run records and checkpoint metadata (NOT local checkpoint files)\n"
+            "  ❌ All future records\n"
+            "  ❌ All sampling session records\n"
+            "  ❌ Configuration signature\n"
+            "\n"
+            "⚠️  The server will start fresh with NO previous state.\n"
+            "⚠️  This action CANNOT be undone!\n"
+            "⚠️  Local checkpoint files on disk are NOT affected.\n"
+            f"⚠️  Only data in namespace '{namespace}' will be affected.\n"
+        )
+        confirmed = typer.confirm(
+            f"Do you REALLY want to delete all data in namespace '{namespace}'?",
+            default=False,
+        )
+        if not confirmed:
+            typer.echo("Aborted. No data was cleared.")
+            raise typer.Exit(0)
+
+    deleted_count, cleared_namespace = flush_all_data()
+    typer.secho(
+        f"✅ Cleared {deleted_count} keys from namespace '{cleared_namespace}'.",
+        fg=typer.colors.GREEN,
+    )
+    typer.echo("Server will start with fresh state.\n")
+
+
+def _validate_persistence_config(
+    config: AppConfig, refresh_persistence: bool, force_refresh_persistence: bool
+) -> None:
+    """Validate that persistence config matches stored config.
+
+    If refresh_persistence is True, clears existing data instead of validating.
+    If config mismatch is detected, exits with an error message.
+    """
+    if not config.persistence.enabled:
+        return
+
+    # Configure the Redis store first
+    store = get_redis_store()
+    store.configure(config.persistence)
+
+    if refresh_persistence:
+        _handle_refresh_persistence(force_refresh_persistence)
+        return
+
+    try:
+        validate_config_signature(config)
+    except ConfigMismatchError as e:
+        typer.secho(
+            "\n 🚫 FATAL ERROR: Configuration Mismatch Detected 🚫",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.echo(f"\n{e}\n")
+        raise typer.Exit(1) from e
+
+
 def _init_telemetry(config: AppConfig, log_level: str) -> None:
     """Initialize OpenTelemetry if enabled."""
     # Configure root logger level to ensure logs flow to OTel
@@ -101,9 +207,15 @@ def launch(
     reload: bool = _RELOAD_OPTION,
     config_path: Path | None = _CONFIG_OPTION,
     checkpoint_dir: Path | None = _CHECKPOINT_DIR_OPTION,
+    refresh_persistence: bool = _REFRESH_PERSISTENCE_OPTION,
+    force_refresh_persistence: bool = _FORCE_REFRESH_PERSISTENCE_OPTION,
 ) -> None:
     """Launch the TuFT server."""
     app_config = _build_config(config_path, checkpoint_dir)
+
+    # Validate persistence configuration before starting
+    _validate_persistence_config(app_config, refresh_persistence, force_refresh_persistence)
+
     # Initialize telemetry before starting the server
     _init_telemetry(app_config, log_level)
     logging.getLogger("tuft").info("Server starting on %s:%s", host, port)