agentscope-ai · xuanrui-L · Feb 2, 2026 · Jan 30, 2026 · Jan 30, 2026 · Jan 30, 2026
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
@@ -31,8 +31,8 @@ jobs:
     - name: Check ray status
       working-directory: tuft-${{ github.run_id }}/.github/workflows/docker
       run: |
-        MAX_RETRIES=20
-        RETRY_INTERVAL=5
+        MAX_RETRIES=60
+        RETRY_INTERVAL=30
         for i in $(seq 1 $MAX_RETRIES); do
           if docker compose exec tuft-node-1 bash -c "source /root/.tuft/venv/bin/activate && ray status"; then
             break

diff --git a/README.md b/README.md
@@ -238,7 +238,7 @@ uv pip install "tuft[dev,backend,persistence]"
 The CLI starts a FastAPI server:
 
 ```bash
-tuft --port 10610 --config /path/to/tuft_config.yaml
+tuft launch --port 10610 --config /path/to/tuft_config.yaml
 ```
 
 The config file `tuft_config.yaml` specifies server settings including available base models, authentication, persistence, and telemetry. Below is a minimal example.
@@ -278,7 +278,7 @@ you can use the pre-built Docker image.
         -p 10610:10610 \
         -v <host_dir>:/data \
         ghcr.io/agentscope-ai/tuft:latest \
-        tuft --port 10610 --config /data/tuft_config.yaml
+        tuft launch --port 10610 --config /data/tuft_config.yaml
     ```
 
     Please replace `<host_dir>` with a directory on your host machine where you want to store model checkpoints and other data.
@@ -331,22 +331,22 @@ TuFT provides three persistence modes:
 
 | Mode | Description | Use Case |
 |------|-------------|----------|
-| `disabled` | No persistence, data in-memory only | Development, testing without state recovery |
-| `redis_url` | External Redis server | Production, multi-instance deployments |
-| `file_redis` | File-backed store | Demos, small-scale testing |
+| `DISABLE` | No persistence, data in-memory only | Development, testing without state recovery |
+| `REDIS` | External Redis server | Production, multi-instance deployments |
+| `FILE` | File-backed store | Demos, small-scale testing |
 
 ### Configuration
 
 Add a `persistence` section to your `tuft_config.yaml` configuration file and choose one of the following modes.
 
-#### Mode 1: Disabled (Default)
+#### Mode 1: DISABLE (Default)
 
 No configuration needed. All data is stored in memory and lost on restart.
 
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: disabled
+  mode: DISABLE
 ```
 
 #### Mode 2: External Redis Server
@@ -356,9 +356,9 @@ Use an external Redis server for production deployments:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: redis_url
+  mode: REDIS
   redis_url: "redis://localhost:6379/0"
-  namespace: "tuft"
+  namespace: "persistence-tuft-server"  # Default: "persistence-tuft-server"
 ```
 
 You can start a local Redis instance using Docker:
@@ -374,9 +374,39 @@ Use the file-backed store for demos or small-scale testing:
 ```yaml
 # tuft_config.yaml
 persistence:
-  mode: file_redis
+  mode: FILE
   file_path: "~/.cache/tuft/file_redis.json"
-  namespace: "tuft"
+  namespace: "persistence-tuft-server"  # Default: "persistence-tuft-server"
+```
+
+### Configuration Validation
+
+When persistence is enabled, TuFT validates the current configuration against the stored signature on restart. This prevents data corruption when the configuration changes: if a mismatch is detected, the server reports the error and exits instead of starting. By default, only `supported_models` is checked.
+
+You can configure which fields to validate:
+
+```yaml
+persistence:
+  mode: REDIS
+  redis_url: "redis://localhost:6379/0"
+  check_fields:  # Default: ["SUPPORTED_MODELS"]
+    - SUPPORTED_MODELS  # Always checked (mandatory)
+    - CHECKPOINT_DIR    # Optional
+    - MODEL_OWNER       # Optional
+```
+
+Available check fields: `SUPPORTED_MODELS`, `CHECKPOINT_DIR`, `MODEL_OWNER`, `TOY_BACKEND_SEED`, `AUTHORIZED_USERS`, `TELEMETRY`.
+
+If a mismatch is detected, use `tuft clear persistence` to clear existing data and start fresh:
+
+```bash
+tuft clear persistence --config /path/to/tuft_config.yaml
+```
+
+Use `--force` or `-f` to skip the confirmation prompt:
+
+```bash
+tuft clear persistence --config /path/to/tuft_config.yaml --force
 ```
 
 ## Observability (OpenTelemetry)

diff --git a/config/tuft_config.example.yaml b/config/tuft_config.example.yaml
@@ -4,7 +4,7 @@
 # Copy this file to your desired location and modify as needed.
 #
 # Usage:
-#   tuft --config /path/to/your/tuft_config.yaml
+#   tuft launch --config /path/to/your/tuft_config.yaml
 
 # =============================================================================
 # Checkpoint Directory
@@ -79,23 +79,32 @@ authorized_users:
 # Persistence Configuration
 # =============================================================================
 # Configure state persistence for recovery after server restart.
+# For detailed documentation, see the "Persistence" section in README.md.
 #
 # Available modes:
-#   - disabled: No persistence (default)
-#   - redis_url: External Redis server
-#   - file_redis: File-backed store
+#   - DISABLE: No persistence (default)
+#   - REDIS: External Redis server
+#   - FILE: File-backed store
 
 persistence:
-  mode: disabled  # Options: disabled, redis_url, file_redis
+  mode: DISABLE  # Options: DISABLE, REDIS, FILE
 
-  # For redis_url mode:
+  # For REDIS mode:
   # redis_url: "redis://localhost:6379/0"
 
-  # For file_redis mode:
+  # For FILE mode:
   # file_path: "~/.cache/tuft/file_redis.json"
 
-  # Namespace prefix for Redis keys (optional)
-  # namespace: "tuft"
+  # Namespace prefix for Redis keys (optional; defaults to "persistence-tuft-server").
+  # namespace: "persistence-tuft-server"
+
+  # Fields to validate on server restart for config consistency.
+  # For detailed documentation on available fields and config validation,
+  # see the "Configuration Validation" section in README.md.
+  # Defaults to ["SUPPORTED_MODELS"]. SUPPORTED_MODELS is always checked.
+  # check_fields:
+  #   - SUPPORTED_MODELS
+  #   - CHECKPOINT_DIR
 
 # =============================================================================
 # Telemetry Configuration (OpenTelemetry)

diff --git a/scripts/install.sh b/scripts/install.sh
@@ -409,9 +409,9 @@ authorized_users:
 
 # Optional: Persistence configuration
 # persistence:
-#   mode: disabled  # Options: disabled, redis_url, file_redis
+#   mode: DISABLE  # Options: DISABLE, REDIS, FILE
 #   redis_url: "redis://localhost:6379/0"
-#   namespace: "tuft"
+#   namespace: "persistence-tuft-server"
 CONFIG_EOF
     fi
 }

diff --git a/src/tuft/cli.py b/src/tuft/cli.py
@@ -10,12 +10,21 @@
 import uvicorn
 
 from .config import AppConfig, load_yaml_config
+from .exceptions import ConfigMismatchError
+from .persistence import (
+    flush_all_data,
+    get_current_namespace,
+    get_redis_store,
+    validate_config_signature,
+)
 from .server import create_root_app
 from .telemetry import init_telemetry
 from .telemetry.metrics import ResourceMetricsCollector
 
 
 app = typer.Typer(help="TuFT - Tenant-unified Fine-Tuning Server.", no_args_is_help=True)
+# Sub-application grouping the data-clearing commands; registered on `app` under the name "clear".
+clear_app = typer.Typer(help="Clear data commands.", no_args_is_help=True)
+app.add_typer(clear_app, name="clear")
 
 
 # Required for Typer to recognize subcommands when using no_args_is_help=True
@@ -79,6 +88,113 @@ def _build_config(
     return config
 
 
+# `--force` / `-f` flag (defaults to False); used as the default of `clear_persistence`'s `force` parameter.
+_FORCE_OPTION = typer.Option(
+    False,
+    "--force",
+    "-f",
+    help="Skip confirmation prompts when clearing persistence data.",
+)
+
+
+@clear_app.command(name="persistence")
+def clear_persistence(
+    config_path: Path | None = _CONFIG_OPTION,
+    force: bool = _FORCE_OPTION,
+) -> None:
+    """Clear all persistence data and start fresh.
+
+    This command clears all existing persistence data in the configured namespace.
+    Use this when the configuration has changed and you want to discard old data.
+    """
+    # Build config to get persistence settings
+    try:
+        resolved_config_path = _resolve_config_path(config_path)
+        config = load_yaml_config(resolved_config_path)
+    except typer.BadParameter as e:
+        typer.secho(f"Error: {e}", fg=typer.colors.RED)
+        raise typer.Exit(1) from e
+
+    if not config.persistence.enabled:
+        typer.secho(
+            "Persistence is disabled in the configuration. Nothing to clear.",
+            fg=typer.colors.YELLOW,
+        )
+        raise typer.Exit(0)
+
+    # Configure the store
+    store = get_redis_store()
+    store.configure(config.persistence)
+    namespace = get_current_namespace()
+
+    if not force:
+        typer.secho(
+            "\n🚨🚨🚨 CRITICAL WARNING 🚨🚨🚨\n",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.secho(
+            "This command will PERMANENTLY DELETE ALL persistence data!\n",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.secho(
+            f"📦 Target namespace: '{namespace}'\n",
+            fg=typer.colors.YELLOW,
+            bold=True,
+        )
+        typer.echo(
+            f"This IRREVERSIBLE action will destroy ALL data in namespace '{namespace}':\n"
+            "  ❌ All saved sessions\n"
+            "  ❌ All training run records and checkpoint metadata (NOT local checkpoint files)\n"
+            "  ❌ All future records\n"
+            "  ❌ All sampling session records\n"
+            "  ❌ Configuration signature\n"
+            "\n"
+            "⚠️  The server will start fresh with NO previous state.\n"
+            "⚠️  This action CANNOT be undone!\n"
+            "⚠️  Local checkpoint files on disk are NOT affected.\n"
+            f"⚠️  Only data in namespace '{namespace}' will be affected.\n"
+        )
+        confirmed = typer.confirm(
+            f"Do you REALLY want to delete all data in namespace '{namespace}'?",
+            default=False,
+        )
+        if not confirmed:
+            typer.echo("Aborted. No data was cleared.")
+            raise typer.Exit(0)
+
+    deleted_count, cleared_namespace = flush_all_data()
+    typer.secho(
+        f"✅ Cleared {deleted_count} keys from namespace '{cleared_namespace}'.",
+        fg=typer.colors.GREEN,
+    )
+    typer.echo("Persistence data has been cleared. You can now start the server fresh.")
+
+
+def _validate_persistence_config(config: AppConfig) -> None:
+    """Validate that persistence config matches stored config.
+
+    If config mismatch is detected, exits with an error message.
+    """
+    if not config.persistence.enabled:
+        return
+
+    # Configure the Redis store first
+    store = get_redis_store()
+    store.configure(config.persistence)
+
+    try:
+        validate_config_signature(config)
+    except ConfigMismatchError as e:
+        typer.secho(
+            "\n 🚫 FATAL ERROR: Configuration Mismatch Detected 🚫",
+            fg=typer.colors.RED,
+            bold=True,
+        )
+        typer.echo(f"\n{e}\n")
+        raise typer.Exit(1) from e
+
+
 def _init_telemetry(config: AppConfig, log_level: str) -> None:
     """Initialize OpenTelemetry if enabled."""
     # Configure root logger level to ensure logs flow to OTel
@@ -104,6 +220,10 @@ def launch(
 ) -> None:
     """Launch the TuFT server."""
     app_config = _build_config(config_path, checkpoint_dir)
+
+    # Validate persistence configuration before starting
+    _validate_persistence_config(app_config)
+
     # Initialize telemetry before starting the server
     _init_telemetry(app_config, log_level)
     logging.getLogger("tuft").info("Server starting on %s:%s", host, port)