Commit bab38a4

docs: document 10-minute semantic router quickstart flow
Signed-off-by: samzong <[email protected]>
1 parent b3658bc commit bab38a4

File tree

8 files changed (+672 −0 lines)


examples/quickstart/README.md

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
# Semantic Router Quickstart

> ⚠️ This is an initial skeleton for the 10-minute quickstart workflow. Content will be expanded in follow-up tasks.
## Goal

Provide a single command path (`make quickstart`) that prepares the router, runs a small evaluation, and surfaces a concise report so that new users can validate the system within 10 minutes.
## Structure

- `quickstart.sh` – orchestrates dependency checks, model downloads, and service startup.
- `quick-eval.sh` – executes a minimal benchmark run and captures results.
- `config-quickstart.yaml` – opinionated defaults for running the router locally.
- `sample-data/` – trimmed datasets used for fast evaluation.
- `templates/` – report and config templates shared by quickstart scripts.
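The README describes `quickstart.sh` as starting with dependency checks. A minimal sketch of that pattern is below; the checked tools (`sh`, `ls`) are placeholders chosen so the snippet runs anywhere, not the script's real dependency list.

```shell
# Hedged sketch of a dependency-check loop like the one quickstart.sh
# is described as performing; tool names here are illustrative only.
missing=0
for tool in sh ls; do
  if command -v "$tool" >/dev/null 2>&1; then
    echo "ok: $tool"
  else
    echo "missing: $tool" >&2
    missing=1
  fi
done
# a real bootstrap script would exit non-zero when $missing is 1
```

The POSIX-portable `command -v` is used rather than `which`, since the latter is not guaranteed to exist on minimal images.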
## Next Steps

1. Teach the benchmark loader to honor `QUICKSTART_SAMPLE_ROOT` so local JSONL slices are used offline.
2. Add a Makefile target (`quickstart`) that chains router bootstrap and quick evaluation.
3. Create CI smoke tests that run the 10-minute flow with the trimmed datasets.
## Quick Evaluation

Run the standalone evaluator once the router is healthy. A typical flow looks like:

```bash
./examples/quickstart/quickstart.sh &   # starts the router in the background (stop with kill %1)
./examples/quickstart/quick-eval.sh --dataset mmlu --samples 5 --mode router
```
The evaluation script will place raw artifacts under `examples/quickstart/results/<timestamp>/raw` and derive:

- `quickstart-summary.csv` – compact metrics table for spreadsheets or dashboards.
- `quickstart-report.md` – Markdown summary suitable for PRs or runbooks.
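The resulting layout can be sketched as follows; this uses a temp directory instead of `examples/quickstart/results/` so it runs anywhere, and the timestamp format is an assumption rather than the script's actual one.

```shell
# Hedged sketch of the results layout described above (temp dir, assumed
# timestamp format; not the actual quick-eval.sh implementation).
ts=$(date +%Y%m%d-%H%M%S)
out_dir="${TMPDIR:-/tmp}/quickstart-results/$ts"
mkdir -p "$out_dir/raw"                   # raw benchmark artifacts land here
: > "$out_dir/quickstart-summary.csv"     # compact metrics table
: > "$out_dir/quickstart-report.md"       # Markdown summary
ls "$out_dir"
```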
Key flags:

- `--mode router|vllm|both` – selects whether the router, the direct vLLM endpoint, or both are evaluated.
- `--samples` – trades runtime against statistical confidence.
- `--output-dir` – custom destination (defaults to a timestamped folder).
- All settings also respect `QUICKSTART_*` environment overrides.
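The environment-override pattern can be sketched like this; the exact variable names (`QUICKSTART_SAMPLES`, `QUICKSTART_MODE`) are assumed to mirror the flag names and are not confirmed by this commit.

```shell
# Hedged sketch: flag defaults overridable via QUICKSTART_* environment
# variables. Variable names are assumptions mirroring the flags above.
QUICKSTART_SAMPLES="${QUICKSTART_SAMPLES:-5}"
QUICKSTART_MODE="${QUICKSTART_MODE:-router}"
echo "samples=$QUICKSTART_SAMPLES mode=$QUICKSTART_MODE"
```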
## Local Sample Data

The `sample-data/` directory now includes trimmed JSONL slices for quick runs:

- `mmlu-sample.jsonl` – 10 multi-category academic questions.
- `arc-sample.jsonl` – 10 middle-school science questions with ARC-style options.

Each record follows the same schema that the benchmark loader expects (`question_id`, `category`, `question`, `options`, `answer`, optional `cot_content`). Sizes stay under 10 KB per file so the quickstart remains lightweight.
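A record in that schema might look like the following; the values are illustrative and are not copied from `mmlu-sample.jsonl`.

```shell
# Hedged example of one JSONL record matching the schema above
# (question_id, category, question, options, answer, cot_content).
sample="${TMPDIR:-/tmp}/demo-sample.jsonl"
cat > "$sample" <<'EOF'
{"question_id": "demo-1", "category": "general", "question": "What is 2 + 2?", "options": ["3", "4", "5", "6"], "answer": "B", "cot_content": ""}
EOF
wc -l < "$sample"   # one record per line, per the JSON Lines convention
```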
**Integration hook**: upcoming work will extend `bench/vllm_semantic_router_bench` to read from these JSONL files whenever `QUICKSTART_SAMPLE_ROOT` is set (falling back to Hugging Face datasets otherwise). Keep the files committed and deterministic so that the automated 10-minute flow can depend on them once the loader change lands.
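The planned fallback behavior can be sketched as a simple branch; this is a sketch of the intent described above, not the pending loader change itself.

```shell
# Hedged sketch of the planned loader fallback: prefer local JSONL slices
# when QUICKSTART_SAMPLE_ROOT is set and present, else use Hugging Face.
sample_root="${QUICKSTART_SAMPLE_ROOT:-}"
if [ -n "$sample_root" ] && [ -f "$sample_root/mmlu-sample.jsonl" ]; then
  source_used="local:$sample_root/mmlu-sample.jsonl"
else
  source_used="huggingface"
fi
echo "dataset source: $source_used"
```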
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
```yaml
# Quickstart configuration tuned for a single-node developer setup.
# Keeps routing options minimal while remaining compatible with the default assets
# shipped by `make download-models`.

bert_model:
  model_id: sentence-transformers/all-MiniLM-L12-v2
  threshold: 0.6
  use_cpu: true

semantic_cache:
  enabled: false
  backend_type: "memory"

prompt_guard:
  enabled: false
  use_modernbert: true
  model_id: "models/jailbreak_classifier_modernbert-base_model"
  threshold: 0.7
  use_cpu: true
  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

classifier:
  category_model:
    model_id: "models/category_classifier_modernbert-base_model"
    threshold: 0.6
    use_cpu: true
    use_modernbert: true
    category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
  pii_model:
    model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
    threshold: 0.7
    use_cpu: true
    pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

vllm_endpoints:
  - name: "local-vllm"
    address: "127.0.0.1"
    port: 8000
    models:
      - "openai/gpt-oss-20b"
    weight: 1

model_config:
  "openai/gpt-oss-20b":
    preferred_endpoints: ["local-vllm"]
    reasoning_family: "gpt-oss"
    pii_policy:
      allow_by_default: true

categories:
  - name: general
    system_prompt: "You are a helpful and knowledgeable assistant. Provide concise, accurate answers."
    model_scores:
      - model: openai/gpt-oss-20b
        score: 0.7
        use_reasoning: false

  - name: reasoning
    system_prompt: "You explain your reasoning with clear numbered steps before giving a final answer."
    model_scores:
      - model: openai/gpt-oss-20b
        score: 0.6
        use_reasoning: true

  - name: safety
    system_prompt: "You prioritize safe completions and refuse harmful requests."
    model_scores:
      - model: openai/gpt-oss-20b
        score: 0.5
        use_reasoning: false

default_model: openai/gpt-oss-20b

reasoning_families:
  gpt-oss:
    type: "chat_template_kwargs"
    parameter: "thinking"

api:
  batch_classification:
    metrics:
      enabled: false

# Tool auto-selection is available but disabled for quickstart.
tools:
  enabled: false
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true
```
