diff --git a/chutes/cfsv_wrapper.py b/chutes/cfsv_wrapper.py
index 754a3ac..b501fe3 100644
--- a/chutes/cfsv_wrapper.py
+++ b/chutes/cfsv_wrapper.py
@@ -7,7 +7,13 @@ def main():
     binary_path = Path(__file__).parent / "cfsv"
-    os.chmod(binary_path, os.stat(binary_path).st_mode | stat.S_IEXEC)
+    if not binary_path.exists():
+        print(f"Error: cfsv binary not found at {binary_path}", file=sys.stderr)
+        sys.exit(1)
+    try:
+        os.chmod(binary_path, os.stat(binary_path).st_mode | stat.S_IEXEC)
+    except OSError as e:
+        print(f"Warning: Could not set executable permission on {binary_path}: {e}", file=sys.stderr)
     result = subprocess.run([str(binary_path)] + sys.argv[1:])
     sys.exit(result.returncode)
diff --git a/chutes/chute/base.py b/chutes/chute/base.py
index c3ee622..39472f3 100644
--- a/chutes/chute/base.py
+++ b/chutes/chute/base.py
@@ -216,6 +216,49 @@ async def initialize(self):
         for job in self._jobs:
            logger.info(f"Found job definition: {job._func.__name__}")
 
+    def _register_warmup_routes(self):
+        """
+        Register the warmup cord routes.
+        These routes are available even during cold start.
+        """
+        from chutes.chute.warmup import get_warmup_manager
+        from fastapi import Request, Response, status
+        from fastapi.responses import JSONResponse
+
+        warmup_manager = get_warmup_manager()
+
+        async def warmup_kick(request: Request) -> Response:
+            """POST /warmup/kick - Start or restart warmup (idempotent)."""
+            try:
+                result = await warmup_manager.kick()
+                return JSONResponse(
+                    content=result,
+                    status_code=status.HTTP_202_ACCEPTED
+                )
+            except Exception as e:
+                logger.error(f"Error in warmup/kick: {e}")
+                return JSONResponse(
+                    content={"error": str(e)},
+                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
+                )
+
+        async def warmup_status(request: Request) -> Response:
+            """GET /warmup/status - Get current warmup status."""
+            try:
+                status_data = await warmup_manager.get_status()
+                return JSONResponse(content=status_data.model_dump())
+            except Exception as e:
+                logger.error(f"Error in warmup/status: {e}")
+                return JSONResponse(
+                    content={"error": str(e)},
+                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR
+                )
+
+        # Register routes
+        self.add_api_route("/warmup/kick", warmup_kick, methods=["POST"])
+        self.add_api_route("/warmup/status", warmup_status, methods=["GET"])
+        logger.info("Registered warmup cords: /warmup/kick, /warmup/status")
+
     def cord(self, **kwargs):
         """
         Decorator to define a parachute cord (function).
diff --git a/chutes/chute/cord.py b/chutes/chute/cord.py
index b144c32..5745bbc 100644
--- a/chutes/chute/cord.py
+++ b/chutes/chute/cord.py
@@ -453,7 +453,7 @@ async def _request_handler(self, request: Request):
         args = []
         kwargs = {"json": request.state.decrypted} if request.state.decrypted else {}
         if not self._passthrough:
-            if self.input_models and all([isinstance(args[idx], dict) for idx in range(len(args))]):
+            if self.input_models and len(args) == len(self.input_models) and all(isinstance(args[idx], dict) for idx in range(len(args))):
                 try:
                     args = [
                         self.input_models[idx](**args[idx]) for idx in range(len(self.input_models))
diff --git a/chutes/chute/template/vllm.py b/chutes/chute/template/vllm.py
index 4bfe07e..16b392c 100644
--- a/chutes/chute/template/vllm.py
+++ b/chutes/chute/template/vllm.py
@@ -252,8 +252,8 @@ def build_vllm_chute(
     suggested_commit = None
     try:
         suggested_commit = get_current_hf_commit(model_name)
-    except Exception:
-        ...
+    except Exception as exc:
+        logger.debug(f"Failed to fetch current HF commit for {model_name}: {exc}")
     suggestion = (
         "Unable to fetch the current refs/heads/main commit from HF, please check the model name."
         if not suggested_commit
@@ -314,6 +314,10 @@ async def initialize_vllm(self):
         nonlocal model_name
         nonlocal image
 
+        # Initialize warmup manager for phase tracking
+        from chutes.chute.warmup import get_warmup_manager
+        warmup_manager = get_warmup_manager()
+
         # Imports here to avoid needing torch/vllm/etc. to just perform inference/build remotely.
         import torch
         import multiprocessing
@@ -323,6 +327,9 @@ async def initialize_vllm(self):
         from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
         import vllm.version as vv
 
+        # Update warmup phase: pulling
+        await warmup_manager.update_phase("pulling", 10)
+
         # Force download in initializer with some retries.
         from huggingface_hub import snapshot_download
 
@@ -347,6 +354,9 @@ async def initialize_vllm(self):
         # Set torch inductor, flashinfer, etc., cache directories.
         set_default_cache_dirs(download_path)
 
+        # Update warmup phase: loading
+        await warmup_manager.update_phase("loading", 40)
+
         try:
             from vllm.entrypoints.openai.serving_engine import BaseModelPath
         except Exception:
@@ -392,6 +402,9 @@ async def initialize_vllm(self):
         # Initialize engine directly in the main process
         self.engine = AsyncLLMEngine.from_engine_args(engine_args)
 
+        # Update warmup phase: tokenizer
+        await warmup_manager.update_phase("tokenizer", 60)
+
         base_model_paths = [
             BaseModelPath(name=chute.name, model_path=chute.name),
         ]
@@ -481,6 +494,13 @@ async def initialize_vllm(self):
         if not old_vllm:
             self.state.openai_serving_models = extra_args["models"]
 
+        # Update warmup phase: tiny_infer (engine is up; no separate test inference runs here)
+        await warmup_manager.update_phase("tiny_infer", 80)
+
+        # Readiness is verified via /v1/models rather than by an actual tiny inference:
+        # the warmup manager polls that route and flips state to ready once it returns 200.
+        logger.info("vLLM engine initialized, waiting for /v1/models to become ready")
+
 def _parse_stream_chunk(encoded_chunk):
     chunk = encoded_chunk if isinstance(encoded_chunk, str) else encoded_chunk.decode()
     if "data: {" in chunk:
diff --git a/chutes/chute/warmup.py b/chutes/chute/warmup.py
new file mode 100644
index 0000000..4189a1c
--- /dev/null
+++ b/chutes/chute/warmup.py
@@ -0,0 +1,173 @@
+"""
+Warmup state manager for canonical warmup cords.
+Provides /warmup/kick and /warmup/status endpoints.
+"""
+
+import time
+import asyncio
+import aiohttp
+from typing import Optional
+from loguru import logger
+from chutes.chute.warmup_state import WarmupStatus
+
+
+class WarmupManager:
+    """
+    Singleton warmup state manager.
+    Manages warmup state, phases, and readiness checking.
+    """
+
+    _instance: Optional["WarmupManager"] = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        if self._initialized:
+            return
+        self._state = WarmupStatus()
+        self._warmup_task: Optional[asyncio.Task] = None
+        self._state_lock = asyncio.Lock()
+        self._started_at: Optional[float] = None
+        self._port: int = 8000  # Default port; overridden via set_port()
+        self._initialized = True
+
+    async def kick(self) -> dict:
+        """
+        Start warmup, or restart it after an error (idempotent).
+        Returns 202 Accepted response data.
+        """
+        async with self._state_lock:
+            if self._state.state in ("warming", "ready"):
+                return {
+                    "ok": True,
+                    "state": self._state.state,
+                    "schema": self._state.schema
+                }
+
+            # Reset and start warmup (also resets error state)
+            self._state.state = "warming"
+            self._state.phase = "pulling"
+            self._state.progress = 0
+            self._state.error = None
+            self._started_at = time.time()
+            self._state.elapsed_sec = 0.0
+
+            # Cancel existing task if running
+            if self._warmup_task and not self._warmup_task.done():
+                self._warmup_task.cancel()
+
+            # Start background warmup task
+            self._warmup_task = asyncio.create_task(self._do_warmup())
+
+            return {
+                "ok": True,
+                "state": "warming",
+                "schema": self._state.schema
+            }
+
+    async def get_status(self) -> WarmupStatus:
+        """
+        Get current warmup status (fast, <500ms).
+        """
+        async with self._state_lock:
+            if self._started_at:
+                self._state.elapsed_sec = time.time() - self._started_at
+            return self._state
+
+    async def update_phase(self, phase: str, progress: int):
+        """
+        Update warmup phase and progress.
+        Called by templates during model initialization.
+
+        Args:
+            phase: One of "pulling", "loading", "tokenizer", "tiny_infer", "ready"
+            progress: Integer from 0 to 100 (values outside that range are clamped)
+        """
+        # Clamp progress into the 0-100 range
+        progress = max(0, min(100, progress))
+
+        async with self._state_lock:
+            if self._state.state == "warming":
+                # Any phase string is accepted for flexibility, but warn on unknown values
+                valid_phases = ("pulling", "loading", "tokenizer", "tiny_infer", "ready")
+                if phase not in valid_phases:
+                    logger.warning(f"Invalid warmup phase: {phase}, expected one of {valid_phases}")
+
+                self._state.phase = phase
+                self._state.progress = progress
+                if self._started_at:
+                    self._state.elapsed_sec = time.time() - self._started_at
+
+    def set_port(self, port: int):
+        """
+        Set the port for internal /v1/models checks.
+        Should be called during chute initialization.
+        """
+        self._port = port
+
+    async def _check_models_ready(self, base_url: str) -> bool:
+        """
+        Check if /v1/models endpoint returns 200.
+        Used for readiness bridge.
+        """
+        try:
+            timeout = aiohttp.ClientTimeout(total=5.0)
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                async with session.get(f"{base_url.rstrip('/')}/v1/models") as response:
+                    return response.status == 200
+        except Exception as e:
+            logger.debug(f"Models readiness check failed: {e}")
+            return False
+
+    async def _do_warmup(self):
+        """
+        Background task that monitors warmup progress and checks readiness.
+ """ + # Use localhost for internal checks since we're checking the same instance + base_url = f"http://127.0.0.1:{self._port}" + + # Wait for models to become ready (max 3 minutes) + max_wait = 180 # 3 minutes + check_interval = 1.0 # Check every second + + for attempt in range(max_wait): + await asyncio.sleep(check_interval) + + # Update elapsed time + async with self._state_lock: + if self._started_at: + self._state.elapsed_sec = time.time() - self._started_at + + # Check if models are ready + if await self._check_models_ready(base_url): + async with self._state_lock: + # Only set ready if we're still in warming state + if self._state.state == "warming": + self._state.phase = "ready" + self._state.progress = 100 + self._state.state = "ready" + if self._started_at: + self._state.elapsed_sec = time.time() - self._started_at + logger.info("Warmup completed: models are ready") + return + + # Timeout + async with self._state_lock: + if self._state.state == "warming": + self._state.state = "error" + self._state.error = "models_not_ready_within_timeout" + if self._started_at: + self._state.elapsed_sec = time.time() - self._started_at + logger.warning("Warmup timed out: models not ready within 3 minutes") + + +def get_warmup_manager() -> WarmupManager: + """ + Get the singleton warmup manager instance. + """ + return WarmupManager() + diff --git a/chutes/chute/warmup_state.py b/chutes/chute/warmup_state.py new file mode 100644 index 0000000..2994f90 --- /dev/null +++ b/chutes/chute/warmup_state.py @@ -0,0 +1,20 @@ +""" +Pydantic models for warmup status schema v1. +""" + +from pydantic import BaseModel +from typing import Literal, Optional + + +class WarmupStatus(BaseModel): + """ + Warmup status response schema v1. + Matches chutes.warmup.status.v1 specification. 
+ """ + schema: Literal["chutes.warmup.status.v1"] = "chutes.warmup.status.v1" + state: Literal["idle", "warming", "ready", "error"] = "idle" + phase: Literal["idle", "pulling", "loading", "tokenizer", "tiny_infer", "ready"] = "idle" + progress: int = 0 # 0-100 + elapsed_sec: float = 0.0 + error: Optional[str] = None + diff --git a/chutes/config.py b/chutes/config.py index 82e7fb3..0492786 100644 --- a/chutes/config.py +++ b/chutes/config.py @@ -44,7 +44,9 @@ def get_generic_config() -> GenericConfig: def get_config() -> Config: global _config if _config is None: - # def load_config(self): + raw_config = None + auth_config = None + if not os.path.exists(CONFIG_PATH): os.makedirs(os.path.dirname(os.path.abspath(CONFIG_PATH)), exist_ok=True) if not ALLOW_MISSING: @@ -64,14 +66,30 @@ def get_config() -> Config: hotkey_name=raw_config.get("auth", "hotkey_name"), hotkey_ss58address=raw_config.get("auth", "hotkey_ss58address"), ) - except NoSectionError: if not ALLOW_MISSING: raise AuthenticationRequired( f"Please ensure you have an [auth] section defined in {CONFIG_PATH} with 'hotkey_seed', 'hotkey_name', and 'hotkey_ss58address' values" ) - - api_base_url = raw_config.get("api", "base_url") + auth_config = None + + # Initialize auth_config if missing and ALLOW_MISSING is True + if auth_config is None: + auth_config = AuthConfig( + user_id=None, + username=None, + hotkey_seed=None, + hotkey_name=None, + hotkey_ss58address=None, + ) + + # Get API base URL from config file or environment + api_base_url = None + if raw_config: + try: + api_base_url = raw_config.get("api", "base_url") + except (NoSectionError, KeyError): + pass if not api_base_url: api_base_url = os.getenv("CHUTES_API_URL", "https://api.chutes.ai") generic_config = GenericConfig(api_base_url=api_base_url) diff --git a/chutes/entrypoint/build.py b/chutes/entrypoint/build.py index 83db285..d0342c4 100644 --- a/chutes/entrypoint/build.py +++ b/chutes/entrypoint/build.py @@ -140,7 +140,7 @@ async def _build_remote(image, wait=None, public: bool = False, logo_id: str = N if os.path.exists(temp_zip): os.remove(temp_zip) - logger.info(f"Created the build package: {output_path}, uploading...") + logger.info(f"Created the build package: {final_path}, uploading...") form_data = aiohttp.FormData() form_data.add_field("username", image.username) form_data.add_field("name", image.name) diff --git a/chutes/entrypoint/run.py b/chutes/entrypoint/run.py index d700cb8..d3ef577 100644 --- a/chutes/entrypoint/run.py +++ b/chutes/entrypoint/run.py @@ -967,7 +967,11 @@ async def _run_chute(): chute_module, chute = load_chute(chute_ref_str=chute_ref_str, config_path=None, debug=debug) chute = chute.chute if isinstance(chute, ChutePack) else chute if job_method: - job_obj = next(j for j in chute._jobs if j.name == job_method) + try: + job_obj = next(j for j in chute._jobs if j.name == job_method) + except StopIteration: + logger.error(f"Job method '{job_method}' not found in chute jobs") + sys.exit(1) # Configure dev method job payload/method/etc. 
if dev and dev_job_data_path: @@ -975,9 +979,22 @@ async def _run_chute(): job_data = json.loads(infile.read()) job_id = str(uuid.uuid4()) job_method = dev_job_method - job_obj = next(j for j in chute._jobs if j.name == dev_job_method) + try: + job_obj = next(j for j in chute._jobs if j.name == dev_job_method) + except StopIteration: + logger.error(f"Job method '{dev_job_method}' not found in chute jobs") + sys.exit(1) logger.info(f"Creating task, dev mode, for {job_method=}") + # Register warmup routes early (before model initialization) + # This ensures they're available during cold start + chute._register_warmup_routes() + + # Set port for warmup manager's internal /v1/models checks + from chutes.chute.warmup import get_warmup_manager + warmup_manager = get_warmup_manager() + warmup_manager.set_port(port or 8000) + # Run the chute's initialization code. await chute.initialize() @@ -1014,7 +1031,7 @@ async def activate_on_startup(): activated = True if not dev and not allow_external_egress: if netnanny.lock_network() != 0: - logger.error("Failed to unlock network") + logger.error("Failed to lock network") sys.exit(137) logger.success("Successfully enabled NetNanny network lock.") break
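
Usage sketch (reviewer note, not part of the patch): the snippet below drives the new warmup cords end to end, assuming a chute serving on localhost:8000 (the WarmupManager default). The /warmup/kick and /warmup/status paths and the v1 response fields come straight from this diff; the client itself (its name, poll interval, and client-side deadline) is illustrative only.

# warmup_client_sketch.py - hypothetical helper, not part of this patch.
import asyncio

import aiohttp

BASE_URL = "http://127.0.0.1:8000"  # assumption: WarmupManager's default port


async def wait_until_warm(timeout: float = 300.0, poll_interval: float = 2.0) -> dict:
    """Kick warmup, then poll /warmup/status until the state is ready or error."""
    async with aiohttp.ClientSession() as session:
        # POST /warmup/kick is idempotent: re-posting while warming or ready is a no-op.
        async with session.post(f"{BASE_URL}/warmup/kick") as resp:
            kick = await resp.json()  # e.g. {"ok": true, "state": "warming", "schema": "chutes.warmup.status.v1"}
            print(f"kick -> HTTP {resp.status}: {kick}")

        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while loop.time() < deadline:
            async with session.get(f"{BASE_URL}/warmup/status") as resp:
                status = await resp.json()  # chutes.warmup.status.v1 payload
            print(
                f"state={status['state']} phase={status['phase']} "
                f"progress={status['progress']}% elapsed={status['elapsed_sec']:.0f}s"
            )
            if status["state"] in ("ready", "error"):
                return status
            await asyncio.sleep(poll_interval)
    raise TimeoutError("chute did not become ready before the client-side deadline")


if __name__ == "__main__":
    final = asyncio.run(wait_until_warm())
    print(f"final state: {final['state']}, error: {final.get('error')}")

While warming, status responses should walk phase through pulling -> loading -> tokenizer -> tiny_infer, with the readiness bridge flipping state to ready once the internal /v1/models poll returns 200. An errored warmup can be re-kicked, since kick() only short-circuits in the warming and ready states.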