-from __future__ import annotations
-
 import os
 from typing import TYPE_CHECKING, Optional

+import requests
+
 from ._chat import Chat
 from ._provider_openai import OpenAIProvider
-from ._utils import MISSING, MISSING_TYPE, is_testing

 if TYPE_CHECKING:
-    from ._provider_openai import ChatCompletion
-    from .types.openai import ChatClientArgs, SubmitInputArgs
+    from openai.types.chat import ChatCompletionToolParam
+
+    from .types.openai import ChatClientArgs


 def ChatVllm(
@@ -18,11 +18,11 @@ def ChatVllm(
     system_prompt: Optional[str] = None,
     model: Optional[str] = None,
     api_key: Optional[str] = None,
-    seed: Optional[int] | MISSING_TYPE = MISSING,
+    seed: Optional[int] = None,
     kwargs: Optional["ChatClientArgs"] = None,
-) -> Chat["SubmitInputArgs", ChatCompletion]:
+) -> Chat:
     """
-    Chat with a model hosted by vLLM.
+    Chat with a model hosted by vLLM

     [vLLM](https://docs.vllm.ai/en/latest/) is an open source library that
     provides an efficient and convenient LLMs model server. You can use
@@ -32,147 +32,96 @@ def ChatVllm(
     -------------

     ::: {.callout-note}
-    ## vLLM Server
+    ## vLLM runtime

-    You need access to a running vLLM server instance. vLLM provides
-    OpenAI-compatible API endpoints, so this function works with any
-    vLLM deployment that exposes the `/v1/chat/completions` endpoint.
+    `ChatVllm` requires a vLLM server to be running somewhere (either on your
+    machine or a remote server). If you want to run a vLLM server locally, see
+    the [vLLM documentation](https://docs.vllm.ai/en/v0.5.3/getting_started/quickstart.html).
     :::

-    Examples
-    --------
+    ::: {.callout-note}
+    ## Python requirements

-    ```python
-    import os
-    from chatlas import ChatVllm
+    `ChatVllm` requires the `openai` package (e.g., `pip install openai`).
+    :::

-    # Connect to a vLLM server
-    chat = ChatVllm(
-        base_url="http://localhost:8000/v1",
-        model="meta-llama/Llama-2-7b-chat-hf",
-        api_key=os.getenv("VLLM_API_KEY"),  # Optional, depends on server config
-    )
-    chat.chat("What is the capital of France?")
-    ```

     Parameters
     ----------
     base_url
-        The base URL of the vLLM server endpoint. This should include the
-        `/v1` path if the server follows OpenAI API conventions.
-    system_prompt
         A system prompt to set the behavior of the assistant.
+    system_prompt
+        Optional system prompt to prepend to conversation.
+    turns
+        A list of turns to start the chat with (i.e., continuing a previous
+        conversation). If not provided, the conversation begins from scratch. Do
+        not provide non-`None` values for both `turns` and `system_prompt`. Each
+        message in the list should be a dictionary with at least `role` (usually
+        `system`, `user`, or `assistant`, but `tool` is also possible). Normally
+        there is also a `content` field, which is a string.
     model
-        The model to use for the chat. If None, you may need to specify
-        the model name that's loaded on your vLLM server.
-    api_key
-        The API key to use for authentication. Some vLLM deployments may
-        not require authentication. You can set the `VLLM_API_KEY`
-        environment variable instead of passing it directly.
+        Model identifier to use.
     seed
-        Optional integer seed that vLLM uses to try and make output more
-        reproducible.
+        Random seed for reproducibility.
+    api_key
+        API key for authentication. If not provided, the `VLLM_API_KEY` environment
+        variable will be used.
     kwargs
-        Additional arguments to pass to the `openai.OpenAI()` client constructor.
-
-    Returns
-    -------
-    Chat
-        A chat object that retains the state of the conversation.
-
-    Note
-    ----
-    This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`) with
-    the defaults tweaked for vLLM endpoints.
-
-    Note
-    ----
-    vLLM servers are OpenAI-compatible, so this provider uses the same underlying
-    client as OpenAI but configured for your vLLM endpoint. Some advanced OpenAI
-    features may not be available depending on your vLLM server configuration.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatVllm(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    VLLM_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatVllm
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatVllm(base_url="http://localhost:8000/v1")
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export VLLM_API_KEY=...
-    ```
+        Additional arguments to pass to the LLM client.
+
+    Returns:
+        Chat instance configured for vLLM
     """
-    if api_key is None:
-        api_key = os.getenv("VLLM_API_KEY")

-    if isinstance(seed, MISSING_TYPE):
-        seed = 1014 if is_testing() else None
+    if api_key is None:
+        api_key = get_vllm_key()

     if model is None:
-        raise ValueError(
-            "Must specify model. vLLM servers can host different models, so you need to "
-            "specify which one to use. Check your vLLM server's /v1/models endpoint "
-            "to see available models."
-        )
+        models = get_vllm_models(base_url, api_key)
+        available_models = ", ".join(models)
+        raise ValueError(f"Must specify model. Available models: {available_models}")

     return Chat(
-        provider=VllmProvider(
-            api_key=api_key,
-            model=model,
+        provider=VLLMProvider(
             base_url=base_url,
+            model=model,
             seed=seed,
-            name="vLLM",
+            api_key=api_key,
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
     )


-class VllmProvider(OpenAIProvider):
-    """
-    Provider for vLLM endpoints.
+class VLLMProvider(OpenAIProvider):
+    # Just like OpenAI but no strict
+    @staticmethod
+    def _tool_schema_json(
+        schema: "ChatCompletionToolParam",
+    ) -> "ChatCompletionToolParam":
+        schema["function"]["strict"] = False
+        return schema

-    vLLM is OpenAI-compatible but may have some differences in tool handling
-    and other advanced features.
-    """

-    def _chat_perform_args(self, *args, **kwargs):
-        """
-        Customize request arguments for vLLM compatibility.
+def get_vllm_key() -> str:
+    key = os.getenv("VLLM_API_KEY", os.getenv("VLLM_KEY"))
+    if not key:
+        raise ValueError("VLLM_API_KEY environment variable not set")
+    return key
+
+
+def get_vllm_models(base_url: str, api_key: Optional[str] = None) -> list[str]:
+    if api_key is None:
+        api_key = get_vllm_key()
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.get(f"{base_url}/v1/models", headers=headers)
+    response.raise_for_status()
+    data = response.json()

-        vLLM may not support all OpenAI features like stream_options,
-        so we remove potentially unsupported parameters.
-        """
-        # Get the base arguments from OpenAI provider
-        result = super()._chat_perform_args(*args, **kwargs)
+    return [model["id"] for model in data["data"]]

-        # Remove stream_options if present (some vLLM versions don't support it)
-        if "stream_options" in result:
-            del result["stream_options"]

-        return result
+# def chat_vllm_test(**kwargs) -> Chat:
+#     """Create a test chat instance with default parameters."""
+#     return ChatVllm(base_url="https://llm.nrp-nautilus.io/", model="llama3", **kwargs)
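For reference, here is a minimal usage sketch of the revised `ChatVllm` signature, adapted from the example the diff removes from the docstring. The server URL and model name are placeholders for your own vLLM deployment; the API key is read from `VLLM_API_KEY`, mirroring the fallback the new code performs via `get_vllm_key()` when `api_key` is omitted.

```python
import os

from chatlas import ChatVllm

# Placeholder endpoint and model; point these at your own vLLM server.
chat = ChatVllm(
    base_url="http://localhost:8000/v1",
    model="meta-llama/Llama-2-7b-chat-hf",
    api_key=os.getenv("VLLM_API_KEY"),  # optional; if None, get_vllm_key() is used
    seed=42,
)
chat.chat("What is the capital of France?")
```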
0 commit comments
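If you are unsure which model identifiers the server exposes, the new `get_vllm_models()` helper queries the OpenAI-compatible `/v1/models` endpoint. Since that helper may not be part of chatlas's public API, the sketch below makes the same request directly with `requests`, following the body shown in the diff. Note that the helper appends `/v1/models` to `base_url` itself, so the URL here is the server root without a trailing `/v1` (a placeholder).

```python
import os

import requests

base_url = "http://localhost:8000"  # placeholder server root (no trailing /v1)
headers = {"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"}

# List the models the vLLM server is currently serving.
response = requests.get(f"{base_url}/v1/models", headers=headers)
response.raise_for_status()
model_ids = [model["id"] for model in response.json()["data"]]
print(model_ids)
```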