Commit fb86ef9

cleanup
1 parent 760bc6c commit fb86ef9

3 files changed: +71 -268 lines changed


chatlas/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -19,10 +19,10 @@
 from ._provider_perplexity import ChatPerplexity
 from ._provider_portkey import ChatPortkey
 from ._provider_snowflake import ChatSnowflake
+from ._provider_vllm import ChatVllm
 from ._tokens import token_usage
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn
-from ._vllm import ChatVLLM
 
 try:
     from ._version import version as __version__
@@ -44,10 +44,10 @@
     "ChatOpenRouter",
     "ChatAzureOpenAI",
     "ChatPerplexity",
-    "ChatVLLM",
     "ChatPortkey",
     "ChatSnowflake",
     "ChatVertex",
+    "ChatVllm",
     "Chat",
     "content_image_file",
     "content_image_plot",

chatlas/_provider_vllm.py

Lines changed: 69 additions & 120 deletions

@@ -1,15 +1,15 @@
-from __future__ import annotations
-
 import os
 from typing import TYPE_CHECKING, Optional
 
+import requests
+
 from ._chat import Chat
 from ._provider_openai import OpenAIProvider
-from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._provider_openai import ChatCompletion
-    from .types.openai import ChatClientArgs, SubmitInputArgs
+    from openai.types.chat import ChatCompletionToolParam
+
+    from .types.openai import ChatClientArgs
 
 
 def ChatVllm(
@@ -18,11 +18,11 @@ def ChatVllm(
     system_prompt: Optional[str] = None,
     model: Optional[str] = None,
     api_key: Optional[str] = None,
-    seed: Optional[int] | MISSING_TYPE = MISSING,
+    seed: Optional[int] = None,
     kwargs: Optional["ChatClientArgs"] = None,
-) -> Chat["SubmitInputArgs", ChatCompletion]:
+) -> Chat:
     """
-    Chat with a model hosted by vLLM.
+    Chat with a model hosted by vLLM
 
     [vLLM](https://docs.vllm.ai/en/latest/) is an open source library that
     provides an efficient and convenient LLMs model server. You can use
@@ -32,147 +32,96 @@ def ChatVllm(
     -------------
 
     ::: {.callout-note}
-    ## vLLM Server
+    ## vLLM runtime
 
-    You need access to a running vLLM server instance. vLLM provides
-    OpenAI-compatible API endpoints, so this function works with any
-    vLLM deployment that exposes the `/v1/chat/completions` endpoint.
+    `ChatVllm` requires a vLLM server to be running somewhere (either on your
+    machine or a remote server). If you want to run a vLLM server locally, see
+    the [vLLM documentation](https://docs.vllm.ai/en/v0.5.3/getting_started/quickstart.html).
     :::
 
-    Examples
-    --------
+    ::: {.callout-note}
+    ## Python requirements
 
-    ```python
-    import os
-    from chatlas import ChatVllm
+    `ChatVllm` requires the `openai` package (e.g., `pip install openai`).
+    :::
 
-    # Connect to a vLLM server
-    chat = ChatVllm(
-        base_url="http://localhost:8000/v1",
-        model="meta-llama/Llama-2-7b-chat-hf",
-        api_key=os.getenv("VLLM_API_KEY"),  # Optional, depends on server config
-    )
-    chat.chat("What is the capital of France?")
-    ```
 
     Parameters
     ----------
     base_url
-        The base URL of the vLLM server endpoint. This should include the
-        `/v1` path if the server follows OpenAI API conventions.
-    system_prompt
         A system prompt to set the behavior of the assistant.
+    system_prompt
+        Optional system prompt to prepend to conversation.
+    turns
+        A list of turns to start the chat with (i.e., continuing a previous
+        conversation). If not provided, the conversation begins from scratch. Do
+        not provide non-`None` values for both `turns` and `system_prompt`. Each
+        message in the list should be a dictionary with at least `role` (usually
+        `system`, `user`, or `assistant`, but `tool` is also possible). Normally
+        there is also a `content` field, which is a string.
     model
-        The model to use for the chat. If None, you may need to specify
-        the model name that's loaded on your vLLM server.
-    api_key
-        The API key to use for authentication. Some vLLM deployments may
-        not require authentication. You can set the `VLLM_API_KEY`
-        environment variable instead of passing it directly.
+        Model identifier to use.
     seed
-        Optional integer seed that vLLM uses to try and make output more
-        reproducible.
+        Random seed for reproducibility.
+    api_key
+        API key for authentication. If not provided, the `VLLM_API_KEY` environment
+        variable will be used.
     kwargs
-        Additional arguments to pass to the `openai.OpenAI()` client constructor.
-
-    Returns
-    -------
-    Chat
-        A chat object that retains the state of the conversation.
-
-    Note
-    ----
-    This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`) with
-    the defaults tweaked for vLLM endpoints.
-
-    Note
-    ----
-    vLLM servers are OpenAI-compatible, so this provider uses the same underlying
-    client as OpenAI but configured for your vLLM endpoint. Some advanced OpenAI
-    features may not be available depending on your vLLM server configuration.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatVllm(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    VLLM_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatVllm
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatVllm(base_url="http://localhost:8000/v1")
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export VLLM_API_KEY=...
-    ```
+        Additional arguments to pass to the LLM client.
+
+    Returns:
+        Chat instance configured for vLLM
     """
-    if api_key is None:
-        api_key = os.getenv("VLLM_API_KEY")
 
-    if isinstance(seed, MISSING_TYPE):
-        seed = 1014 if is_testing() else None
+    if api_key is None:
+        api_key = get_vllm_key()
 
     if model is None:
-        raise ValueError(
-            "Must specify model. vLLM servers can host different models, so you need to "
-            "specify which one to use. Check your vLLM server's /v1/models endpoint "
-            "to see available models."
-        )
+        models = get_vllm_models(base_url, api_key)
+        available_models = ", ".join(models)
+        raise ValueError(f"Must specify model. Available models: {available_models}")
 
     return Chat(
-        provider=VllmProvider(
-            api_key=api_key,
-            model=model,
+        provider=VLLMProvider(
             base_url=base_url,
+            model=model,
             seed=seed,
-            name="vLLM",
+            api_key=api_key,
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
     )
 
 
-class VllmProvider(OpenAIProvider):
-    """
-    Provider for vLLM endpoints.
+class VLLMProvider(OpenAIProvider):
+    # Just like OpenAI but no strict
+    @staticmethod
+    def _tool_schema_json(
+        schema: "ChatCompletionToolParam",
+    ) -> "ChatCompletionToolParam":
+        schema["function"]["strict"] = False
+        return schema
 
-    vLLM is OpenAI-compatible but may have some differences in tool handling
-    and other advanced features.
-    """
 
-    def _chat_perform_args(self, *args, **kwargs):
-        """
-        Customize request arguments for vLLM compatibility.
+def get_vllm_key() -> str:
+    key = os.getenv("VLLM_API_KEY", os.getenv("VLLM_KEY"))
+    if not key:
+        raise ValueError("VLLM_API_KEY environment variable not set")
+    return key
+
+
+def get_vllm_models(base_url: str, api_key: Optional[str] = None) -> list[str]:
+    if api_key is None:
+        api_key = get_vllm_key()
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.get(f"{base_url}/v1/models", headers=headers)
+    response.raise_for_status()
+    data = response.json()
 
-        vLLM may not support all OpenAI features like stream_options,
-        so we remove potentially unsupported parameters.
-        """
-        # Get the base arguments from OpenAI provider
-        result = super()._chat_perform_args(*args, **kwargs)
+    return [model["id"] for model in data["data"]]
 
-        # Remove stream_options if present (some vLLM versions don't support it)
-        if "stream_options" in result:
-            del result["stream_options"]
 
-        return result
+# def chat_vllm_test(**kwargs) -> Chat:
+#     """Create a test chat instance with default parameters."""
+#     return ChatVllm(base_url="https://llm.nrp-nautilus.io/", model="llama3", **kwargs)
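For context, a rough sketch of how the new helpers fit together, assuming a server at a placeholder URL and `VLLM_API_KEY` set in the environment; note that `get_vllm_models` is a module-level helper in `chatlas._provider_vllm` rather than a public export, and that it requests `{base_url}/v1/models`:

```python
from chatlas._provider_vllm import ChatVllm, get_vllm_models

# Placeholder base URL; the helper appends /v1/models to it.
# Assumes VLLM_API_KEY (or VLLM_KEY) is set; otherwise get_vllm_key() raises.
base_url = "http://localhost:8000"

# Discover which models the server is hosting, then chat with the first one.
models = get_vllm_models(base_url)
print(models)

chat = ChatVllm(base_url=base_url, model=models[0])
chat.chat("Summarize vLLM in one sentence.")
```

Calling `ChatVllm` without a `model` surfaces the same information: the new code queries the server and raises a `ValueError` listing the available models.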
