-from __future__ import annotations
-
 import os
 from typing import TYPE_CHECKING, Optional

+import requests
+
 from ._chat import Chat
 from ._provider_openai import OpenAIProvider
-from ._utils import MISSING, MISSING_TYPE, is_testing

 if TYPE_CHECKING:
-    from ._provider_openai import ChatCompletion
-    from .types.openai import ChatClientArgs, SubmitInputArgs
+    from openai.types.chat import ChatCompletionToolParam
+
+    from .types.openai import ChatClientArgs


 def ChatVllm(
@@ -18,11 +18,11 @@ def ChatVllm(
     system_prompt: Optional[str] = None,
     model: Optional[str] = None,
     api_key: Optional[str] = None,
-    seed: Optional[int] | MISSING_TYPE = MISSING,
+    seed: Optional[int] = None,
     kwargs: Optional["ChatClientArgs"] = None,
-) -> Chat["SubmitInputArgs", ChatCompletion]:
+) -> Chat:
     """
-    Chat with a model hosted by vLLM.
+    Chat with a model hosted by vLLM

     [vLLM](https://docs.vllm.ai/en/latest/) is an open source library that
     provides an efficient and convenient LLMs model server. You can use
@@ -32,147 +32,96 @@ def ChatVllm(
     -------------

     ::: {.callout-note}
-    ## vLLM Server
+    ## vLLM runtime

-    You need access to a running vLLM server instance. vLLM provides
-    OpenAI-compatible API endpoints, so this function works with any
-    vLLM deployment that exposes the `/v1/chat/completions` endpoint.
+    `ChatVllm` requires a vLLM server to be running somewhere (either on your
+    machine or a remote server). If you want to run a vLLM server locally, see
+    the [vLLM documentation](https://docs.vllm.ai/en/v0.5.3/getting_started/quickstart.html).
     :::

-    Examples
-    --------
+    ::: {.callout-note}
+    ## Python requirements

-    ```python
-    import os
-    from chatlas import ChatVllm
+    `ChatVllm` requires the `openai` package (e.g., `pip install openai`).
+    :::

-    # Connect to a vLLM server
-    chat = ChatVllm(
-        base_url="http://localhost:8000/v1",
-        model="meta-llama/Llama-2-7b-chat-hf",
-        api_key=os.getenv("VLLM_API_KEY"),  # Optional, depends on server config
-    )
-    chat.chat("What is the capital of France?")
-    ```

     Parameters
     ----------
     base_url
-        The base URL of the vLLM server endpoint. This should include the
-        `/v1` path if the server follows OpenAI API conventions.
-    system_prompt
         A system prompt to set the behavior of the assistant.
+    system_prompt
+        Optional system prompt to prepend to conversation.
+    turns
+        A list of turns to start the chat with (i.e., continuing a previous
+        conversation). If not provided, the conversation begins from scratch. Do
+        not provide non-`None` values for both `turns` and `system_prompt`. Each
+        message in the list should be a dictionary with at least `role` (usually
+        `system`, `user`, or `assistant`, but `tool` is also possible). Normally
+        there is also a `content` field, which is a string.
     model
-        The model to use for the chat. If None, you may need to specify
-        the model name that's loaded on your vLLM server.
-    api_key
-        The API key to use for authentication. Some vLLM deployments may
-        not require authentication. You can set the `VLLM_API_KEY`
-        environment variable instead of passing it directly.
+        Model identifier to use.
     seed
-        Optional integer seed that vLLM uses to try and make output more
-        reproducible.
+        Random seed for reproducibility.
+    api_key
+        API key for authentication. If not provided, the `VLLM_API_KEY` environment
+        variable will be used.
     kwargs
-        Additional arguments to pass to the `openai.OpenAI()` client constructor.
-
-    Returns
-    -------
-    Chat
-        A chat object that retains the state of the conversation.
-
-    Note
-    ----
-    This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`) with
-    the defaults tweaked for vLLM endpoints.
-
-    Note
-    ----
-    vLLM servers are OpenAI-compatible, so this provider uses the same underlying
-    client as OpenAI but configured for your vLLM endpoint. Some advanced OpenAI
-    features may not be available depending on your vLLM server configuration.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatVllm(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    VLLM_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatVllm
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatVllm(base_url="http://localhost:8000/v1")
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export VLLM_API_KEY=...
-    ```
+        Additional arguments to pass to the LLM client.
+
+    Returns:
+        Chat instance configured for vLLM
     """
-    if api_key is None:
-        api_key = os.getenv("VLLM_API_KEY")

-    if isinstance(seed, MISSING_TYPE):
-        seed = 1014 if is_testing() else None
+    if api_key is None:
+        api_key = get_vllm_key()

     if model is None:
-        raise ValueError(
-            "Must specify model. vLLM servers can host different models, so you need to "
-            "specify which one to use. Check your vLLM server's /v1/models endpoint "
-            "to see available models."
-        )
+        models = get_vllm_models(base_url, api_key)
+        available_models = ", ".join(models)
+        raise ValueError(f"Must specify model. Available models: {available_models}")

     return Chat(
-        provider=VllmProvider(
-            api_key=api_key,
-            model=model,
+        provider=VLLMProvider(
             base_url=base_url,
+            model=model,
             seed=seed,
-            name="vLLM",
+            api_key=api_key,
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
     )


-class VllmProvider(OpenAIProvider):
-    """
-    Provider for vLLM endpoints.
+class VLLMProvider(OpenAIProvider):
+    # Just like OpenAI but no strict
+    @staticmethod
+    def _tool_schema_json(
+        schema: "ChatCompletionToolParam",
+    ) -> "ChatCompletionToolParam":
+        schema["function"]["strict"] = False
+        return schema

-    vLLM is OpenAI-compatible but may have some differences in tool handling
-    and other advanced features.
-    """

-    def _chat_perform_args(self, *args, **kwargs):
-        """
-        Customize request arguments for vLLM compatibility.
+def get_vllm_key() -> str:
+    key = os.getenv("VLLM_API_KEY", os.getenv("VLLM_KEY"))
+    if not key:
+        raise ValueError("VLLM_API_KEY environment variable not set")
+    return key
+
+
+def get_vllm_models(base_url: str, api_key: Optional[str] = None) -> list[str]:
+    if api_key is None:
+        api_key = get_vllm_key()
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.get(f"{base_url}/v1/models", headers=headers)
+    response.raise_for_status()
+    data = response.json()

-        vLLM may not support all OpenAI features like stream_options,
-        so we remove potentially unsupported parameters.
-        """
-        # Get the base arguments from OpenAI provider
-        result = super()._chat_perform_args(*args, **kwargs)
+    return [model["id"] for model in data["data"]]

-        # Remove stream_options if present (some vLLM versions don't support it)
-        if "stream_options" in result:
-            del result["stream_options"]

-        return result
+# def chat_vllm_test(**kwargs) -> Chat:
+#     """Create a test chat instance with default parameters."""
+#     return ChatVllm(base_url="https://llm.nrp-nautilus.io/", model="llama3", **kwargs)
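For reference, here is a minimal usage sketch of the revised `ChatVllm` signature, adapted from the example the diff removes from the docstring. The server URL and model name are placeholders for your own vLLM deployment; the API key is read from `VLLM_API_KEY`, mirroring the fallback the new code performs via `get_vllm_key()` when `api_key` is omitted.

```python
import os

from chatlas import ChatVllm

# Placeholder endpoint and model; point these at your own vLLM server.
chat = ChatVllm(
    base_url="http://localhost:8000/v1",
    model="meta-llama/Llama-2-7b-chat-hf",
    api_key=os.getenv("VLLM_API_KEY"),  # optional; if None, get_vllm_key() is used
    seed=42,
)
chat.chat("What is the capital of France?")
```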
0 commit comments
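If you are unsure which model identifiers the server exposes, the new `get_vllm_models()` helper queries the OpenAI-compatible `/v1/models` endpoint. Since that helper may not be part of chatlas's public API, the sketch below makes the same request directly with `requests`, following the body shown in the diff. Note that the helper appends `/v1/models` to `base_url` itself, so the URL here is the server root without a trailing `/v1` (a placeholder).

```python
import os

import requests

base_url = "http://localhost:8000"  # placeholder server root (no trailing /v1)
headers = {"Authorization": f"Bearer {os.environ['VLLM_API_KEY']}"}

# List the models the vLLM server is currently serving.
response = requests.get(f"{base_url}/v1/models", headers=headers)
response.raise_for_status()
model_ids = [model["id"] for model in response.json()["data"]]
print(model_ids)
```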