@@ -39,12 +39,13 @@ print(api_client.agents.list())
 completion = inference_client.chat.completions.create(
     messages=[
         {
-            "content": "string",
-            "role": "system",
+            "role": "user",
+            "content": "What is the capital of France?",
         }
     ],
-    model="llama3-8b-instruct",
+    model="llama3.3-70b-instruct",
 )
+
 print(completion.choices[0].message)

 ```
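
If only the reply text is needed, it can usually be read straight off the message object. A minimal sketch, assuming the default `GradientAI()` constructor picks up credentials from the environment (as the later examples in this diff do) and that the response follows the standard chat-completions shape:

```python
from gradientai import GradientAI

client = GradientAI()

completion = client.agents.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
)

# Each choice wraps a message object; `.content` holds the plain text.
# (Standard chat-completions shape; an assumption, not shown in this diff.)
print(completion.choices[0].message.content)
```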
@@ -72,13 +73,13 @@ async def main() -> None:
     completion = await client.agents.chat.completions.create(
         messages=[
             {
-                "content": "string",
-                "role": "system",
+                "role": "user",
+                "content": "What is the capital of France?",
             }
         ],
-        model="llama3-8b-instruct",
+        model="llama3.3-70b-instruct",
     )
-    print(completion.id)
+    print(completion.choices)


 asyncio.run(main())
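
Because the async client shares one event loop, independent requests can be fanned out concurrently with `asyncio.gather`. A sketch under the same assumptions as above; the `ask` helper is hypothetical, introduced only for illustration:

```python
import asyncio

from gradientai import AsyncGradientAI

client = AsyncGradientAI()


async def ask(question: str) -> str:
    # Hypothetical helper: one question in, one answer string out.
    completion = await client.agents.chat.completions.create(
        messages=[{"role": "user", "content": question}],
        model="llama3.3-70b-instruct",
    )
    return completion.choices[0].message.content


async def main() -> None:
    # Both requests are in flight at the same time on a single event loop.
    answers = await asyncio.gather(
        ask("What is the capital of France?"),
        ask("What is the capital of Japan?"),
    )
    print(answers)


asyncio.run(main())
```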
@@ -114,41 +115,61 @@ async def main() -> None:
     completion = await client.agents.chat.completions.create(
         messages=[
             {
-                "content": "string",
-                "role": "system",
+                "role": "user",
+                "content": "What is the capital of France?",
             }
         ],
-        model="llama3-8b-instruct",
+        model="llama3.3-70b-instruct",
     )
-    print(completion.id)
+    print(completion.choices)


 asyncio.run(main())
 ```

-## Streaming
-Support for streaming responses are available by Server Side Events (SSE) for Serverless Inference and Agents.
-```
-import os
+## Streaming responses
+
+We provide support for streaming responses using Server-Sent Events (SSE).
+
+```python
 from gradientai import GradientAI

-client = GradientAI(
-    inference_key=os.environ.get("GRADIENTAI_INFERENCE_KEY")
-)
+client = GradientAI()

-response = client.chat.completions.create(
+stream = client.agents.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
     model="llama3.3-70b-instruct",
-    messages=[{"role": "user", "content": "Write a story about a brave squirrel."}],
     stream=True,
 )
+for completion in stream:
+    print(completion.choices)
+```

-for chunk in response:
-    if len(chunk.choices) > 0:
-        if chunk.choices[0].delta.content:
-            print(chunk.choices[0].delta.content, end="", flush=True)
+The async client uses the exact same interface.

-```
+```python
+from gradientai import AsyncGradientAI
+
+client = AsyncGradientAI()

+stream = await client.agents.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
+    stream=True,
+)
+async for completion in stream:
+    print(completion.choices)
+```

 ## Using types

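The new streaming examples print whole `choices` objects, while the example being removed shows the pattern for reassembling the reply text, since each SSE event carries only an incremental `delta`. A sketch combining the two, assuming chunks keep the `delta.content` shape used by the old example:

```python
from gradientai import GradientAI

client = GradientAI()

stream = client.agents.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
    stream=True,
)

# Each event carries an incremental delta, not a full message; skip empty
# keep-alive chunks before reading the text.
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```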
@@ -197,8 +218,14 @@ from gradientai import GradientAI
 client = GradientAI()

 try:
-    client.agents.versions.list(
-        uuid="REPLACE_ME",
+    client.agents.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": "What is the capital of France?",
+            }
+        ],
+        model="llama3.3-70b-instruct",
     )
 except gradientai.APIConnectionError as e:
     print("The server could not be reached")
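
The hunk only shows the connection-error branch. SDKs of this kind typically also raise a status-code error for non-2xx responses; the class and attribute names below follow that convention and are assumptions insofar as this diff does not show them:

```python
import gradientai
from gradientai import GradientAI

client = GradientAI()

try:
    client.agents.chat.completions.create(
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        model="llama3.3-70b-instruct",
    )
except gradientai.APIConnectionError as e:
    print("The server could not be reached")
    print(e.__cause__)  # the underlying exception, likely raised within the HTTP layer
except gradientai.APIStatusError as e:
    # Assumed class/attribute names for the generic non-2xx error.
    print("A non-success status code was received")
    print(e.status_code)
    print(e.response)
```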
@@ -242,8 +269,14 @@ client = GradientAI(
 )

 # Or, configure per-request:
-client.with_options(max_retries=5).agents.versions.list(
-    uuid="REPLACE_ME",
+client.with_options(max_retries=5).agents.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 )
 ```

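`with_options` returns a configured copy of the client, so a retry policy can be set once and reused rather than respecified on every call. A sketch, assuming the copy behaves like the original in all other respects:

```python
from gradientai import GradientAI

client = GradientAI()

# One configured copy, reused for every call that should retry harder.
retrying = client.with_options(max_retries=5)

completion = retrying.agents.chat.completions.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
)
print(completion.choices)
```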
@@ -267,8 +300,14 @@ client = GradientAI(
 )

 # Override per-request:
-client.with_options(timeout=5.0).agents.versions.list(
-    uuid="REPLACE_ME",
+client.with_options(timeout=5.0).agents.chat.completions.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 )
 ```

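SDKs of this kind are typically built on httpx; assuming that holds here, a granular `httpx.Timeout` can stand in for the plain float. Treat the constructor argument as an assumption if your version differs:

```python
import httpx

from gradientai import GradientAI

# Overall cap of 60s, with tighter per-phase limits: 5s to read, 10s to
# write, 2s to connect. (Assumes `timeout` accepts an httpx.Timeout.)
client = GradientAI(
    timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0),
)
```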
@@ -310,13 +349,17 @@ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to
 from gradientai import GradientAI

 client = GradientAI()
-response = client.agents.versions.with_raw_response.list(
-    uuid="REPLACE_ME",
+response = client.agents.chat.completions.with_raw_response.create(
+    messages=[{
+        "role": "user",
+        "content": "What is the capital of France?",
+    }],
+    model="llama3.3-70b-instruct",
 )
 print(response.headers.get('X-My-Header'))

-version = response.parse()  # get the object that `agents.versions.list()` would have returned
-print(version.agent_versions)
+completion = response.parse()  # get the object that `agents.chat.completions.create()` would have returned
+print(completion.choices)
 ```

 These methods return an [`APIResponse`](https://github.com/digitalocean/gradientai-python/tree/main/src/gradientai/_response.py) object.
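
Beyond headers, the raw wrapper keeps transport-level details available next to the typed body. A sketch; `status_code` on the wrapper is an assumption if this SDK's `APIResponse` differs:

```python
from gradientai import GradientAI

client = GradientAI()

response = client.agents.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="llama3.3-70b-instruct",
)

# Transport details first...
print(response.status_code, response.headers.get("content-type"))

# ...then the same typed object the plain call would have returned.
completion = response.parse()
print(completion.choices[0].message)
```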
@@ -330,8 +373,14 @@ The above interface eagerly reads the full response body when you make the reque
 To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.

 ```python
-with client.agents.versions.with_streaming_response.list(
-    uuid="REPLACE_ME",
+with client.agents.chat.completions.with_streaming_response.create(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?",
+        }
+    ],
+    model="llama3.3-70b-instruct",
 ) as response:
     print(response.headers.get("X-My-Header"))
