Commit eca9dc9

Merge pull request #22 from community-of-python/feature/add-reasoning-content
Add reasoning content
2 parents 2174cd0 + 6ec1d5a commit eca9dc9

15 files changed: +215 -96 lines changed

.gitignore (+1)

@@ -4,3 +4,4 @@ dist
 uv.lock
 .mypy_cache
 .ruff_cache
+__pycache__

README.md (+15 -2)

@@ -93,6 +93,20 @@ async with (
     ...
 ```
 
+### Reasoning models
+
+You can access OpenAI-like reasoning models and retrieve their reasoning content:
+
+```python
+async def main() -> None:
+    async with any_llm_client.get_client(config) as client:
+        llm_response = await client.request_llm_message("Кек, чо как вообще на нарах?")
+        print(f"Just a regular LLM response content: {llm_response.content}")
+        print(f"LLM reasoning response content: {llm_response.reasoning_content}")
+
+    ...
+```
+
 ### Other
 
 #### Mock client
@@ -165,12 +179,12 @@ async with any_llm_client.OpenAIClient(config, ...) as client:
 #### Errors
 
 `any_llm_client.LLMClient.request_llm_message()` and `any_llm_client.LLMClient.stream_llm_message_chunks()` will raise:
+
 - `any_llm_client.LLMError` or `any_llm_client.OutOfTokensOrSymbolsError` when the LLM API responds with a failed HTTP status,
 - `any_llm_client.LLMRequestValidationError` when images are passed to YandexGPT client.
 
 #### Timeouts, proxy & other HTTP settings
 
-
 Pass custom [HTTPX](https://www.python-httpx.org) kwargs to `any_llm_client.get_client()`:
 
 ```python
@@ -206,7 +220,6 @@ await client.request_llm_message("Кек, чо как вообще на нара
 
 The `extra` parameter is united with `request_extra` in OpenAIConfig
 
-
 #### Passing images
 
 You can pass images to OpenAI client (YandexGPT doesn't support images yet):
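
The README addition above only shows the non-streaming case. A minimal sketch of the streaming counterpart (not part of this diff; it assumes the same `config` object as the README examples and a reasoning-capable model behind it):

```python
async def main() -> None:
    async with (
        any_llm_client.get_client(config) as client,
        client.stream_llm_message_chunks("Explain your answer step by step") as message_chunks,
    ):
        async for chunk in message_chunks:
            # Each chunk is now an LLMResponse; either field may be None for a given delta.
            if chunk.reasoning_content:
                print(chunk.reasoning_content, end="", flush=True)
            if chunk.content:
                print(chunk.content, end="", flush=True)
```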

any_llm_client/__init__.py (+2)

@@ -10,6 +10,7 @@
     LLMConfig,
     LLMError,
     LLMRequestValidationError,
+    LLMResponse,
     Message,
     MessageRole,
     OutOfTokensOrSymbolsError,
@@ -31,6 +32,7 @@
     "LLMConfig",
     "LLMError",
     "LLMRequestValidationError",
+    "LLMResponse",
     "Message",
     "MessageRole",
     "MockLLMClient",

any_llm_client/clients/mock.py (+6 -6)

@@ -6,12 +6,12 @@
 import pydantic
 import typing_extensions
 
-from any_llm_client.core import LLMClient, LLMConfig, LLMConfigValue, Message
+from any_llm_client.core import LLMClient, LLMConfig, LLMConfigValue, LLMResponse, Message
 
 
 class MockLLMConfig(LLMConfig):
-    response_message: str = ""
-    stream_messages: list[str] = pydantic.Field([])
+    response_message: LLMResponse = LLMResponse(content="")
+    stream_messages: list[LLMResponse] = pydantic.Field([])
     api_type: typing.Literal["mock"] = "mock"
 
 
@@ -25,10 +25,10 @@ async def request_llm_message(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),  # noqa: ARG002
         extra: dict[str, typing.Any] | None = None,  # noqa: ARG002
-    ) -> str:
+    ) -> LLMResponse:
         return self.config.response_message
 
-    async def _iter_config_stream_messages(self) -> typing.AsyncIterable[str]:
+    async def _iter_config_stream_messages(self) -> typing.AsyncIterable[LLMResponse]:
         for one_message in self.config.stream_messages:
             yield one_message
 
@@ -39,7 +39,7 @@ async def stream_llm_message_chunks(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),  # noqa: ARG002
         extra: dict[str, typing.Any] | None = None,  # noqa: ARG002
-    ) -> typing.AsyncIterator[typing.AsyncIterable[str]]:
+    ) -> typing.AsyncIterator[typing.AsyncIterable[LLMResponse]]:
        yield self._iter_config_stream_messages()
 
     async def __aenter__(self) -> typing_extensions.Self:
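
Since `MockLLMConfig` now stores `LLMResponse` objects instead of bare strings, test fixtures need to wrap their canned replies. A minimal sketch, assuming `MockLLMConfig` is exported at the package root like the other config classes:

```python
import any_llm_client

config = any_llm_client.MockLLMConfig(
    response_message=any_llm_client.LLMResponse(content="mocked answer", reasoning_content="mocked reasoning"),
    stream_messages=[
        any_llm_client.LLMResponse(content="mocked "),
        any_llm_client.LLMResponse(content="answer"),
    ],
)


async def main() -> None:
    async with any_llm_client.get_client(config) as client:
        llm_response = await client.request_llm_message("any prompt")  # the mock ignores the prompt
        assert llm_response.content == "mocked answer"
        assert llm_response.reasoning_content == "mocked reasoning"
```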

any_llm_client/clients/openai.py (+36 -11)

@@ -16,6 +16,7 @@
     LLMConfig,
     LLMConfigValue,
     LLMError,
+    LLMResponse,
     Message,
     MessageRole,
     OutOfTokensOrSymbolsError,
@@ -76,6 +77,7 @@ class ChatCompletionsRequest(pydantic.BaseModel):
 class OneStreamingChoiceDelta(pydantic.BaseModel):
     role: typing.Literal[MessageRole.assistant] | None = None
     content: str | None = None
+    reasoning_content: str | None = None
 
 
 class OneStreamingChoice(pydantic.BaseModel):
@@ -89,6 +91,7 @@ class ChatCompletionsStreamingEvent(pydantic.BaseModel):
 class OneNotStreamingChoiceMessage(pydantic.BaseModel):
     role: MessageRole
     content: str
+    reasoning_content: str | None = None
 
 
 class OneNotStreamingChoice(pydantic.BaseModel):
@@ -143,14 +146,16 @@ def _make_user_assistant_alternate_messages(
         else:
             if current_message_content_chunks:
                 yield ChatCompletionsInputMessage(
-                    role=current_message_role, content=_merge_content_chunks(current_message_content_chunks)
+                    role=current_message_role,
+                    content=_merge_content_chunks(current_message_content_chunks),
                 )
             current_message_content_chunks = [one_message.content]
             current_message_role = one_message.role
 
     if current_message_content_chunks:
         yield ChatCompletionsInputMessage(
-            role=current_message_role, content=_merge_content_chunks(current_message_content_chunks)
+            role=current_message_role,
+            content=_merge_content_chunks(current_message_content_chunks),
         )
 
 
@@ -195,7 +200,12 @@ def _prepare_messages(self, messages: str | list[Message]) -> list[ChatCompletio
         )
 
     def _prepare_payload(
-        self, *, messages: str | list[Message], temperature: float, stream: bool, extra: dict[str, typing.Any] | None
+        self,
+        *,
+        messages: str | list[Message],
+        temperature: float,
+        stream: bool,
+        extra: dict[str, typing.Any] | None,
     ) -> dict[str, typing.Any]:
         return ChatCompletionsRequest(
             stream=stream,
@@ -211,9 +221,12 @@ async def request_llm_message(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> str:
+    ) -> LLMResponse:
         payload: typing.Final = self._prepare_payload(
-            messages=messages, temperature=temperature, stream=False, extra=extra
+            messages=messages,
+            temperature=temperature,
+            stream=False,
+            extra=extra,
         )
         try:
             response: typing.Final = await make_http_request(
@@ -224,18 +237,27 @@ async def request_llm_message(
         except httpx.HTTPStatusError as exception:
             _handle_status_error(status_code=exception.response.status_code, content=exception.response.content)
         try:
-            return ChatCompletionsNotStreamingResponse.model_validate_json(response.content).choices[0].message.content
+            validated_message_model: typing.Final = (
+                ChatCompletionsNotStreamingResponse.model_validate_json(response.content).choices[0].message
+            )
+            return LLMResponse(
+                content=validated_message_model.content,
+                reasoning_content=validated_message_model.reasoning_content,
+            )
         finally:
             await response.aclose()
 
-    async def _iter_response_chunks(self, response: httpx.Response) -> typing.AsyncIterable[str]:
+    async def _iter_response_chunks(self, response: httpx.Response) -> typing.AsyncIterable[LLMResponse]:
         async for event in httpx_sse.EventSource(response).aiter_sse():
             if event.data == "[DONE]":
                 break
             validated_response = ChatCompletionsStreamingEvent.model_validate_json(event.data)
-            if not (one_chunk := validated_response.choices[0].delta.content):
+            if not (
+                (validated_delta := validated_response.choices[0].delta)
+                and (validated_delta.content or validated_delta.reasoning_content)
+            ):
                 continue
-            yield one_chunk
+            yield LLMResponse(content=validated_delta.content, reasoning_content=validated_delta.reasoning_content)
 
     @contextlib.asynccontextmanager
     async def stream_llm_message_chunks(
@@ -244,9 +266,12 @@ async def stream_llm_message_chunks(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> typing.AsyncIterator[typing.AsyncIterable[str]]:
+    ) -> typing.AsyncIterator[typing.AsyncIterable[LLMResponse]]:
         payload: typing.Final = self._prepare_payload(
-            messages=messages, temperature=temperature, stream=True, extra=extra
+            messages=messages,
+            temperature=temperature,
+            stream=True,
+            extra=extra,
         )
         try:
             async with make_streaming_http_request(
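
With the streaming changes above, each SSE delta is surfaced as an `LLMResponse`, and any given delta may carry `reasoning_content`, `content`, or both. A minimal consumer sketch (not part of this diff; `collect_streamed_response` is a hypothetical helper, and it assumes an already-entered `any_llm_client.LLMClient`):

```python
import any_llm_client


async def collect_streamed_response(client: any_llm_client.LLMClient, prompt: str) -> any_llm_client.LLMResponse:
    """Hypothetical helper: merge streamed LLMResponse chunks back into one LLMResponse."""
    reasoning_parts: list[str] = []
    content_parts: list[str] = []
    async with client.stream_llm_message_chunks(prompt) as message_chunks:
        async for chunk in message_chunks:
            if chunk.reasoning_content:
                reasoning_parts.append(chunk.reasoning_content)
            if chunk.content:
                content_parts.append(chunk.content)
    return any_llm_client.LLMResponse(
        content="".join(content_parts) or None,
        reasoning_content="".join(reasoning_parts) or None,
    )
```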

any_llm_client/clients/yandexgpt.py (+21 -10)

@@ -17,6 +17,7 @@
     LLMConfigValue,
     LLMError,
     LLMRequestValidationError,
+    LLMResponse,
     Message,
     MessageRole,
     OutOfTokensOrSymbolsError,
@@ -35,10 +36,12 @@ class YandexGPTConfig(LLMConfig):
     else:
         url: pydantic.HttpUrl = "https://llm.api.cloud.yandex.net/foundationModels/v1/completion"
     auth_header: str = pydantic.Field(  # type: ignore[assignment]
-        default_factory=lambda: os.environ.get(YANDEXGPT_AUTH_HEADER_ENV_NAME), validate_default=True
+        default_factory=lambda: os.environ.get(YANDEXGPT_AUTH_HEADER_ENV_NAME),
+        validate_default=True,
     )
     folder_id: str = pydantic.Field(  # type: ignore[assignment]
-        default_factory=lambda: os.environ.get(YANDEXGPT_FOLDER_ID_ENV_NAME), validate_default=True
+        default_factory=lambda: os.environ.get(YANDEXGPT_FOLDER_ID_ENV_NAME),
+        validate_default=True,
     )
     model_name: str
     model_version: str = "latest"
@@ -126,7 +129,7 @@ def _prepare_payload(
             if isinstance(one_message.content, list):
                 if len(one_message.content) != 1:
                     raise LLMRequestValidationError(
-                        "YandexGPTClient does not support multiple content items per message"
+                        "YandexGPTClient does not support multiple content items per message",
                     )
                 message_content = one_message.content[0]
                 if isinstance(message_content, ImageContentItem):
@@ -153,9 +156,12 @@ async def request_llm_message(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> str:
+    ) -> LLMResponse:
         payload: typing.Final = self._prepare_payload(
-            messages=messages, temperature=temperature, stream=False, extra=extra
+            messages=messages,
+            temperature=temperature,
+            stream=False,
+            extra=extra,
         )
 
         try:
@@ -167,14 +173,16 @@ async def request_llm_message(
         except httpx.HTTPStatusError as exception:
             _handle_status_error(status_code=exception.response.status_code, content=exception.response.content)
 
-        return YandexGPTResponse.model_validate_json(response.content).result.alternatives[0].message.text
+        return LLMResponse(
+            content=YandexGPTResponse.model_validate_json(response.content).result.alternatives[0].message.text,
+        )
 
-    async def _iter_response_chunks(self, response: httpx.Response) -> typing.AsyncIterable[str]:
+    async def _iter_response_chunks(self, response: httpx.Response) -> typing.AsyncIterable[LLMResponse]:
         previous_cursor = 0
         async for one_line in response.aiter_lines():
             validated_response = YandexGPTResponse.model_validate_json(one_line)
             response_text = validated_response.result.alternatives[0].message.text
-            yield response_text[previous_cursor:]
+            yield LLMResponse(content=response_text[previous_cursor:])
             previous_cursor = len(response_text)
 
     @contextlib.asynccontextmanager
@@ -184,9 +192,12 @@ async def stream_llm_message_chunks(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> typing.AsyncIterator[typing.AsyncIterable[str]]:
+    ) -> typing.AsyncIterator[typing.AsyncIterable[LLMResponse]]:
         payload: typing.Final = self._prepare_payload(
-            messages=messages, temperature=temperature, stream=True, extra=extra
+            messages=messages,
+            temperature=temperature,
+            stream=True,
+            extra=extra,
         )
 
         try:
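
As the diff shows, the YandexGPT client builds `LLMResponse(content=...)` only, so `reasoning_content` stays `None` for this backend. A small sketch (not from this diff; `format_response` is a hypothetical helper) of provider-agnostic code that tolerates the missing field:

```python
import any_llm_client


def format_response(llm_response: any_llm_client.LLMResponse) -> str:
    """Hypothetical helper: render a response whether or not reasoning content is present."""
    if llm_response.reasoning_content is None:
        return llm_response.content or ""
    return f"[reasoning]\n{llm_response.reasoning_content}\n[answer]\n{llm_response.content}"
```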

any_llm_client/core.py (+9 -2)

@@ -39,6 +39,12 @@ class Message:
     content: str | ContentItemList
 
 
+@pydantic.dataclasses.dataclass
+class LLMResponse:
+    content: str | None = None
+    reasoning_content: str | None = None
+
+
 if typing.TYPE_CHECKING:
 
     @pydantic.dataclasses.dataclass
@@ -55,6 +61,7 @@ class UserMessage(Message):
     class AssistantMessage(Message):
         role: typing.Literal[MessageRole.assistant] = pydantic.Field(MessageRole.assistant, init=False)
         content: str | ContentItemList
+
 else:
 
     def SystemMessage(content: str | ContentItemList) -> Message:  # noqa: N802
@@ -102,7 +109,7 @@ async def request_llm_message(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> str: ...  # raises LLMError, LLMRequestValidationError
+    ) -> LLMResponse: ...  # raises LLMError, LLMRequestValidationError
 
     @contextlib.asynccontextmanager
     def stream_llm_message_chunks(
@@ -111,7 +118,7 @@ def stream_llm_message_chunks(
         *,
         temperature: float = LLMConfigValue(attr="temperature"),
         extra: dict[str, typing.Any] | None = None,
-    ) -> typing.AsyncIterator[typing.AsyncIterable[str]]: ...  # raises LLMError, LLMRequestValidationError
+    ) -> typing.AsyncIterator[typing.AsyncIterable[LLMResponse]]: ...  # raises LLMError, LLMRequestValidationError
 
     async def __aenter__(self) -> typing_extensions.Self: ...
     async def __aexit__(
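
`LLMResponse` is a pydantic dataclass with both fields optional, so callers can construct it with either or both set. A minimal usage sketch (not from this diff), assuming the export added in `__init__.py`:

```python
import any_llm_client

full = any_llm_client.LLMResponse(content="final answer", reasoning_content="chain of thought")
content_only = any_llm_client.LLMResponse(content="final answer")  # reasoning_content defaults to None
empty = any_llm_client.LLMResponse()  # both fields default to None

assert content_only.reasoning_content is None
assert empty.content is None and empty.reasoning_content is None
```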

examples/openai-image-input.py (+2 -2)

@@ -17,7 +17,7 @@
     content=[
         any_llm_client.TextContentItem("What's on the image?"),
         any_llm_client.ImageContentItem(f"data:image/jpeg;base64,{base64.b64encode(image_content).decode('utf-8')}"),
-    ]
+    ],
 )
 
 
@@ -27,7 +27,7 @@ async def main() -> None:
         client.stream_llm_message_chunks(messages=[message]) as message_chunks,
     ):
         async for chunk in message_chunks:
-            print(chunk, end="", flush=True)
+            print(chunk.content, end="", flush=True)
 
 
 asyncio.run(main())

examples/openai-reasoning-response.py (+21)

@@ -0,0 +1,21 @@
+"""Install ollama and pull the model to run this script: `ollama pull qwen2.5-coder:1.5b`."""
+
+import asyncio
+import typing
+
+import any_llm_client
+
+
+config = any_llm_client.OpenAIConfig(url="http://127.0.0.1:11434/v1/chat/completions", model_name="qwen2.5-coder:1.5b")
+
+
+async def main() -> None:
+    async with any_llm_client.get_client(config) as client:
+        llm_response: typing.Final = await client.request_llm_message(
+            "Кек, чо как вообще на нарах? Порассуждай как философ.",
+        )
+        print(llm_response.reasoning_content)
+        print(llm_response.content)
+
+
+asyncio.run(main())

examples/openai-stream-advanced.py (+1 -1)

@@ -20,7 +20,7 @@ async def main() -> None:
         ) as message_chunks,
     ):
         async for chunk in message_chunks:
-            print(chunk, end="", flush=True)
+            print(chunk.content, end="", flush=True)
 
 
 asyncio.run(main())
