Noam/streaming (#26)
* refactor: extract code into multiple files

* monitor stream for chat completion

* rename file

* async chat completion

* fix stream async + add spans for streams

* remove unused code

* Added stream token calculation

* fix typo + remove prints and comments

* added "stream" attribute to completion events

* handle errors before the stream starts
nhoffmann-newrelic authored Oct 11, 2023
1 parent 4c95c99 commit 000e0b8
Showing 7 changed files with 810 additions and 563 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -12,6 +12,7 @@ keywords = ["newrelic", "observability", "openai", "gpt", "chatGPT", "GPT-4", "m
python = ">=3.8.1,<4.0.0"
newrelic-telemetry-sdk = "^0.4.0"
openai = ">=0.8,<0.30"
tiktoken = "^0.5.1"

[tool.poetry.dev-dependencies]
pytest = "^7.2.1"
100 changes: 99 additions & 1 deletion src/nr_openai_observability/build_events.py
@@ -1,11 +1,15 @@
import logging
import uuid
from datetime import datetime
from typing import Any, Dict, Tuple

import openai
import tiktoken
from newrelic_telemetry_sdk import Span

logger = logging.getLogger("nr_openai_observability")


def _build_messages_events(messages, completion_id, model):
    events = []
@@ -43,6 +47,98 @@ def _get_numeric_header(name):
    }


def calc_completion_tokens(model, message_content):
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # could not find an encoding for this model
        return None

    return len(encoding.encode(message_content))


def calc_prompt_tokens(model, messages):
    """
    Calculate prompt tokens based on this document:
    https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # could not find an encoding for this model
        return None

    num_of_tokens_per_msg = 3
    num_of_tokens_per_name = 1

    if model == "gpt-3.5-turbo-0301":
        num_of_tokens_per_msg = 4
        num_of_tokens_per_name = -1

    if "gpt-4" not in model and "gpt-3" not in model:
        logger.warning(f"model:{model} is unsupported for streaming token calculation")
        return None

    # per the linked document, every reply is primed with 3 base tokens
    # that are added to the prompt
    num_of_tokens = 3

    for message in messages:
        num_of_tokens += num_of_tokens_per_msg
        for key, value in message.items():
            num_of_tokens += len(encoding.encode(value))
            if key == "name":
                num_of_tokens += num_of_tokens_per_name

    return num_of_tokens
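
# Worked example (illustrative; the exact token counts are assumptions): for
# messages = [{"role": "user", "content": "Hello"}] on "gpt-3.5-turbo",
# every value is encoded, so the total is 3 (reply priming) + 3 (per
# message) + tokens("user") + tokens("Hello") = 3 + 3 + 1 + 1 = 8,
# assuming each value encodes to a single token.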


def build_stream_completion_events(
    last_chunk, request, response_headers, message, response_time
):
    completion_id = str(uuid.uuid4())
    request_messages = request.get("messages", [])

    prompt_tokens = calc_prompt_tokens(last_chunk.model, request_messages)
    completion_tokens = calc_completion_tokens(
        last_chunk.model, message.get("content", "")
    )
    total_tokens = (
        completion_tokens + prompt_tokens
        if completion_tokens and prompt_tokens
        else None
    )

    completion = {
        "id": completion_id,
        "api_key_last_four_digits": f"sk-{last_chunk.api_key[-4:]}",
        "response_time": int(response_time * 1000),
        "request.model": request.get("model") or request.get("engine"),
        "response.model": last_chunk.model,
        "usage.completion_tokens": completion_tokens,
        "usage.total_tokens": total_tokens,
        "usage.prompt_tokens": prompt_tokens,
        "temperature": request.get("temperature"),
        "max_tokens": request.get("max_tokens"),
        "finish_reason": last_chunk.choices[0].finish_reason,
        "api_type": last_chunk.api_type,
        "vendor": "openAI",
        "ingest_source": "PythonSDK",
        "number_of_messages": len(request.get("messages", [])) + 1,
        "organization": last_chunk.organization,
        "api_version": response_headers.get("openai-version"),
        "stream": True,
    }

    completion.update(_get_rate_limit_data(response_headers))

    messages = _build_messages_events(
        request_messages + [message],
        completion_id,
        last_chunk.model,
    )

    return {"messages": messages, "completion": completion}


def build_completion_events(response, request, response_headers, response_time):
    completion_id = str(uuid.uuid4())

@@ -64,6 +160,7 @@ def build_completion_events(response, request, response_headers, response_time):
"number_of_messages": len(request.get("messages", [])) + len(response.choices),
"organization": response.organization,
"api_version": response_headers.get("openai-version"),
"stream": False,
}

    completion.update(_get_rate_limit_data(response_headers))
@@ -77,7 +174,7 @@ def build_completion_events(response, request, response_headers, response_time):
return {"messages": messages, "completion": completion}


-def build_completion_error_events(request, error):
+def build_completion_error_events(request, error, isStream=False):
    completion_id = str(uuid.uuid4())

    completion = {
@@ -95,6 +192,7 @@ def build_completion_error_events(request, error):
"error_type": error.error.type,
"error_code": error.error.code,
"error_param": error.error.param,
"stream": isStream,
}

    messages = _build_messages_events(
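For context, the new isStream flag marks failures that happen before any chunk arrives ("handle errors before the stream starts" above). A minimal sketch of that error path, as hypothetical driver code rather than anything in this commit, assuming the pre-1.0 openai client pinned in pyproject.toml:

import openai

from nr_openai_observability import build_events

request = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hi"}],
}

try:
    # The call itself can fail (auth, rate limits, unknown model) before
    # the stream yields its first chunk.
    stream = openai.ChatCompletion.create(stream=True, **request)
except openai.error.OpenAIError as error:
    # Record the failure as a completion error event, flagged as a stream.
    events = build_events.build_completion_error_events(
        request, error, isStream=True
    )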
6 changes: 6 additions & 0 deletions src/nr_openai_observability/consts.py
@@ -0,0 +1,6 @@
EventName = "LlmCompletion"
MessageEventName = "LlmChatCompletionMessage"
SummaryEventName = "LlmChatCompletionSummary"
EmbeddingEventName = "LlmEmbedding"
VectorSearchEventName = "LlmVectorSearch"
VectorSearchResultsEventName = "LlmVectorSearchResult"
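
Pulling the pieces together, a hypothetical end-to-end sketch (again, not code from this commit): consume a streamed chat completion, build events with the new builder, and ship the summary under the event type defined above. It assumes the pre-1.0 openai client, passes an empty response-headers dict for brevity, and uses the Event/EventClient helpers from newrelic-telemetry-sdk; the insert-key variable name is illustrative.

import os
import time

import openai
from newrelic_telemetry_sdk import Event, EventClient

from nr_openai_observability import build_events, consts

request = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hi"}],
}

start = time.time()
content, last_chunk = [], None
for chunk in openai.ChatCompletion.create(stream=True, **request):
    last_chunk = chunk
    # Each chunk carries a delta holding the next piece of the reply.
    content.append(chunk.choices[0].delta.get("content") or "")

message = {"role": "assistant", "content": "".join(content)}
events = build_events.build_stream_completion_events(
    last_chunk, request, {}, message, time.time() - start
)

# Send the summary event under the type name defined in consts.py.
client = EventClient(os.environ["NEW_RELIC_INSERT_KEY"])
client.send(Event(consts.SummaryEventName, events["completion"]))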