[CI/Build] Simplify OpenAI server setup in tests #5100

Merged: 11 commits, Jun 13, 2024
tests/async_engine/test_openapi_server_ray.py (15 additions & 16 deletions)
@@ -4,16 +4,22 @@
 # and debugging.
 import ray
 
-from ..utils import ServerRunner
+from ..utils import VLLM_PATH, RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "facebook/opt-125m"
 
 
 @pytest.fixture(scope="module")
-def server():
-    ray.init()
-    server_runner = ServerRunner.remote([
+def ray_ctx():
+    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(scope="module")
+def server(ray_ctx):
+    return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
         # use half precision for speed and memory savings in CI environment
@@ -24,22 +30,15 @@ def server():
         "--enforce-eager",
         "--engine-use-ray"
     ])
-    ray.get(server_runner.ready.remote())
-    yield server_runner
-    ray.shutdown()
 
 
 @pytest.fixture(scope="module")
-def client():
-    client = openai.AsyncOpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
-    yield client
+def client(server):
+    return server.get_async_client()
 
 
 @pytest.mark.asyncio
-async def test_check_models(server, client: openai.AsyncOpenAI):
+async def test_check_models(client: openai.AsyncOpenAI):
     models = await client.models.list()
     models = models.data
     served_model = models[0]
@@ -48,7 +47,7 @@ async def test_check_models(server, client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_single_completion(server, client: openai.AsyncOpenAI):
+async def test_single_completion(client: openai.AsyncOpenAI):
     completion = await client.completions.create(model=MODEL_NAME,
                                                  prompt="Hello, my name is",
                                                  max_tokens=5,
@@ -72,7 +71,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI):
 
 
 @pytest.mark.asyncio
-async def test_single_chat_session(server, client: openai.AsyncOpenAI):
+async def test_single_chat_session(client: openai.AsyncOpenAI):
    messages = [{
        "role": "system",
        "content": "you are a helpful assistant"
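The new fixtures lean on a RemoteOpenAIServer helper and a VLLM_PATH constant from tests/utils.py, neither of which appears in this diff. As a rough, hypothetical sketch of what such a helper typically does (launch the API server as a subprocess, poll its health endpoint, hand tests a regular OpenAI client), with details that may differ from vLLM's actual tests/utils.py:

# Hypothetical sketch only; vLLM's real tests/utils.py may differ.
import subprocess
import sys
import time

import openai
import requests


class RemoteOpenAIServerSketch:
    """Start an OpenAI-compatible vLLM server and wait until it is ready."""

    def __init__(self, cli_args, host="localhost", port=8000, timeout=240):
        self.host = host
        self.port = port
        # Launch the OpenAI-compatible API server as a child process.
        self.proc = subprocess.Popen([
            sys.executable, "-m", "vllm.entrypoints.openai.api_server",
            "--host", host, "--port", str(port), *cli_args
        ])
        self._wait_until_ready(timeout)

    def _wait_until_ready(self, timeout):
        # Poll the server's /health endpoint until it answers or we time out.
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                if requests.get(self._url("health")).status_code == 200:
                    return
            except requests.ConnectionError:
                pass  # server not up yet
            time.sleep(1)
        self.proc.terminate()
        raise RuntimeError("server did not become ready in time")

    def _url(self, path):
        return f"http://{self.host}:{self.port}/{path}"

    def get_async_client(self):
        # Tests interact with the server through the standard OpenAI SDK.
        return openai.AsyncOpenAI(base_url=self._url("v1"),
                                  api_key="token-abc123")

With a helper along these lines, the client fixture reduces to server.get_async_client(), which is why the tests no longer take the server fixture directly: depending on client is enough to force the server to start.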
tests/entrypoints/test_openai_embedding.py (new file, 117 additions)
@@ -0,0 +1,117 @@
+import openai
+import pytest
+import ray
+
+from ..utils import VLLM_PATH, RemoteOpenAIServer
+
+EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
+
+pytestmark = pytest.mark.openai
+
+
+@pytest.fixture(scope="module")
+def ray_ctx():
+    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(scope="module")
+def embedding_server(ray_ctx):
+    return RemoteOpenAIServer([
+        "--model",
+        EMBEDDING_MODEL_NAME,
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "bfloat16",
+        "--enforce-eager",
+        "--gpu-memory-utilization",
+        "0.75",
+        "--max-model-len",
+        "8192",
+    ])
+
+
+@pytest.fixture(scope="module")
+def embedding_client(embedding_server):
+    return embedding_server.get_async_client()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [EMBEDDING_MODEL_NAME],
+)
+async def test_single_embedding(embedding_client: openai.AsyncOpenAI,
+                                model_name: str):
+    input_texts = [
+        "The chef prepared a delicious meal.",
+    ]
+
+    # test single embedding
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_texts,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 1
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 9
+    assert embeddings.usage.total_tokens == 9
+
+    # test using token IDs
+    input_tokens = [1, 1, 1, 1, 1]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_tokens,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 1
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 5
+    assert embeddings.usage.total_tokens == 5
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [EMBEDDING_MODEL_NAME],
+)
+async def test_batch_embedding(embedding_client: openai.AsyncOpenAI,
+                               model_name: str):
+    # test List[str]
+    input_texts = [
+        "The cat sat on the mat.", "A feline was resting on a rug.",
+        "Stars twinkle brightly in the night sky."
+    ]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_texts,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 3
+    assert len(embeddings.data[0].embedding) == 4096
Review discussion on the assertions above:

Reviewer: Do we want to test the length of all 3 embeddings? Not much to add, and then we have everything covered.

DarkLight1337 (Member, Author) on Jun 11, 2024: I only moved the tests in this PR. We can address other existing issues regarding those tests in a later PR.

Reviewer: Actually, can we also test the embedding vectors themselves? Or are they not consistent?

DarkLight1337 (Member, Author): We will have to revamp the embedding tests later anyway (currently they use some unrecognized attributes, at least according to my IDE). Let's finish moving the tests first before updating them.

Reviewer: Good point.

+    # test List[List[int]]
+    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
+                    [25, 32, 64, 77]]
+    embeddings = await embedding_client.embeddings.create(
+        model=model_name,
+        input=input_tokens,
+        encoding_format="float",
+    )
+    assert embeddings.id is not None
+    assert len(embeddings.data) == 4
+    assert len(embeddings.data[0].embedding) == 4096
+    assert embeddings.usage.completion_tokens == 0
+    assert embeddings.usage.prompt_tokens == 17
+    assert embeddings.usage.total_tokens == 17
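One review comment above asks whether the returned embedding vectors themselves are consistent enough to assert on. As a hypothetical follow-up (not part of this PR, and assuming the same embedding_client fixture and EMBEDDING_MODEL_NAME constant from the diff), a consistency check could compare two identical requests by cosine similarity:

# Hypothetical follow-up suggested in the review discussion; not in this PR.
import math

import openai
import pytest


@pytest.mark.asyncio
async def test_embedding_consistency(embedding_client: openai.AsyncOpenAI):
    text = ["The chef prepared a delicious meal."]

    async def embed():
        result = await embedding_client.embeddings.create(
            model=EMBEDDING_MODEL_NAME,
            input=text,
            encoding_format="float",
        )
        return result.data[0].embedding

    first = await embed()
    second = await embed()

    # If inference is deterministic, identical inputs should yield vectors
    # with cosine similarity of ~1.0.
    dot = sum(a * b for a, b in zip(first, second))
    norm = (math.sqrt(sum(a * a for a in first)) *
            math.sqrt(sum(b * b for b in second)))
    assert dot / norm == pytest.approx(1.0, abs=1e-3)

Whether the tolerance can be tightened (or the check made exact) depends on how deterministic the server's kernels are, which is exactly the reviewer's open question.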