10 changes: 10 additions & 0 deletions evals/benchmark/benchmark.py
@@ -23,6 +23,12 @@
},
"codegen": {"llm": "/generate_stream", "llmserve": "/v1/chat/completions", "e2e": "/v1/codegen"},
"codetrans": {"llm": "/generate", "llmserve": "/v1/chat/completions", "e2e": "/v1/codetrans"},
"docsum": {
"docsum": "/v1/docsum",
"docsum-vllm": "/generate",
"docsum-llm-uservice": "/v1/chat/docsum",
"e2e": "/v1/docsum",
},
"faqgen": {"llm": "/v1/chat/completions", "llmserve": "/v1/chat/completions", "e2e": "/v1/faqgen"},
"audioqna": {
"asr": "/v1/audio/transcriptions",
@@ -102,7 +108,10 @@ def create_run_yaml_content(service, base_url, bench_target, test_phase, num_que
"dataset": service.get("dataset", "default"),
"prompts": service.get("prompts", None),
"max-output": service.get("max_output", 128),
"max-new-tokens": service.get("max_new_tokens", 128),
"seed": test_params.get("seed", None),
"stream": service.get("stream", True),
"summary_type": service.get("summary_type", "stuff"),
"llm-model": test_params["llm_model"],
"deployment-type": test_params["deployment_type"],
"load-shape": test_params["load_shape"],
@@ -341,6 +350,7 @@ def run_benchmark(report=False, yaml=yaml):
],
"codegen": ["llm", "llmserve", "e2e"],
"codetrans": ["llm", "llmserve", "e2e"],
"docsum": ["e2e"],
"faqgen": ["llm", "llmserve", "e2e"],
"audioqna": ["asr", "llm", "llmserve", "tts", "e2e"],
"visualqna": ["lvm", "lvmserve", "e2e"],
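For orientation, a minimal standalone sketch of how an entry from an endpoint map shaped like the addition above can be turned into a request URL; the map literal and the base URL below are illustrative, not values read from the benchmark configuration:

# Sketch only: resolve a docsum endpoint from a map shaped like the one added above.
# The base URL is a hypothetical default, not taken from any config file.
service_endpoints = {
    "docsum": {
        "docsum": "/v1/docsum",
        "docsum-vllm": "/generate",
        "docsum-llm-uservice": "/v1/chat/docsum",
        "e2e": "/v1/docsum",
    },
}

def build_url(example, target, base="http://localhost:8888"):
    """Join a hypothetical base URL with the endpoint registered for a target."""
    return base + service_endpoints[example][target]

print(build_url("docsum", "e2e"))  # http://localhost:8888/v1/docsum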
15 changes: 14 additions & 1 deletion evals/benchmark/benchmark.yaml
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

test_suite_config: # Overall configuration settings for the test suite
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, docsum, faqgen, audioqna, visualqna
deployment_type: "k8s" # Default is "k8s", can also be "docker"
service_ip: None # Leave as None for k8s, specify for Docker
service_port: None # Leave as None for k8s, specify for Docker
@@ -121,6 +121,19 @@ test_cases:
run_test: true
service_name: "codetrans-backend-server-svc" # Replace with your service name

docsum:
e2e:
run_test: true
service_name: "docsum" # Replace with your service name
stream: true # Stream the output; set to false when summary_type is map_reduce or refine
max_new_tokens: 1024
summary_type: "stuff" # Supported summary types: stuff, truncate, map_reduce, refine
dataset: # Path to the document to be summarized when random prompt is true
service_list:
- "docsum"
- "docsum-llm-uservice"
- "docsum-vllm"

faqgen:
llm:
run_test: false
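A small sketch (assuming PyYAML and a config shaped like the docsum block above) of reading the new test-case keys before they are copied into the stresscli run settings; the file path and fallback defaults here are assumptions:

# Sketch only: load the docsum e2e settings from a benchmark.yaml laid out as above.
import yaml

with open("benchmark.yaml") as f:  # path is an assumption
    config = yaml.safe_load(f)

docsum_e2e = config["test_cases"]["docsum"]["e2e"]
stream = docsum_e2e.get("stream", True)                  # false for map_reduce / refine
max_new_tokens = docsum_e2e.get("max_new_tokens", 128)   # fallback mirrors benchmark.py
summary_type = docsum_e2e.get("summary_type", "stuff")
print(stream, max_new_tokens, summary_type)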
9 changes: 9 additions & 0 deletions evals/benchmark/stresscli/commands/load_test.py
@@ -146,6 +146,9 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
runspec["seed"] = run_settings.get("seed", global_settings.get("seed", locust_defaults["seed"]))
runspec["seed"] = locust_defaults["seed"] if runspec["seed"] is None else runspec["seed"]
runspec["run_name"] = run_settings["name"]
runspec["summary_type"] = global_settings.get("summary_type", None)
runspec["stream"] = global_settings.get("stream", None)
runspec["max-new-tokens"] = global_settings.get("max-new-tokens", locust_defaults["max-output"])

# Specify load shape to adjust user distribution
load_shape_conf = run_settings.get("load-shape", global_settings.get("load-shape", locust_defaults["load-shape"]))
@@ -249,6 +252,12 @@
str(runspec["llm-model"]),
"--stop-timeout",
str(runspec["stop_timeout"]),
"--summary_type",
str(runspec["summary_type"]),
"--stream",
str(runspec["stream"]),
"--max-new-tokens",
str(runspec["max-new-tokens"]),
"--csv",
csv_output,
"--headless",
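In effect, the new settings travel to locust as extra CLI arguments. A condensed sketch of the argument list being assembled; the values and the surrounding command are illustrative only:

# Sketch only: shape of the locust argument list once the new flags are appended.
runspec = {"summary_type": "stuff", "stream": "true", "max-new-tokens": 1024}

cmd = ["locust", "--headless"]
cmd += ["--summary_type", str(runspec["summary_type"])]
cmd += ["--stream", str(runspec["stream"])]
cmd += ["--max-new-tokens", str(runspec["max-new-tokens"])]
print(" ".join(cmd))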
27 changes: 27 additions & 0 deletions evals/benchmark/stresscli/locust/aistress.py
@@ -75,6 +75,27 @@ def _(parser):
parser.add_argument(
"--max-output", type=int, env_var="OPEA_EVAL_MAX_OUTPUT_TOKENS", default=128, help="Max number of output tokens"
)
parser.add_argument(
"--summary_type",
type=str,
env_var="OPEA_EVAL_SUMMARY_TYPE",
default="stuff",
help="Summary type for Docsum example",
)
parser.add_argument(
"--stream",
type=str,
env_var="OPEA_EVAL_STREAM",
default="true",
help="Specify whether the HTTP request response from the service should be streamed",
)
parser.add_argument(
"--max-new-tokens",
type=int,
env_var="OPEA_EVAL_MAX_NEW_TOKENS",
default=256,
help="Specify the maximum number of new tokens to generate for OPEA services",
)


reqlist = []
@@ -122,12 +143,15 @@ def bench_main(self):
"chatqnabench",
"codegenfixed",
"codegenbench",
"docsumbench",
"faqgenfixed",
"faqgenbench",
"chatqna_qlist_pubmed",
]
if self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
req_params = {"data": reqData}
elif self.environment.parsed_options.bench_target in ["docsumbench", "docsumfixed"]:
req_params = {"files": reqData}
else:
req_params = {"json": reqData}
test_start_time = time.time()
@@ -254,6 +278,9 @@ def on_locust_init(environment, **_kwargs):
os.environ["OPEA_EVAL_PROMPTS"] = environment.parsed_options.prompts
os.environ["OPEA_EVAL_MAX_OUTPUT_TOKENS"] = str(environment.parsed_options.max_output)
os.environ["LLM_MODEL"] = environment.parsed_options.llm_model
os.environ["OPEA_EVAL_SUMMARY_TYPE"] = environment.parsed_options.summary_type
os.environ["OPEA_EVAL_STREAM"] = environment.parsed_options.stream
os.environ["OPEA_EVAL_MAX_NEW_TOKENS"] = str(environment.parsed_options.max_new_tokens)

bench_package = __import__(environment.parsed_options.bench_target)

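The custom options above are registered as locust arguments, exported as environment variables in on_locust_init, and the bench target name decides how the payload is sent. A standalone sketch of that request-parameter branching (the target names follow the lists above; the helper itself is hypothetical):

# Sketch only: condensed version of the branching in bench_main, outside locust.
def build_req_params(bench_target, req_data):
    if bench_target in ("faqgenfixed", "faqgenbench"):
        return {"data": req_data}   # form-encoded body
    if bench_target in ("docsumbench", "docsumfixed"):
        return {"files": req_data}  # multipart upload for document files
    return {"json": req_data}       # default: JSON body

print(build_req_params("docsumbench", {"messages": ""}))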
40 changes: 40 additions & 0 deletions evals/benchmark/stresscli/locust/docsumbench.py
@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

import tokenresponse as token

cwd = os.path.dirname(__file__)
filepath = os.environ["OPEA_EVAL_DATASET"]
filename = os.path.basename(filepath)
max_tokens = os.environ["OPEA_EVAL_MAX_NEW_TOKENS"]
summary_type = os.environ["OPEA_EVAL_SUMMARY_TYPE"]
stream = os.environ["OPEA_EVAL_STREAM"]


def getUrl():
return "/v1/docsum"


def getReqData():

files = {
"type": (None, "text"),
"messages": (None, ""),
"files": (filename, open(filepath, "rb"), "text/plain"),
"max_tokens": (None, max_tokens),
"language": (None, "en"),
"summary_type": (None, summary_type),
"stream": (None, stream),
}

return files


def respStatics(environment, reqData, respData):
return token.respStatics(environment, reqData, respData)


def staticsOutput(environment, reqlist):
token.staticsOutput(environment, reqlist)
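As a usage sketch, the files dict returned by getReqData() maps directly onto a multipart POST; the snippet below assumes the requests library, a local sample file, and a hypothetical gateway address:

# Sketch only: send the same multipart fields to a docsum endpoint with requests.
import requests

with open("upload_file.txt", "rb") as fh:  # sample file path is an assumption
    files = {
        "type": (None, "text"),
        "messages": (None, ""),
        "files": ("upload_file.txt", fh, "text/plain"),
        "max_tokens": (None, "1024"),
        "language": (None, "en"),
        "summary_type": (None, "stuff"),
        "stream": (None, "true"),
    }
    resp = requests.post("http://localhost:8888/v1/docsum", files=files)  # host is hypothetical
print(resp.status_code)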
38 changes: 38 additions & 0 deletions evals/benchmark/stresscli/locust/docsumfixed.py
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

import tokenresponse as token

cwd = os.path.dirname(__file__)
filepath = f"{cwd}/../../data/upload_file.txt"
filename = os.path.basename(filepath)
max_tokens = os.environ["OPEA_EVAL_MAX_NEW_TOKENS"]


def getUrl():
return "/v1/docsum"


def getReqData():

files = {
"type": (None, "text"),
"messages": (None, ""),
"files": (filename, open(filepath, "rb"), "text/plain"),
"max_tokens": (None, max_tokens),
"language": (None, "en"),
"summary_type": (None, "stuff"),
"stream": (None, "true"),
}

return files


def respStatics(environment, reqData, respData):
return token.respStatics(environment, reqData, respData)


def staticsOutput(environment, reqlist):
token.staticsOutput(environment, reqlist)