10 changes: 10 additions & 0 deletions evals/benchmark/benchmark.py
@@ -23,6 +23,12 @@
},
"codegen": {"llm": "/generate_stream", "llmserve": "/v1/chat/completions", "e2e": "/v1/codegen"},
"codetrans": {"llm": "/generate", "llmserve": "/v1/chat/completions", "e2e": "/v1/codetrans"},
"docsum": {
"docsum": "/v1/docsum",
"docsum-vllm": "/generate",
"docsum-llm-uservice": "/v1/chat/docsum",
"e2e": "/v1/docsum",
},
"faqgen": {"llm": "/v1/chat/completions", "llmserve": "/v1/chat/completions", "e2e": "/v1/faqgen"},
"audioqna": {
"asr": "/v1/audio/transcriptions",
@@ -102,7 +108,10 @@ def create_run_yaml_content(service, base_url, bench_target, test_phase, num_que
"dataset": service.get("dataset", "default"),
"prompts": service.get("prompts", None),
"max-output": service.get("max_output", 128),
"max-new-tokens": service.get("max_new_tokens", 128),
"seed": test_params.get("seed", None),
"stream": service.get("stream", True),
"summary_type": service.get("summary_type", "stuff"),
"llm-model": test_params["llm_model"],
"deployment-type": test_params["deployment_type"],
"load-shape": test_params["load_shape"],
@@ -341,6 +350,7 @@ def run_benchmark(report=False, yaml=yaml):
],
"codegen": ["llm", "llmserve", "e2e"],
"codetrans": ["llm", "llmserve", "e2e"],
"docsum": ["e2e"],
"faqgen": ["llm", "llmserve", "e2e"],
"audioqna": ["asr", "llm", "llmserve", "tts", "e2e"],
"visualqna": ["lvm", "lvmserve", "e2e"],
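For orientation, a minimal standalone sketch of how an entry from an endpoint map shaped like the addition above can be turned into a request URL; the map literal and the base URL below are illustrative, not values read from the benchmark configuration:

# Sketch only: resolve a docsum endpoint from a map shaped like the one added above.
# The base URL is a hypothetical default, not taken from any config file.
service_endpoints = {
    "docsum": {
        "docsum": "/v1/docsum",
        "docsum-vllm": "/generate",
        "docsum-llm-uservice": "/v1/chat/docsum",
        "e2e": "/v1/docsum",
    },
}

def build_url(example, target, base="http://localhost:8888"):
    """Join a hypothetical base URL with the endpoint registered for a target."""
    return base + service_endpoints[example][target]

print(build_url("docsum", "e2e"))  # http://localhost:8888/v1/docsum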
15 changes: 14 additions & 1 deletion evals/benchmark/benchmark.yaml
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0

test_suite_config: # Overall configuration settings for the test suite
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
examples: ["chatqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, docsum, faqgen, audioqna, visualqna
deployment_type: "k8s" # Default is "k8s", can also be "docker"
service_ip: None # Leave as None for k8s, specify for Docker
service_port: None # Leave as None for k8s, specify for Docker
@@ -121,6 +121,19 @@ test_cases:
run_test: true
service_name: "codetrans-backend-server-svc" # Replace with your service name

docsum:
e2e:
run_test: true
service_name: "docsum" # Replace with your service name
stream: true # Stream the output; set to false when summary_type is map_reduce or refine
max_new_tokens: 1024
summary_type: "stuff" # Supported summary types: stuff, truncate, map_reduce, refine
dataset: # Path to the document to be summarized when random prompt is true
service_list:
- "docsum"
- "docsum-llm-uservice"
- "docsum-vllm"

faqgen:
llm:
run_test: false
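A small sketch (assuming PyYAML and a config shaped like the docsum block above) of reading the new test-case keys before they are copied into the stresscli run settings; the file path and fallback defaults here are assumptions:

# Sketch only: load the docsum e2e settings from a benchmark.yaml laid out as above.
import yaml

with open("benchmark.yaml") as f:  # path is an assumption
    config = yaml.safe_load(f)

docsum_e2e = config["test_cases"]["docsum"]["e2e"]
stream = docsum_e2e.get("stream", True)                  # false for map_reduce / refine
max_new_tokens = docsum_e2e.get("max_new_tokens", 128)   # fallback mirrors benchmark.py
summary_type = docsum_e2e.get("summary_type", "stuff")
print(stream, max_new_tokens, summary_type)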
9 changes: 9 additions & 0 deletions evals/benchmark/stresscli/commands/load_test.py
@@ -146,6 +146,9 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
runspec["seed"] = run_settings.get("seed", global_settings.get("seed", locust_defaults["seed"]))
runspec["seed"] = locust_defaults["seed"] if runspec["seed"] is None else runspec["seed"]
runspec["run_name"] = run_settings["name"]
runspec["summary_type"] = global_settings.get("summary_type", None)
runspec["stream"] = global_settings.get("stream", None)
runspec["max-new-tokens"] = global_settings.get("max-new-tokens", locust_defaults["max-output"])

# Specify load shape to adjust user distribution
load_shape_conf = run_settings.get("load-shape", global_settings.get("load-shape", locust_defaults["load-shape"]))
@@ -249,6 +252,12 @@
str(runspec["llm-model"]),
"--stop-timeout",
str(runspec["stop_timeout"]),
"--summary_type",
str(runspec["summary_type"]),
"--stream",
str(runspec["stream"]),
"--max-new-tokens",
str(runspec["max-new-tokens"]),
"--csv",
csv_output,
"--headless",
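In effect, the new settings travel to locust as extra CLI arguments. A condensed sketch of the argument list being assembled; the values and the surrounding command are illustrative only:

# Sketch only: shape of the locust argument list once the new flags are appended.
runspec = {"summary_type": "stuff", "stream": "true", "max-new-tokens": 1024}

cmd = ["locust", "--headless"]
cmd += ["--summary_type", str(runspec["summary_type"])]
cmd += ["--stream", str(runspec["stream"])]
cmd += ["--max-new-tokens", str(runspec["max-new-tokens"])]
print(" ".join(cmd))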
27 changes: 27 additions & 0 deletions evals/benchmark/stresscli/locust/aistress.py
@@ -75,6 +75,27 @@ def _(parser):
parser.add_argument(
"--max-output", type=int, env_var="OPEA_EVAL_MAX_OUTPUT_TOKENS", default=128, help="Max number of output tokens"
)
parser.add_argument(
"--summary_type",
type=str,
env_var="OPEA_EVAL_SUMMARY_TYPE",
default="stuff",
help="Summary type for Docsum example",
)
parser.add_argument(
"--stream",
type=str,
env_var="OPEA_EVAL_STREAM",
default="true",
help="Specify whether the HTTP request response from the service should be streamed",
)
parser.add_argument(
"--max-new-tokens",
type=int,
env_var="OPEA_EVAL_MAX_NEW_TOKENS",
default=256,
help="Specify the maximum number of new tokens to generate for OPEA services",
)


reqlist = []
@@ -122,12 +143,15 @@ def bench_main(self):
"chatqnabench",
"codegenfixed",
"codegenbench",
"docsumbench",
"faqgenfixed",
"faqgenbench",
"chatqna_qlist_pubmed",
]
if self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
req_params = {"data": reqData}
elif self.environment.parsed_options.bench_target in ["docsumbench", "docsumfixed"]:
req_params = {"files": reqData}
else:
req_params = {"json": reqData}
test_start_time = time.time()
@@ -254,6 +278,9 @@ def on_locust_init(environment, **_kwargs):
os.environ["OPEA_EVAL_PROMPTS"] = environment.parsed_options.prompts
os.environ["OPEA_EVAL_MAX_OUTPUT_TOKENS"] = str(environment.parsed_options.max_output)
os.environ["LLM_MODEL"] = environment.parsed_options.llm_model
os.environ["OPEA_EVAL_SUMMARY_TYPE"] = environment.parsed_options.summary_type
os.environ["OPEA_EVAL_STREAM"] = environment.parsed_options.stream
os.environ["OPEA_EVAL_MAX_NEW_TOKENS"] = str(environment.parsed_options.max_new_tokens)

bench_package = __import__(environment.parsed_options.bench_target)

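The custom options above are registered as locust arguments, exported as environment variables in on_locust_init, and the bench target name decides how the payload is sent. A standalone sketch of that request-parameter branching (the target names follow the lists above; the helper itself is hypothetical):

# Sketch only: condensed version of the branching in bench_main, outside locust.
def build_req_params(bench_target, req_data):
    if bench_target in ("faqgenfixed", "faqgenbench"):
        return {"data": req_data}   # form-encoded body
    if bench_target in ("docsumbench", "docsumfixed"):
        return {"files": req_data}  # multipart upload for document files
    return {"json": req_data}       # default: JSON body

print(build_req_params("docsumbench", {"messages": ""}))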
40 changes: 40 additions & 0 deletions evals/benchmark/stresscli/locust/docsumbench.py
@@ -0,0 +1,40 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

import tokenresponse as token

cwd = os.path.dirname(__file__)
filepath = os.environ["OPEA_EVAL_DATASET"]
filename = os.path.basename(filepath)
max_tokens = os.environ["OPEA_EVAL_MAX_NEW_TOKENS"]
summary_type = os.environ["OPEA_EVAL_SUMMARY_TYPE"]
stream = os.environ["OPEA_EVAL_STREAM"]


def getUrl():
return "/v1/docsum"


def getReqData():

files = {
"type": (None, "text"),
"messages": (None, ""),
"files": (filename, open(filepath, "rb"), "text/plain"),
"max_tokens": (None, max_tokens),
"language": (None, "en"),
"summary_type": (None, summary_type),
"stream": (None, stream),
}

return files


def respStatics(environment, reqData, respData):
return token.respStatics(environment, reqData, respData)


def staticsOutput(environment, reqlist):
token.staticsOutput(environment, reqlist)
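As a usage sketch, the files dict returned by getReqData() maps directly onto a multipart POST; the snippet below assumes the requests library, a local sample file, and a hypothetical gateway address:

# Sketch only: send the same multipart fields to a docsum endpoint with requests.
import requests

with open("upload_file.txt", "rb") as fh:  # sample file path is an assumption
    files = {
        "type": (None, "text"),
        "messages": (None, ""),
        "files": ("upload_file.txt", fh, "text/plain"),
        "max_tokens": (None, "1024"),
        "language": (None, "en"),
        "summary_type": (None, "stuff"),
        "stream": (None, "true"),
    }
    resp = requests.post("http://localhost:8888/v1/docsum", files=files)  # host is hypothetical
print(resp.status_code)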
38 changes: 38 additions & 0 deletions evals/benchmark/stresscli/locust/docsumfixed.py
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

import tokenresponse as token

cwd = os.path.dirname(__file__)
filepath = f"{cwd}/../../data/upload_file.txt"
filename = os.path.basename(filepath)
max_tokens = os.environ["OPEA_EVAL_MAX_NEW_TOKENS"]


def getUrl():
return "/v1/docsum"


def getReqData():

files = {
"type": (None, "text"),
"messages": (None, ""),
"files": (filename, open(filepath, "rb"), "text/plain"),
"max_tokens": (None, max_tokens),
"language": (None, "en"),
"summary_type": (None, "stuff"),
"stream": (None, "true"),
}

return files


def respStatics(environment, reqData, respData):
return token.respStatics(environment, reqData, respData)


def staticsOutput(environment, reqlist):
token.staticsOutput(environment, reqlist)