diff --git a/evalscope/perf/plugin/datasets/__init__.py b/evalscope/perf/plugin/datasets/__init__.py
index 17dcb34b..2e942827 100644
--- a/evalscope/perf/plugin/datasets/__init__.py
+++ b/evalscope/perf/plugin/datasets/__init__.py
@@ -1,6 +1,7 @@
 from .base import DatasetPluginBase
 from .custom import CustomDatasetPlugin
 from .flickr8k import FlickrDatasetPlugin
+from .gsm8k import Gsm8kDatasetPlugin
 from .kontext_bench import KontextDatasetPlugin
 from .line_by_line import LineByLineDatasetPlugin
 from .longalpaca import LongAlpacaDatasetPlugin
diff --git a/evalscope/perf/plugin/datasets/gsm8k.py b/evalscope/perf/plugin/datasets/gsm8k.py
new file mode 100644
index 00000000..0673fd00
--- /dev/null
+++ b/evalscope/perf/plugin/datasets/gsm8k.py
@@ -0,0 +1,35 @@
+from typing import Dict, Iterator, List
+
+from evalscope.perf.arguments import Arguments
+from evalscope.perf.plugin.datasets.base import DatasetPluginBase
+from evalscope.perf.plugin.registry import register_dataset
+
+
+@register_dataset('gsm8k')
+class Gsm8kDatasetPlugin(DatasetPluginBase):
+    """Dataset plugin that yields GSM8K questions as single-message prompts.
+
+    Questions come from the ``test`` split of the ``main`` subset of the
+    ``modelscope/gsm8k`` dataset.
+    """
+
+    def __init__(self, query_parameters: Arguments):
+        super().__init__(query_parameters)
+
+    def build_messages(self) -> Iterator[List[Dict]]:
+        """Yield a single-message list for each GSM8K question in range.
+
+        A question is kept only when its stripped length is strictly between
+        ``min_prompt_length`` and ``max_prompt_length``.
+        """
+        # Imported here so modelscope is only loaded when messages are built.
+        from modelscope.msdatasets import MsDataset
+
+        dataset = MsDataset.load('modelscope/gsm8k', subset_name='main', split='test')
+        min_length = self.query_parameters.min_prompt_length
+        max_length = self.query_parameters.max_prompt_length
+
+        for item in dataset:
+            prompt = item['question'].strip()
+            if min_length < len(prompt) < max_length:
+                yield [self.create_message(prompt)]
diff --git a/evalscope/perf/plugin/datasets/openqa.py b/evalscope/perf/plugin/datasets/openqa.py
index 3796f8f0..e2f99982 100644
--- a/evalscope/perf/plugin/datasets/openqa.py
+++ b/evalscope/perf/plugin/datasets/openqa.py
@@ -31,8 +31,5 @@ def build_messages(self) -> Iterator[List[Dict]]:
                 len(prompt) > self.query_parameters.min_prompt_length
                 and len(prompt) < self.query_parameters.max_prompt_length
             ):
-                if self.query_parameters.apply_chat_template:
-                    message = self.create_message(prompt)
-                    yield [message]
-                else:
-                    yield prompt
+                message = self.create_message(prompt)
+                yield [message]