From 5aa930d5e9bf996c58e65796b40e3a32c50a2185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=90=E6=82=AC?= Date: Thu, 4 Sep 2025 20:05:56 +0800 Subject: [PATCH 1/5] add cmmlu and gsm8k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 子悬 --- evalscope/perf/plugin/datasets/__init__.py | 2 + evalscope/perf/plugin/datasets/cmmlu.py | 44 ++++++++++++++++++++++ evalscope/perf/plugin/datasets/gsm8k.py | 30 +++++++++++++++ evalscope/perf/plugin/datasets/openqa.py | 2 +- 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 evalscope/perf/plugin/datasets/cmmlu.py create mode 100644 evalscope/perf/plugin/datasets/gsm8k.py diff --git a/evalscope/perf/plugin/datasets/__init__.py b/evalscope/perf/plugin/datasets/__init__.py index 17dcb34b..e34f00cf 100644 --- a/evalscope/perf/plugin/datasets/__init__.py +++ b/evalscope/perf/plugin/datasets/__init__.py @@ -1,6 +1,8 @@ from .base import DatasetPluginBase from .custom import CustomDatasetPlugin +from .cmmlu import CmmluDatasetPlugin from .flickr8k import FlickrDatasetPlugin +from .gsm8k import Gsm8kDatasetPlugin from .kontext_bench import KontextDatasetPlugin from .line_by_line import LineByLineDatasetPlugin from .longalpaca import LongAlpacaDatasetPlugin diff --git a/evalscope/perf/plugin/datasets/cmmlu.py b/evalscope/perf/plugin/datasets/cmmlu.py new file mode 100644 index 00000000..ae93ad4c --- /dev/null +++ b/evalscope/perf/plugin/datasets/cmmlu.py @@ -0,0 +1,44 @@ +import json +import os +from typing import Any, Dict, Iterator, List + +from evalscope.perf.arguments import Arguments +from evalscope.perf.plugin.datasets.base import DatasetPluginBase +from evalscope.perf.plugin.registry import register_dataset + + +@register_dataset('cmmlu') +class CmmluDatasetPlugin(DatasetPluginBase): + """Read dataset and return prompt. + Datasets: https://huggingface.co/datasets/haonan-li/cmmlu/resolve/main/cmmlu-test.jsonl + """ + + def __init__(self, query_parameters: Arguments): + super().__init__(query_parameters) + + def build_messages(self) -> Iterator[List[Dict]]: + if not self.query_parameters.dataset_path: + from modelscope import dataset_snapshot_download + + file_name = 'cmmlu-test.jsonl' # 实际文件名请按下载后的为准 + local_path = dataset_snapshot_download( + 'haonan-li/cmmlu', + allow_patterns=[file_name] + ) + self.query_parameters.dataset_path = os.path.join(local_path, file_name) + + for item in self.dataset_line_by_line(self.query_parameters.dataset_path): + item = json.loads(item) + prompt = item['question'].strip() + + # 根据长度过滤 + if not (self.query_parameters.min_prompt_length + < len(prompt) + < self.query_parameters.max_prompt_length): + continue + + if self.query_parameters.apply_chat_template: + message = self.create_message(prompt) + yield [message] + else: + yield prompt \ No newline at end of file diff --git a/evalscope/perf/plugin/datasets/gsm8k.py b/evalscope/perf/plugin/datasets/gsm8k.py new file mode 100644 index 00000000..257b063f --- /dev/null +++ b/evalscope/perf/plugin/datasets/gsm8k.py @@ -0,0 +1,30 @@ +import json +import os +from typing import Any, Dict, Iterator, List + +from evalscope.perf.arguments import Arguments +from evalscope.perf.plugin.datasets.base import DatasetPluginBase +from evalscope.perf.plugin.registry import register_dataset + + +@register_dataset('gsm8k') +class Gsm8kDatasetPlugin(DatasetPluginBase): + """Read dataset and return prompt. + """ + + def __init__(self, query_parameters: Arguments): + super().__init__(query_parameters) + + def build_messages(self) -> Iterator[List[Dict]]: + from modelscope.msdatasets import MsDataset + dataset = MsDataset.load('modelscope/gsm8k', subset_name='main', split='train') + + for item in dataset: + prompt = item['question'].strip() + if (len(prompt) > self.query_parameters.min_prompt_length + and len(prompt) < self.query_parameters.max_prompt_length): + if self.query_parameters.apply_chat_template: + message = self.create_message(prompt) + yield [message] + else: + yield prompt diff --git a/evalscope/perf/plugin/datasets/openqa.py b/evalscope/perf/plugin/datasets/openqa.py index 3796f8f0..53ea94ad 100644 --- a/evalscope/perf/plugin/datasets/openqa.py +++ b/evalscope/perf/plugin/datasets/openqa.py @@ -21,7 +21,7 @@ def build_messages(self) -> Iterator[List[Dict]]: from modelscope import dataset_snapshot_download file_name = 'open_qa.jsonl' - local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese', allow_patterns=[file_name]) + local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese') self.query_parameters.dataset_path = os.path.join(local_path, file_name) for item in self.dataset_line_by_line(self.query_parameters.dataset_path): From ffb1512e08eb4836c14e31c4477df6297a70aa00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=90=E6=82=AC?= Date: Thu, 4 Sep 2025 20:50:39 +0800 Subject: [PATCH 2/5] fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 子悬 --- evalscope/perf/plugin/datasets/cmmlu.py | 5 +++-- evalscope/perf/plugin/datasets/gsm8k.py | 9 ++++++--- evalscope/perf/plugin/datasets/openqa.py | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/evalscope/perf/plugin/datasets/cmmlu.py b/evalscope/perf/plugin/datasets/cmmlu.py index ae93ad4c..b2b38f33 100644 --- a/evalscope/perf/plugin/datasets/cmmlu.py +++ b/evalscope/perf/plugin/datasets/cmmlu.py @@ -16,11 +16,12 @@ class CmmluDatasetPlugin(DatasetPluginBase): def __init__(self, query_parameters: Arguments): super().__init__(query_parameters) + def build_messages(self) -> Iterator[List[Dict]]: if not self.query_parameters.dataset_path: from modelscope import dataset_snapshot_download - file_name = 'cmmlu-test.jsonl' # 实际文件名请按下载后的为准 + file_name = 'cmmlu-test.jsonl' local_path = dataset_snapshot_download( 'haonan-li/cmmlu', allow_patterns=[file_name] @@ -41,4 +42,4 @@ def build_messages(self) -> Iterator[List[Dict]]: message = self.create_message(prompt) yield [message] else: - yield prompt \ No newline at end of file + yield [prompt] \ No newline at end of file diff --git a/evalscope/perf/plugin/datasets/gsm8k.py b/evalscope/perf/plugin/datasets/gsm8k.py index 257b063f..6194fd97 100644 --- a/evalscope/perf/plugin/datasets/gsm8k.py +++ b/evalscope/perf/plugin/datasets/gsm8k.py @@ -9,15 +9,18 @@ @register_dataset('gsm8k') class Gsm8kDatasetPlugin(DatasetPluginBase): - """Read dataset and return prompt. + """ + Read dataset and return prompt. """ def __init__(self, query_parameters: Arguments): super().__init__(query_parameters) + def build_messages(self) -> Iterator[List[Dict]]: from modelscope.msdatasets import MsDataset - dataset = MsDataset.load('modelscope/gsm8k', subset_name='main', split='train') + dataset = MsDataset.load('modelscope/gsm8k', + subset_name='main', split='test') for item in dataset: prompt = item['question'].strip() @@ -27,4 +30,4 @@ def build_messages(self) -> Iterator[List[Dict]]: message = self.create_message(prompt) yield [message] else: - yield prompt + yield [prompt] diff --git a/evalscope/perf/plugin/datasets/openqa.py b/evalscope/perf/plugin/datasets/openqa.py index 53ea94ad..c3cdb0af 100644 --- a/evalscope/perf/plugin/datasets/openqa.py +++ b/evalscope/perf/plugin/datasets/openqa.py @@ -21,7 +21,8 @@ def build_messages(self) -> Iterator[List[Dict]]: from modelscope import dataset_snapshot_download file_name = 'open_qa.jsonl' - local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese') + local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese', + allow_patterns=[file_name]) self.query_parameters.dataset_path = os.path.join(local_path, file_name) for item in self.dataset_line_by_line(self.query_parameters.dataset_path): From bca620d9a195c7671bd41ac7f353adda90d96d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=90=E6=82=AC?= Date: Fri, 5 Sep 2025 11:07:20 +0800 Subject: [PATCH 3/5] fix pre-commit error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 子悬 --- evalscope/perf/plugin/datasets/__init__.py | 2 +- evalscope/perf/plugin/datasets/cmmlu.py | 14 ++++---------- evalscope/perf/plugin/datasets/gsm8k.py | 10 +++++----- evalscope/perf/plugin/datasets/openqa.py | 3 +-- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/evalscope/perf/plugin/datasets/__init__.py b/evalscope/perf/plugin/datasets/__init__.py index e34f00cf..65a261f2 100644 --- a/evalscope/perf/plugin/datasets/__init__.py +++ b/evalscope/perf/plugin/datasets/__init__.py @@ -1,6 +1,6 @@ from .base import DatasetPluginBase -from .custom import CustomDatasetPlugin from .cmmlu import CmmluDatasetPlugin +from .custom import CustomDatasetPlugin from .flickr8k import FlickrDatasetPlugin from .gsm8k import Gsm8kDatasetPlugin from .kontext_bench import KontextDatasetPlugin diff --git a/evalscope/perf/plugin/datasets/cmmlu.py b/evalscope/perf/plugin/datasets/cmmlu.py index b2b38f33..a8d17ec1 100644 --- a/evalscope/perf/plugin/datasets/cmmlu.py +++ b/evalscope/perf/plugin/datasets/cmmlu.py @@ -16,16 +16,12 @@ class CmmluDatasetPlugin(DatasetPluginBase): def __init__(self, query_parameters: Arguments): super().__init__(query_parameters) - def build_messages(self) -> Iterator[List[Dict]]: if not self.query_parameters.dataset_path: from modelscope import dataset_snapshot_download - file_name = 'cmmlu-test.jsonl' - local_path = dataset_snapshot_download( - 'haonan-li/cmmlu', - allow_patterns=[file_name] - ) + file_name = 'cmmlu-test.jsonl' + local_path = dataset_snapshot_download('haonan-li/cmmlu', allow_patterns=[file_name]) self.query_parameters.dataset_path = os.path.join(local_path, file_name) for item in self.dataset_line_by_line(self.query_parameters.dataset_path): @@ -33,13 +29,11 @@ def build_messages(self) -> Iterator[List[Dict]]: prompt = item['question'].strip() # 根据长度过滤 - if not (self.query_parameters.min_prompt_length - < len(prompt) - < self.query_parameters.max_prompt_length): + if not (self.query_parameters.min_prompt_length < len(prompt) < self.query_parameters.max_prompt_length): continue if self.query_parameters.apply_chat_template: message = self.create_message(prompt) yield [message] else: - yield [prompt] \ No newline at end of file + yield [prompt] diff --git a/evalscope/perf/plugin/datasets/gsm8k.py b/evalscope/perf/plugin/datasets/gsm8k.py index 6194fd97..02b4db1b 100644 --- a/evalscope/perf/plugin/datasets/gsm8k.py +++ b/evalscope/perf/plugin/datasets/gsm8k.py @@ -16,16 +16,16 @@ class Gsm8kDatasetPlugin(DatasetPluginBase): def __init__(self, query_parameters: Arguments): super().__init__(query_parameters) - def build_messages(self) -> Iterator[List[Dict]]: from modelscope.msdatasets import MsDataset - dataset = MsDataset.load('modelscope/gsm8k', - subset_name='main', split='test') + dataset = MsDataset.load('modelscope/gsm8k', subset_name='main', split='test') for item in dataset: prompt = item['question'].strip() - if (len(prompt) > self.query_parameters.min_prompt_length - and len(prompt) < self.query_parameters.max_prompt_length): + if ( + len(prompt) > self.query_parameters.min_prompt_length + and len(prompt) < self.query_parameters.max_prompt_length + ): if self.query_parameters.apply_chat_template: message = self.create_message(prompt) yield [message] diff --git a/evalscope/perf/plugin/datasets/openqa.py b/evalscope/perf/plugin/datasets/openqa.py index c3cdb0af..3796f8f0 100644 --- a/evalscope/perf/plugin/datasets/openqa.py +++ b/evalscope/perf/plugin/datasets/openqa.py @@ -21,8 +21,7 @@ def build_messages(self) -> Iterator[List[Dict]]: from modelscope import dataset_snapshot_download file_name = 'open_qa.jsonl' - local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese', - allow_patterns=[file_name]) + local_path = dataset_snapshot_download('AI-ModelScope/HC3-Chinese', allow_patterns=[file_name]) self.query_parameters.dataset_path = os.path.join(local_path, file_name) for item in self.dataset_line_by_line(self.query_parameters.dataset_path): From 2e222114c10e2be1c6aa6508d23dec092fd17b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=90=E6=82=AC?= Date: Fri, 5 Sep 2025 11:11:53 +0800 Subject: [PATCH 4/5] fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 子悬 --- evalscope/perf/plugin/datasets/cmmlu.py | 7 ++----- evalscope/perf/plugin/datasets/gsm8k.py | 7 ++----- evalscope/perf/plugin/datasets/openqa.py | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/evalscope/perf/plugin/datasets/cmmlu.py b/evalscope/perf/plugin/datasets/cmmlu.py index a8d17ec1..dff9d7cb 100644 --- a/evalscope/perf/plugin/datasets/cmmlu.py +++ b/evalscope/perf/plugin/datasets/cmmlu.py @@ -32,8 +32,5 @@ def build_messages(self) -> Iterator[List[Dict]]: if not (self.query_parameters.min_prompt_length < len(prompt) < self.query_parameters.max_prompt_length): continue - if self.query_parameters.apply_chat_template: - message = self.create_message(prompt) - yield [message] - else: - yield [prompt] + message = self.create_message(prompt) + yield [message] diff --git a/evalscope/perf/plugin/datasets/gsm8k.py b/evalscope/perf/plugin/datasets/gsm8k.py index 02b4db1b..0673fd00 100644 --- a/evalscope/perf/plugin/datasets/gsm8k.py +++ b/evalscope/perf/plugin/datasets/gsm8k.py @@ -26,8 +26,5 @@ def build_messages(self) -> Iterator[List[Dict]]: len(prompt) > self.query_parameters.min_prompt_length and len(prompt) < self.query_parameters.max_prompt_length ): - if self.query_parameters.apply_chat_template: - message = self.create_message(prompt) - yield [message] - else: - yield [prompt] + message = self.create_message(prompt) + yield [message] diff --git a/evalscope/perf/plugin/datasets/openqa.py b/evalscope/perf/plugin/datasets/openqa.py index 3796f8f0..e2f99982 100644 --- a/evalscope/perf/plugin/datasets/openqa.py +++ b/evalscope/perf/plugin/datasets/openqa.py @@ -31,8 +31,5 @@ def build_messages(self) -> Iterator[List[Dict]]: len(prompt) > self.query_parameters.min_prompt_length and len(prompt) < self.query_parameters.max_prompt_length ): - if self.query_parameters.apply_chat_template: - message = self.create_message(prompt) - yield [message] - else: - yield prompt + message = self.create_message(prompt) + yield [message] From fbc8171225c71bb84c41e76bf8156ff8acc8297c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=90=E6=82=AC?= Date: Fri, 5 Sep 2025 13:57:17 +0800 Subject: [PATCH 5/5] rm cmmlu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 子悬 --- evalscope/perf/plugin/datasets/__init__.py | 1 - evalscope/perf/plugin/datasets/cmmlu.py | 36 ---------------------- 2 files changed, 37 deletions(-) delete mode 100644 evalscope/perf/plugin/datasets/cmmlu.py diff --git a/evalscope/perf/plugin/datasets/__init__.py b/evalscope/perf/plugin/datasets/__init__.py index 65a261f2..2e942827 100644 --- a/evalscope/perf/plugin/datasets/__init__.py +++ b/evalscope/perf/plugin/datasets/__init__.py @@ -1,5 +1,4 @@ from .base import DatasetPluginBase -from .cmmlu import CmmluDatasetPlugin from .custom import CustomDatasetPlugin from .flickr8k import FlickrDatasetPlugin from .gsm8k import Gsm8kDatasetPlugin diff --git a/evalscope/perf/plugin/datasets/cmmlu.py b/evalscope/perf/plugin/datasets/cmmlu.py deleted file mode 100644 index dff9d7cb..00000000 --- a/evalscope/perf/plugin/datasets/cmmlu.py +++ /dev/null @@ -1,36 +0,0 @@ -import json -import os -from typing import Any, Dict, Iterator, List - -from evalscope.perf.arguments import Arguments -from evalscope.perf.plugin.datasets.base import DatasetPluginBase -from evalscope.perf.plugin.registry import register_dataset - - -@register_dataset('cmmlu') -class CmmluDatasetPlugin(DatasetPluginBase): - """Read dataset and return prompt. - Datasets: https://huggingface.co/datasets/haonan-li/cmmlu/resolve/main/cmmlu-test.jsonl - """ - - def __init__(self, query_parameters: Arguments): - super().__init__(query_parameters) - - def build_messages(self) -> Iterator[List[Dict]]: - if not self.query_parameters.dataset_path: - from modelscope import dataset_snapshot_download - - file_name = 'cmmlu-test.jsonl' - local_path = dataset_snapshot_download('haonan-li/cmmlu', allow_patterns=[file_name]) - self.query_parameters.dataset_path = os.path.join(local_path, file_name) - - for item in self.dataset_line_by_line(self.query_parameters.dataset_path): - item = json.loads(item) - prompt = item['question'].strip() - - # 根据长度过滤 - if not (self.query_parameters.min_prompt_length < len(prompt) < self.query_parameters.max_prompt_length): - continue - - message = self.create_message(prompt) - yield [message]