Add supported_video_datasets preset registry for quick start

FangXinyu-0913 · FangXinyu-0913 · commit 8b70019740e8 · 2024-12-19T09:39:26.000Z
diff --git a/run.py b/run.py
@@ -2,6 +2,7 @@
 import torch.distributed as dist
 
 from vlmeval.config import supported_VLM
+from vlmeval.dataset.video_dataset_config import supported_video_datasets
 from vlmeval.dataset import build_dataset
 from vlmeval.inference import infer_data_job
 from vlmeval.inference_video import infer_data_job_video
@@ -26,16 +27,43 @@ def build_model_from_config(cfg, model_name):
         raise ValueError(f'Class {cls_name} is not supported in `vlmeval.api` or `vlmeval.vlm`')
 
 
-def build_dataset_from_config(cfg):
+def build_dataset_from_config(cfg, dataset_name):
+    """Build the dataset registered under `dataset_name` in `cfg`.
+
+    Args:
+        cfg: mapping of dataset name -> config dict (`cfg['data']` at the call sites).
+        dataset_name: key into `cfg`. An empty config (`{}`) means "use the preset of
+            the same name from `supported_video_datasets`".
+
+    Returns:
+        The dataset instance built from the preset or from the configured class.
+
+    Raises:
+        KeyError: `dataset_name` is not in `cfg`, or its config is empty and no preset
+            with that name exists.
+        AssertionError: a non-empty config lacks the 'class' key.
+        ValueError: the class is not an attribute of `vlmeval.dataset`, or the class
+            accepts `fps`/`nframe` and not exactly one of them is positive.
+    """
     import vlmeval.dataset
     import inspect
-    config = cp.deepcopy(cfg)
+    config = cp.deepcopy(cfg[dataset_name])
+    if config == {}:
+        # Quick-start path: the dataset name itself selects a predefined preset.
+        return supported_video_datasets[dataset_name]()
     assert 'class' in config
     cls_name = config.pop('class')
     if hasattr(vlmeval.dataset, cls_name):
         cls = getattr(vlmeval.dataset, cls_name)
         sig = inspect.signature(cls.__init__)
         valid_params = {k: v for k, v in config.items() if k in sig.parameters}
+        # Validate the sampling args only for classes whose __init__ takes them;
+        # otherwise every dataset without fps/nframe would be rejected here.
+        if 'fps' in sig.parameters or 'nframe' in sig.parameters:
+            if valid_params.get('fps', 0) > 0 and valid_params.get('nframe', 0) > 0:
+                raise ValueError('fps and nframe should not be set at the same time')
+            if valid_params.get('fps', 0) <= 0 and valid_params.get('nframe', 0) <= 0:
+                raise ValueError('fps and nframe should be set at least one valid value')
         return cls(**valid_params)
     else:
         raise ValueError(f'Class {cls_name} is not supported in `vlmeval.dataset`')
@@ -190,20 +197,16 @@ def main():
                 if use_config:
                     if world_size > 1:
                         if rank == 0:
-                            dataset = build_dataset_from_config(cfg['data'][dataset_name])
+                            dataset = build_dataset_from_config(cfg['data'], dataset_name)
                         dist.barrier()
-                    dataset = build_dataset_from_config(cfg['data'][dataset_name])
+                    dataset = build_dataset_from_config(cfg['data'], dataset_name)
                     if dataset is None:
                         logger.error(f'Dataset {dataset_name} is not valid, will be skipped. ')
                         continue
                 else:
                     dataset_kwargs = {}
                     if dataset_name in ['MMLongBench_DOC', 'DUDE', 'DUDE_MINI', 'SLIDEVQA', 'SLIDEVQA_MINI']:
                         dataset_kwargs['model'] = model_name
-                    if dataset_name == 'MMBench-Video':
-                        dataset_kwargs['pack'] = args.pack
-                    if dataset_name == 'Video-MME':
-                        dataset_kwargs['use_subtitle'] = args.use_subtitle
 
                     # If distributed, first build the dataset on the main process for doing preparation works
                     if world_size > 1:
@@ -215,29 +218,6 @@ def main():
                     if dataset is None:
                         logger.error(f'Dataset {dataset_name} is not valid, will be skipped. ')
                         continue
-                    # Handling Video Datasets. For Video Dataset, set the fps for priority
-                    if args.fps > 0:
-                        if dataset_name == 'MVBench':
-                            raise ValueError('MVBench does not support fps setting, please transfer to MVBench_MP4!')
-                        args.nframe = 0
-                    if dataset_name in ['MMBench-Video']:
-                        packstr = 'pack' if args.pack else 'nopack'
-                        if args.nframe > 0:
-                            result_file_base = f'{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
-                        else:
-                            result_file_base = f'{model_name}_{dataset_name}_{args.fps}fps_{packstr}.xlsx'
-                    elif dataset.MODALITY == 'VIDEO':
-                        if args.pack:
-                            logger.info(f'{dataset_name} not support Pack Mode, directly change to unpack')
-                            args.pack = False
-                        packstr = 'pack' if args.pack else 'nopack'
-                        if args.nframe > 0:
-                            result_file_base = f'{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
-                        else:
-                            result_file_base = f'{model_name}_{dataset_name}_{args.fps}fps_{packstr}.xlsx'
-                        if dataset_name in ['Video-MME', 'LongVideoBench']:
-                            subtitlestr = 'subs' if args.use_subtitle else 'nosubs'
-                            result_file_base = result_file_base.replace('.xlsx', f'_{subtitlestr}.xlsx')
 
                 # Handling Multi-Turn Dataset
                 if dataset.TYPE == 'MT':
diff --git a/vlmeval/dataset/__init__.py b/vlmeval/dataset/__init__.py
@@ -34,6 +34,7 @@
 from .mmmath import MMMath
 from .dynamath import Dynamath
 from .utils import *
+from .video_dataset_config import *
 from ..smp import *
 
 
@@ -196,7 +197,11 @@ def DATASET_MODALITY(dataset, *, default: str = 'IMAGE') -> str:
 
 def build_dataset(dataset_name, **kwargs):
+    # Preset names (e.g. 'MVBench_8frame') are resolved first, once, instead of being
+    # re-checked on every iteration of the class loop below.
+    if dataset_name in supported_video_datasets:
+        return supported_video_datasets[dataset_name](**kwargs)
     for cls in DATASET_CLASSES:
         if dataset_name in cls.supported_datasets():
             return cls(dataset=dataset_name, **kwargs)
 
     warnings.warn(f'Dataset {dataset_name} is not officially supported. ')
diff --git a/vlmeval/dataset/mvbench.py b/vlmeval/dataset/mvbench.py
@@ -28,7 +28,7 @@ class MVBench(VideoBaseDataset):
 
     TYPE = 'Video-MCQ'
 
-    def __init__(self, dataset='MVBench', pack=False, nframe=0, fps=-1):
+    def __init__(self, dataset='MVBench', nframe=0, fps=-1):
         self.type_data_list = {
             'Action Sequence': ('action_sequence.json',
                                 'your_data_path/star/Charades_v1_480/', 'video', True),  # has start & end
@@ -71,7 +71,7 @@ def __init__(self, dataset='MVBench', pack=False, nframe=0, fps=-1):
             'Counterfactual Inference': ('counterfactual_inference.json',
                                          'your_data_path/clevrer/video_validation/', 'video', False),
         }
-        super().__init__(dataset=dataset, pack=pack, nframe=nframe, fps=fps)
+        super().__init__(dataset=dataset, nframe=nframe, fps=fps)
 
     @classmethod
     def supported_datasets(cls):
@@ -432,8 +432,8 @@ class MVBench_MP4(VideoBaseDataset):
 """
     TYPE = 'Video-MCQ'
 
-    def __init__(self, dataset='MVBench_MP4', pack=False, nframe=0, fps=-1):
-        super().__init__(dataset=dataset, pack=pack, nframe=nframe, fps=fps)
+    def __init__(self, dataset='MVBench_MP4', nframe=0, fps=-1):
+        super().__init__(dataset=dataset, nframe=nframe, fps=fps)
 
     @classmethod
     def supported_datasets(cls):
diff --git a/vlmeval/dataset/video_base.py b/vlmeval/dataset/video_base.py
@@ -37,6 +37,10 @@ def __init__(self,
         self.pack = pack
         self.nframe = nframe
         self.fps = fps
+        if self.fps > 0 and self.nframe > 0:
+            raise ValueError('fps and nframe should not be set at the same time')
+        if self.fps <= 0 and self.nframe <= 0:
+            raise ValueError('fps and nframe should be set at least one valid value')
 
     def __len__(self):
         return len(self.videos) if self.pack else len(self.data)
@@ -81,7 +85,7 @@ def save_video_frames(self, video):
             indices = [int(i * step_size) for i in range(required_frames)]
 
             # 提取帧并保存
-            frame_paths = self.frame_paths_fps(video, len(indices), self.fps)
+            frame_paths = self.frame_paths_fps(video, len(indices))
             flag = np.all([osp.exists(p) for p in frame_paths])
             if flag:
                 return frame_paths
diff --git a/vlmeval/dataset/video_dataset_config.py b/vlmeval/dataset/video_dataset_config.py
@@ -0,0 +1,54 @@
+from vlmeval.dataset import *  # the dataset classes used below are expected to come from here
+from functools import partial  # every preset below pre-binds exactly one of nframe / fps
+
+mmbench_video_dataset = {  # MMBench-Video presets; '_pack'/'_nopack' names set pack=True/False
+    'MMBench_Video_8frame_nopack': partial(MMBenchVideo, dataset='MMBench-Video', nframe=8, pack=False),
+    'MMBench_Video_8frame_pack': partial(MMBenchVideo, dataset='MMBench-Video', nframe=8, pack=True),
+    'MMBench_Video_16frame_nopack': partial(MMBenchVideo, dataset='MMBench-Video', nframe=16, pack=False),
+    'MMBench_Video_1fps_nopack': partial(MMBenchVideo, dataset='MMBench-Video', fps=1.0, pack=False),
+    'MMBench_Video_1fps_pack': partial(MMBenchVideo, dataset='MMBench-Video', fps=1.0, pack=True)
+}
+
+mvbench_dataset = {  # MVBench and MVBench_MP4 presets
+    'MVBench_8frame': partial(MVBench, dataset='MVBench', nframe=8),
+    # MVBench does not support fps-based presets; MVBench_MP4 does
+    'MVBench_MP4_8frame': partial(MVBench_MP4, dataset='MVBench_MP4', nframe=8),
+    'MVBench_MP4_1fps': partial(MVBench_MP4, dataset='MVBench_MP4', fps=1.0),
+}
+
+videomme_dataset = {  # Video-MME presets; '_subs' names pass use_subtitle=True
+    'Video-MME_8frame': partial(VideoMME, dataset='Video-MME', nframe=8),
+    'Video-MME_8frame_subs': partial(VideoMME, dataset='Video-MME', nframe=8, use_subtitle=True),
+    'Video-MME_1fps': partial(VideoMME, dataset='Video-MME', fps=1.0),
+    'Video-MME_0.5fps': partial(VideoMME, dataset='Video-MME', fps=0.5),
+    'Video-MME_0.5fps_subs': partial(VideoMME, dataset='Video-MME', fps=0.5, use_subtitle=True),
+}
+
+longvideobench_dataset = {  # LongVideoBench presets; '_subs' names pass use_subtitle=True
+    'LongVideoBench_8frame': partial(LongVideoBench, dataset='LongVideoBench', nframe=8),
+    'LongVideoBench_8frame_subs': partial(LongVideoBench, dataset='LongVideoBench', nframe=8, use_subtitle=True),
+    'LongVideoBench_1fps': partial(LongVideoBench, dataset='LongVideoBench', fps=1.0),
+    'LongVideoBench_0.5fps': partial(LongVideoBench, dataset='LongVideoBench', fps=0.5),
+    'LongVideoBench_0.5fps_subs': partial(LongVideoBench, dataset='LongVideoBench', fps=0.5, use_subtitle=True)
+}
+
+mlvu_dataset = {  # MLVU presets
+    'MLVU_8frame': partial(MLVU, dataset='MLVU', nframe=8),
+    'MLVU_1fps': partial(MLVU, dataset='MLVU', fps=1.0)
+}
+
+tempcompass_dataset = {  # TempCompass presets
+    'TempCompass_8frame': partial(TempCompass, dataset='TempCompass', nframe=8),
+    'TempCompass_1fps': partial(TempCompass, dataset='TempCompass', fps=1.0),
+    'TempCompass_0.5fps': partial(TempCompass, dataset='TempCompass', fps=0.5)
+}
+
+supported_video_datasets = {}  # flat preset-name -> factory registry, filled by the loop below
+
+dataset_groups = [  # merged into the registry in this order
+    mmbench_video_dataset, mvbench_dataset, videomme_dataset, longvideobench_dataset,
+    mlvu_dataset, tempcompass_dataset
+]
+
+for grp in dataset_groups:  # dict.update: a later group would overwrite a duplicate key
+    supported_video_datasets.update(grp)