From aaead4ffa89f908c370c51efc784185d48b6a772 Mon Sep 17 00:00:00 2001
From: lianqing11
Date: Mon, 12 Jun 2023 13:09:48 +0000
Subject: [PATCH] fix the code style for the multi-modal module

---
 ...timodal_model.sh => run_inference_multimodal_model.sh} | 4 ++--
 src/lmflow/args.py                                        | 2 +-
 src/lmflow/models/auto_model.py                           | 2 +-
 src/lmflow/models/hf_encoder_decoder_model.py             | 8 ++++----
 src/lmflow/pipeline/inferencer.py                         | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)
 rename scripts/{inference_multimodal_model.sh => run_inference_multimodal_model.sh} (82%)

diff --git a/scripts/inference_multimodal_model.sh b/scripts/run_inference_multimodal_model.sh
similarity index 82%
rename from scripts/inference_multimodal_model.sh
rename to scripts/run_inference_multimodal_model.sh
index 924e161a1..ba150fa03 100755
--- a/scripts/inference_multimodal_model.sh
+++ b/scripts/run_inference_multimodal_model.sh
@@ -9,9 +9,9 @@ if [ $# -ge 2 ]; then
   lora_args="--lora_model_path $2"
 fi
 
-CUDA_VISIBLE_DEVICES=7 \
+CUDA_VISIBLE_DEVICES=0 \
   deepspeed examples/inference.py \
     --deepspeed configs/ds_config_multimodal.json \
     --model_name_or_path ${model} \
-    --arch_type visionEncoder_decoder \
+    --arch_type vision_encoder_decoder \
     ${lora_args}
diff --git a/src/lmflow/args.py b/src/lmflow/args.py
index 828d47c25..a3a45cce4 100644
--- a/src/lmflow/args.py
+++ b/src/lmflow/args.py
@@ -121,7 +121,7 @@ class ModelArguments:
                 "Model architecture type, e.g. \"decoder_only\","
                 " \"encoder_decoder\""
             ),
-            "choices": ["decoder_only", "encoder_decoder", "text_regression", "visionEncoder_decoder"],
+            "choices": ["decoder_only", "encoder_decoder", "text_regression", "vision_encoder_decoder"],
         },
     )
     config_name: Optional[str] = field(
diff --git a/src/lmflow/models/auto_model.py b/src/lmflow/models/auto_model.py
index d75976c67..1db77a423 100644
--- a/src/lmflow/models/auto_model.py
+++ b/src/lmflow/models/auto_model.py
@@ -17,7 +17,7 @@ def get_model(self, model_args, *args, **kwargs):
         elif arch_type == "text_regression":
             return TextRegressionModel(model_args, *args, **kwargs)
         elif arch_type == "encoder_decoder" or \
-            arch_type == "visionEncoder_decoder":
+            arch_type == "vision_encoder_decoder":
             return HFEncoderDecoderModel(model_args, *args, **kwargs)
         else:
             raise NotImplementedError(
diff --git a/src/lmflow/models/hf_encoder_decoder_model.py b/src/lmflow/models/hf_encoder_decoder_model.py
index 8eee0627b..e03aeaa45 100644
--- a/src/lmflow/models/hf_encoder_decoder_model.py
+++ b/src/lmflow/models/hf_encoder_decoder_model.py
@@ -19,6 +19,7 @@
 """
 
 import logging
+import copy
 from typing import List, Union
 
 import deepspeed
@@ -50,7 +51,6 @@
 from lmflow.datasets.dataset import Dataset
 from lmflow.models.encoder_decoder_model import EncoderDecoderModel
 from lmflow.models.interfaces.tunable import Tunable
-import copy
 
 logger = logging.getLogger(__name__)
 
@@ -132,7 +132,7 @@ def __init__(
                 model_register = AutoModel
             else:
                 model_register = AutoModelForSeq2SeqLM
-        elif self.arch_type == "visionEncoder_decoder":
+        elif self.arch_type == "vision_encoder_decoder":
             model_register = AutoModelForVision2Seq
         else:
             raise NotImplementedError
@@ -168,7 +168,7 @@ def __init__(
         )
         if self.arch_type == "encoder_decoder":
             tokenizer_register = AutoTokenizer
-        elif self.arch_type == "visionEncoder_decoder":
+        elif self.arch_type == "vision_encoder_decoder":
             tokenizer_register = AutoProcessor
         else:
             raise NotImplementedError
@@ -302,7 +302,7 @@ def inference(self, inputs, *args, **kwargs):
         # TODO need to discuss how to handle pad_token_id
         if self.arch_type == "encoder_decoder":
             kwargs.update(pad_token_id=self.tokenizer.pad_token_id)
-        elif self.arch_type == "visionEncoder_decoder":
+        elif self.arch_type == "vision_encoder_decoder":
             # TODO disucss how to modify the interface to remove this part.
             inputs = copy.deepcopy(inputs)
             input_ids = inputs.pop('input_ids')
diff --git a/src/lmflow/pipeline/inferencer.py b/src/lmflow/pipeline/inferencer.py
index 786cc2fbd..e43d54ad1 100644
--- a/src/lmflow/pipeline/inferencer.py
+++ b/src/lmflow/pipeline/inferencer.py
@@ -90,7 +90,7 @@ def create_dataloader(self, dataset: Dataset):
         elif dataset.get_type() == "image_text":
             backend_dataset = dataset.get_backend_dataset()
             # can not do the do_dict information because the data contains image.
-            inputs = [backend_dataset.__getitem__(idx) \
+            inputs = [backend_dataset.__getitem__(idx)
                       for idx in range(len(backend_dataset))]
             dataset_size = len(inputs)
             dataset_buf = []
@@ -170,7 +170,7 @@ def inference(
             )
             text_out = model.decode(outputs[0], skip_special_tokens=True)
             # only return the generation, trucating the input
-            if self.model_args.arch_type != "visionEncoder_decoder":
+            if self.model_args.arch_type != "vision_encoder_decoder":
                 prompt_length = len(model.decode(inputs[0], skip_special_tokens=True,))
                 text_out = text_out[prompt_length:]
             output_dict["instances"].append({ "text": text_out })