Skip to content

Commit

Permalink
Merge branch 'main' of github.com:open-compass/VLMEvalKit into precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
kennymckormick committed Mar 19, 2024
2 parents 9d93ac9 + b35ab5b commit 7c26768
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 4 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

## 🆕 News

- **[2024-03-17]** We have added an API wrapper for [**Step-1V**](https://www.stepfun.com/#step1v) 🔥🔥🔥
- **[2024-03-15]** We have updated to be compatible with the latest version of LLaVA. All LLaVA series models have been re-evaluated with temperature=0, and the new results have been updated to the leaderboard 🔥🔥🔥
- **[2024-02-27]** We have fixed the evaluation results of [**Yi-VL-34B**](https://huggingface.co/01-ai/Yi-VL-34B), check the updated results [**here**](https://huggingface.co/spaces/opencompass/open_vlm_leaderboard) 🔥🔥🔥
- **[2024-02-25]** We have supported [**OCRBench**](https://github.com/Yuliang-Liu/MultimodalOCR). 🔥🔥🔥
Expand Down Expand Up @@ -63,8 +64,8 @@

**Supported API Models**

| [**GPT-4-Vision-Preview**](https://platform.openai.com/docs/guides/vision)🎞️🚅 | [**GeminiProVision**](https://platform.openai.com/docs/guides/vision)🎞️🚅 | [**QwenVLPlus**](https://huggingface.co/spaces/Qwen/Qwen-VL-Plus)🎞️🚅 | [**QwenVLMax**](https://huggingface.co/spaces/Qwen/Qwen-VL-Max)🎞️🚅 | [**Step-1V**](https://www.stepfun.com/#step1v)🎞️🚅 |
| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------ |

**Supported PyTorch / HF Models**

Expand Down
4 changes: 3 additions & 1 deletion vlmeval/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from .gemini import GeminiWrapper, GeminiProVision
from .qwen_vl_api import QwenVLWrapper, QwenVLAPI
from .qwen_api import QwenAPI
from .stepai import Step1V
from .claude import Claude_Wrapper,Claude3V

# Public API of vlmeval.api — names re-exported on `from vlmeval.api import *`.
__all__ = [
    'OpenAIWrapper', 'HFChatModel', 'OpenAIWrapperInternal', 'GeminiWrapper',
    'GPT4V', 'GPT4V_Internal', 'GeminiProVision', 'QwenVLWrapper', 'QwenVLAPI',
    'QwenAPI', 'Step1V', 'Claude3V', 'Claude_Wrapper'
]
102 changes: 102 additions & 0 deletions vlmeval/api/claude.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from vlmeval.smp import *
from vlmeval.api.base import BaseAPI
from time import sleep
import base64

# Endpoint of the alles-apin proxy that relays requests to the Anthropic Claude API.
url = "https://openxlab.org.cn/gw/alles-apin-hub/v1/claude/v1/text/chat"
# Shared header template; the auth token is filled in by Claude_Wrapper.__init__.
headers = {
    'alles-apin-token': '',
    'Content-Type': 'application/json'
}

class Claude_Wrapper(BaseAPI):
    """API wrapper that queries Claude models through the alles-apin proxy endpoint."""

    is_api: bool = True

    def __init__(self,
                 model: str = 'claude-3-opus-20240229',
                 key: str = None,
                 retry: int = 10,
                 wait: int = 3,
                 system_prompt: str = None,
                 verbose: bool = True,
                 temperature: float = 0,
                 max_tokens: int = 1024,
                 **kwargs):
        """
        Args:
            model: Claude model identifier sent in the request payload.
            key: API token; falls back to the ``ALLES`` environment variable.
            retry: max retry count, forwarded to BaseAPI (also used for 429 backoff).
            wait: seconds between retries, forwarded to BaseAPI.
            system_prompt: optional system prompt, forwarded to BaseAPI.
            verbose: verbosity flag, forwarded to BaseAPI.
            temperature: sampling temperature (stored; not currently sent in payload).
            max_tokens: generation cap sent in the request payload.
        """
        self.model = model
        # Copy the module-level template so repeated instantiation (or multiple
        # keys) never mutates the shared dict.
        self.headers = dict(headers)
        self.temperature = temperature
        self.max_tokens = max_tokens
        if key is not None:
            self.key = key
        else:
            self.key = os.environ.get('ALLES', '')
        self.headers['alles-apin-token'] = self.key

        super().__init__(retry=retry, wait=wait, verbose=verbose, system_prompt=system_prompt, **kwargs)

    @staticmethod
    def build_msgs(msgs_raw):
        """Pack a flat list of image paths / text strings into the Claude messages format.

        All items become the content of a single ``user`` message, mixing
        base64-encoded image items and text items in input order.
        """
        messages = []
        message = {"role": "user", "content": []}
        for msg in msgs_raw:
            if isimg(msg):
                media_type_map = {
                    'jpg': 'image/jpeg',
                    'jpeg': 'image/jpeg',
                    'png': 'image/png',
                    'gif': 'image/gif',
                    'webp': 'image/webp'  # fixed typo: was 'iamge/webp'
                }
                # Unsupported extensions raise KeyError — intentional fail-fast.
                media_type = media_type_map[msg.split('.')[-1].lower()]
                with open(msg, "rb") as file:
                    image_data = base64.b64encode(file.read()).decode("utf-8")
                item = {
                    'type': 'image',
                    'source': {'type': 'base64', 'media_type': media_type, 'data': image_data}
                }
            else:
                item = {'type': 'text', 'text': msg}
            message['content'].append(item)
        messages.append(message)
        return messages

    def generate_inner(self, inputs, **kwargs) -> tuple:
        """POST one chat request; return ``(ret_code, answer, response)``.

        ret_code is normalized to 0 on any 2xx status; on rate limiting (429)
        the request is retried up to ``self.retry`` times with a 15 s sleep.
        """
        payload = json.dumps({
            "model": self.model,
            "max_tokens": self.max_tokens,
            "messages": self.build_msgs(msgs_raw=inputs),
            **kwargs
        })
        # Use the instance headers, which carry this wrapper's auth token
        # (the module-level dict is only a template).
        response = requests.request("POST", url, headers=self.headers, data=payload)

        ret_code = response.status_code
        retry = self.retry
        while ret_code == 429 and retry > 0:
            sleep(15)
            response = requests.request("POST", url, headers=self.headers, data=payload)
            ret_code = response.status_code
            retry -= 1

        ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code
        # NOTE(review): self.fail_msg is presumably set by BaseAPI — confirm.
        answer = self.fail_msg

        try:
            resp_struct = json.loads(response.text)
            answer = resp_struct['data']['content'][0]['text'].strip()
        except Exception:
            # Malformed / error body: keep fail_msg so the caller can retry.
            pass

        return ret_code, answer, response


class Claude3V(Claude_Wrapper):
    """Evaluation-facing adapter exposing the generate / interleave_generate interface."""

    def generate(self, image_path, prompt, dataset=None):
        # Dispatch past Claude_Wrapper in the MRO (i.e. to BaseAPI.generate)
        # with a two-item [image, text] message list.
        msgs = [image_path, prompt]
        return super(Claude_Wrapper, self).generate(msgs)

    def interleave_generate(self, ti_list, dataset=None):
        # ti_list is an already interleaved list of text / image-path items.
        return super(Claude_Wrapper, self).generate(ti_list)
98 changes: 98 additions & 0 deletions vlmeval/api/stepai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from vlmeval.smp import *
from vlmeval.api.base import BaseAPI

# Step (stepfun) OpenAPI chat-completions endpoint.
url = "https://b-openapi.basemind.com/openapi/v1/chat/completions"
# Header template; the Bearer token placeholder is filled in StepAPI.__init__.
headers = {
    'X-Request-Orgcode': 'companyA',
    'Authorization': 'Bearer {}',
    'Content-Type': 'application/json'
}

def convert_image_to_base64(image_path):
    """Return the base64-encoded contents of the file at *image_path* as a str."""
    with open(image_path, "rb") as img_file:
        raw_bytes = img_file.read()
    return base64.b64encode(raw_bytes).decode()

class StepAPI(BaseAPI):
    """API wrapper for the Step (stepfun) multimodal chat-completions endpoint."""

    is_api: bool = True

    def __init__(self,
                 model: str = 'stepapi-rankboard',
                 retry: int = 10,
                 wait: int = 3,
                 key: str = None,
                 temperature: float = 0,
                 max_tokens: int = 300,
                 verbose: bool = True,
                 system_prompt: str = None,
                 **kwargs):
        """
        Args:
            model: Step model identifier sent in the request payload.
            retry: max retry count, forwarded to BaseAPI.
            wait: seconds between retries, forwarded to BaseAPI.
            key: API token; falls back to the ``STEPAI_API_KEY`` environment variable.
            temperature: sampling temperature sent in the request payload.
            max_tokens: generation cap sent in the request payload.
            verbose: verbosity flag, forwarded to BaseAPI.
            system_prompt: optional system prompt, forwarded to BaseAPI.
        """
        self.model = model
        self.fail_msg = 'Fail to obtain answer via API.'
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.system_prompt = system_prompt
        if key is not None:
            self.key = key
        else:
            self.key = os.environ.get('STEPAI_API_KEY', '')
        # Format the token into a COPY of the template. Formatting the shared
        # module-level dict in place (as before) meant a second instantiation
        # with a different key was silently ignored — the '{}' placeholder was
        # already consumed.
        self.headers = dict(headers)
        self.headers['Authorization'] = self.headers['Authorization'].format(self.key)

        super().__init__(retry=retry, wait=wait, verbose=verbose, system_prompt=system_prompt, **kwargs)

    @staticmethod
    def build_msgs(msgs_raw):
        """Pack a flat list of image paths / text strings into one ``user`` message.

        Image items are sent as base64 (``image_b64``), text items as ``text``,
        preserving input order.
        """
        messages = []
        message = {"role": "user", "content": []}

        for msg in msgs_raw:
            if isimg(msg):
                image_b64 = convert_image_to_base64(msg)
                message['content'].append({
                    "image_b64": {'b64_json': image_b64},
                    "type": "image_b64"
                })
            else:
                message['content'].append({
                    'text': msg,
                    "type": 'text'
                })

        messages.append(message)
        return messages

    def generate_inner(self, inputs, **kwargs) -> tuple:
        """POST one chat-completion request; return ``(ret_code, answer, response)``.

        ret_code is normalized to 0 on any 2xx status; ``answer`` falls back to
        ``self.fail_msg`` when the response body cannot be parsed.
        """
        payload = dict(
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            messages=self.build_msgs(msgs_raw=inputs),
            **kwargs)
        # Use the instance headers, which carry the formatted Bearer token.
        response = requests.post(url, headers=self.headers, data=json.dumps(payload))
        ret_code = response.status_code
        ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code

        answer = self.fail_msg
        try:
            resp_struct = json.loads(response.text)
            answer = resp_struct['choices'][0]['message']['content'].strip()
        except Exception:
            # Malformed / error body: keep fail_msg so the caller can retry.
            pass
        return ret_code, answer, response


class Step1V(StepAPI):
    """Evaluation-facing adapter exposing the generate / interleave_generate interface."""

    def generate(self, image_path, prompt, dataset=None):
        # Dispatch past StepAPI in the MRO (i.e. to BaseAPI.generate)
        # with a two-item [image, text] message list.
        pair = [image_path, prompt]
        return super(StepAPI, self).generate(pair)

    def interleave_generate(self, ti_list, dataset=None):
        # ti_list is an already interleaved list of text / image-path items.
        return super(StepAPI, self).generate(ti_list)
10 changes: 9 additions & 1 deletion vlmeval/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .vlm import *
from .api import GPT4V, GeminiProVision, GPT4V_Internal, QwenVLAPI
from .api import *
from functools import partial

PandaGPT_ROOT = None
Expand Down Expand Up @@ -53,16 +53,24 @@

# Registry of supported API models: name -> zero-arg-callable constructor.
api_models = {
    'GPT4V': partial(GPT4V, model='gpt-4-vision-preview', temperature=0, img_size=512, img_detail='low', retry=10),
    # Internal Only
    'GPT4V_INT': partial(GPT4V_Internal, model='gpt-4-vision-preview', temperature=0, img_size=512, img_detail='low', retry=10),
    'GPT4V_SHORT': partial(
        GPT4V, model='gpt-4-vision-preview', temperature=0, img_size=512, img_detail='low', retry=10,
        # fixed typo in the prompt sent to the model: "responde" -> "respond"
        system_prompt="Please respond to the following question / request in a short reply. "),
    # Internal Only
    'GPT4V_SHORT_INT': partial(
        GPT4V_Internal, model='gpt-4-vision-preview', temperature=0, img_size=512, img_detail='low', retry=10,
        system_prompt="Please respond to the following question / request in a short reply. "),
    'GeminiProVision': partial(GeminiProVision, temperature=0, retry=10),
    'QwenVLPlus': partial(QwenVLAPI, model='qwen-vl-plus', temperature=0, retry=10),
    'QwenVLMax': partial(QwenVLAPI, model='qwen-vl-max', temperature=0, retry=10),
    # Internal Only
    'Step1V': partial(Step1V, temperature=0, retry=10),
    # Internal Only
    'Claude3V_Opus': partial(Claude3V, model='claude-3-opus-20240229', temperature=0, retry=10),
    'Claude3V_Sonnet': partial(Claude3V, model='claude-3-sonnet-20240229', temperature=0, retry=10),
    'Claude3V_Haiku': partial(Claude3V, model='claude-3-haiku-20240307', temperature=0, retry=10),
}

xtuner_models = {
Expand Down

0 comments on commit 7c26768

Please sign in to comment.