Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion evalscope/api/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def get_model(

logger.info(
f'Creating model {model} with eval_type={eval_type} '
f'base_url={base_url}, api_key={api_key}, config={config}, model_args={model_args}'
f'base_url={base_url}, config={config}, model_args={model_args}'
)

# find a matching model type
Expand Down
4 changes: 2 additions & 2 deletions evalscope/backend/rag_eval/utils/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def __init__(
model_name = download_model(self.model_name, self.revision)

# Load the model and processor
self.model = AutoModel.from_pretrained(model_name).to(self.device)
self.processor = AutoProcessor.from_pretrained(model_name)
self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True).to(self.device)
self.processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
Comment on lines +84 to +85
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting `trust_remote_code=True` introduces a security risk, because it allows arbitrary code from the model hub to be executed. A malicious model repository could exploit this. It is recommended to make this behavior configurable and disabled by default, or at least to add a clear warning to users about the potential risks.

self.transform = self.processor.image_processor
self.tokenizer = self.processor.tokenizer

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _init_length(self):
def _init_tokenizer(self):
""" Initialize the tokenizer based on the provided tokenizer path."""
from modelscope import AutoTokenizer
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path, trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using `trust_remote_code=True` can be a security vulnerability: it permits the execution of code shipped in the model repository, and that code could be malicious. Consider making this feature optional and off by default, or adding a prominent warning about the security implications when loading tokenizers or models from untrusted sources.


def load(self):
"""Load dataset from local disk or remote."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ def load_model(self):
# model_pretrained_name_or_path = "yuvalkirstain/PickScore_v1"
model_pretrained_name_or_path = 'AI-ModelScope/PickScore_v1' # modelscope model

self.processor = AutoProcessor.from_pretrained(processor_name_or_path)
self.model = AutoModel.from_pretrained(model_pretrained_name_or_path).eval().to(self.device)
self.processor = AutoProcessor.from_pretrained(processor_name_or_path, trust_remote_code=True)
self.model = AutoModel.from_pretrained(model_pretrained_name_or_path,
trust_remote_code=True).eval().to(self.device)
Comment on lines +27 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Enabling `trust_remote_code=True` for both the processor and the model poses a security risk, because it allows arbitrary code from the model hub to be executed. If the models are not from a fully trusted source, this could be dangerous. It would be safer to make this configurable and disabled by default.


def load_images(self, image: List[str]) -> torch.Tensor:
"""Load the image(s), and return a tensor (no preprocessing!!) put on self.device
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def load_model(self):
from ..utils import download_file

# load tokenizer
self.tokenizer = AutoTokenizer.from_pretrained('AI-ModelScope/bert-base-uncased', truncation_side='right')
self.tokenizer = AutoTokenizer.from_pretrained(
'AI-ModelScope/bert-base-uncased', truncation_side='right', trust_remote_code=True
)
Comment on lines +40 to +42
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting `trust_remote_code=True` allows code downloaded from the model repository to be executed locally, so it should be used with caution. This can be a security risk if the model source is not completely trusted. I'd suggest making this behavior configurable, so users can disable it when working with untrusted models.

self.tokenizer.add_special_tokens({'bos_token': '[DEC]'})
# load model
self.variant = FGA_BLIP2_MODELS[self.model_name]['variant']
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


def init_tokenizer():
tokenizer = AutoTokenizer.from_pretrained('AI-ModelScope/bert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained('AI-ModelScope/bert-base-uncased', trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The use of `trust_remote_code=True` introduces a security risk by allowing code from the model hub to be executed. This should be avoided if possible, or at least made configurable and opt-in, with clear warnings to the user.

tokenizer.add_special_tokens({'bos_token': '[DEC]'})
tokenizer.add_special_tokens({'additional_special_tokens': ['[ENC]']})
tokenizer.enc_token_id = tokenizer.additional_special_tokens_ids[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class BlipBase(BaseModel):

@classmethod
def init_tokenizer(cls):
tokenizer = AutoTokenizer.from_pretrained('AI-ModelScope/bert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained('AI-ModelScope/bert-base-uncased', trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Adding `trust_remote_code=True` can lead to security vulnerabilities, because code from the model repository is executed. It's recommended to use this only with fully trusted models. Consider making this setting configurable to enhance security.

tokenizer.add_special_tokens({'bos_token': '[DEC]'})
tokenizer.add_special_tokens({'additional_special_tokens': ['[ENC]']})
tokenizer.enc_token_id = tokenizer.additional_special_tokens_ids[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def load_pretrained_model(

from ..utils import download_file

tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, **tokenizer_dict)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True, **tokenizer_dict)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The `trust_remote_code=True` argument can be a security risk, as it allows arbitrary code from the model hub to be executed. It should be used with caution. It would be better to make this configurable and off by default.

# tokenizer.pad_token = tokenizer.unk_token # could be redundant

model_path = download_file(model_path, cache_dir=cache_dir)
Expand Down
2 changes: 1 addition & 1 deletion evalscope/perf/plugin/api/custom_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, param: Arguments):
super().__init__(param=param)
if param.tokenizer_path is not None:
from modelscope import AutoTokenizer
self.tokenizer = AutoTokenizer.from_pretrained(param.tokenizer_path)
self.tokenizer = AutoTokenizer.from_pretrained(param.tokenizer_path, trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Using `trust_remote_code=True` poses a security risk, because it allows code from the model repository to be executed. This is especially concerning in a performance-testing plugin, where `param.tokenizer_path` is supplied by the user and may point to any repository. It's advisable to make this behavior configurable and require users to enable it explicitly.

else:
self.tokenizer = None

Expand Down
2 changes: 1 addition & 1 deletion evalscope/perf/plugin/api/openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, param: Arguments):
super().__init__(param=param)
if param.tokenizer_path is not None:
from modelscope import AutoTokenizer
self.tokenizer = AutoTokenizer.from_pretrained(param.tokenizer_path)
self.tokenizer = AutoTokenizer.from_pretrained(param.tokenizer_path, trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Setting `trust_remote_code=True` can be a security vulnerability. It allows code from the model repository, which could be malicious, to be executed. This should ideally be a configurable option that is disabled by default.

else:
self.tokenizer = None

Expand Down
2 changes: 1 addition & 1 deletion evalscope/third_party/thinkbench/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, report_path, tokenizer_path, model_name, dataset_name, subset
self.switch_tokens = ['alternatively', 'but wait', 'let me reconsider', 'another way', 'another approach', 'another method', 'another angle']
self.subset_dict = defaultdict(lambda: defaultdict(list))
self.think_end_token = '</think>'
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The addition of `trust_remote_code=True` introduces a security risk. It allows arbitrary code from the model hub to be executed, which can be dangerous if the model source is not trusted. Please consider making this configurable or adding a warning.

self.model_name = model_name
self.dataset_name = dataset_name
self.subsets = subsets
Expand Down