17 changes: 17 additions & 0 deletions comps/cores/mega/utils.py
@@ -181,3 +181,20 @@
        return prompt, images
    else:
        return prompt


def sanitize_env(value: Optional[str]) -> Optional[str]:
    """Remove a matching pair of surrounding quotes from a configuration value, if present.

    Args:
        value (Optional[str]): The configuration value to sanitize.
    Returns:
        Optional[str]: The sanitized configuration value, or None if the input was None.
    """
    if value is None:
        return None
    if value.startswith('"') and value.endswith('"'):
        value = value[1:-1]
    elif value.startswith("'") and value.endswith("'"):
        value = value[1:-1]
    return value
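# Illustrative usage (editor's sketch, not part of this diff): sanitize_env
# strips one matching pair of surrounding quotes, so values read from .env or
# compose files behave the same whether or not they were quoted:
#
#     sanitize_env('"8080"')   # -> '8080'
#     sanitize_env("'true'")   # -> 'true'
#     sanitize_env("plain")    # -> 'plain'
#     sanitize_env(None)       # -> None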

Codecov (codecov/patch) warning for comps/cores/mega/utils.py#L194-L200: added lines were not covered by tests.
261 changes: 255 additions & 6 deletions comps/cores/proto/docarray.py
@@ -1,7 +1,7 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
from docarray import BaseDoc, DocList
@@ -163,16 +163,257 @@ class LVMSearchedMultimodalDoc(SearchedMultimodalDoc):
    )


class GeneratedDoc(BaseDoc):
    text: str
    prompt: str


class RerankedDoc(BaseDoc):
    reranked_docs: DocList[TextDoc]
    initial_query: str


class AnonymizeModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    hidden_names: Optional[List[str]] = None
    allowed_names: Optional[List[str]] = None
    entity_types: Optional[List[str]] = None
    preamble: Optional[str] = None
    regex_patterns: Optional[List[str]] = None
    use_faker: Optional[bool] = None
    recognizer_conf: Optional[str] = None
    threshold: Optional[float] = None
    language: Optional[str] = None


class BanCodeModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None


class BanCompetitorsModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    competitors: List[str] = ["Competitor1", "Competitor2", "Competitor3"]
    model: Optional[str] = None
    threshold: Optional[float] = None
    redact: Optional[bool] = None


class BanSubstringsModel(BaseDoc):
    enabled: bool = False
    substrings: List[str] = ["backdoor", "malware", "virus"]
    match_type: Optional[str] = None
    case_sensitive: bool = False
    redact: Optional[bool] = None
    contains_all: Optional[bool] = None


class BanTopicsModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    topics: List[str] = ["violence", "attack", "war"]
    threshold: Optional[float] = None
    model: Optional[str] = None


class CodeModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    languages: List[str] = ["Java", "Python"]
    model: Optional[str] = None
    is_blocked: Optional[bool] = None
    threshold: Optional[float] = None


class GibberishModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class InvisibleText(BaseDoc):
    enabled: bool = False


class LanguageModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    valid_languages: List[str] = ["en", "es"]
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class PromptInjectionModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class RegexModel(BaseDoc):
    enabled: bool = False
    patterns: List[str] = ["Bearer [A-Za-z0-9-._~+/]+"]
    is_blocked: Optional[bool] = None
    match_type: Optional[str] = None
    redact: Optional[bool] = None


class SecretsModel(BaseDoc):
    enabled: bool = False
    redact_mode: Optional[str] = None


class SentimentModel(BaseDoc):
    enabled: bool = False
    threshold: Optional[float] = None
    lexicon: Optional[str] = None


class TokenLimitModel(BaseDoc):
    enabled: bool = False
    limit: Optional[int] = None
    encoding_name: Optional[str] = None
    model_name: Optional[str] = None


class ToxicityModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class BiasModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class DeanonymizeModel(BaseDoc):
    enabled: bool = False
    matching_strategy: Optional[str] = None


class JSONModel(BaseDoc):
    enabled: bool = False
    required_elements: Optional[int] = None
    repair: Optional[bool] = None


class LanguageSameModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None


class MaliciousURLsModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None


class NoRefusalModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None
    match_type: Optional[str] = None


class NoRefusalLightModel(BaseDoc):
    enabled: bool = False


class ReadingTimeModel(BaseDoc):
    enabled: bool = False
    max_time: float = 0.5
    truncate: Optional[bool] = None


class FactualConsistencyModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    minimum_score: Optional[float] = None


class RelevanceModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    model: Optional[str] = None
    threshold: Optional[float] = None


class SensitiveModel(BaseDoc):
    enabled: bool = False
    use_onnx: bool = False
    entity_types: Optional[List[str]] = None
    regex_patterns: Optional[List[str]] = None
    redact: Optional[bool] = None
    recognizer_conf: Optional[str] = None
    threshold: Optional[float] = None


class URLReachabilityModel(BaseDoc):
    enabled: bool = False
    success_status_codes: Optional[List[int]] = None
    timeout: Optional[int] = None


class LLMGuardInputGuardrailParams(BaseDoc):
    anonymize: AnonymizeModel = AnonymizeModel()
    ban_code: BanCodeModel = BanCodeModel()
    ban_competitors: BanCompetitorsModel = BanCompetitorsModel()
    ban_substrings: BanSubstringsModel = BanSubstringsModel()
    ban_topics: BanTopicsModel = BanTopicsModel()
    code: CodeModel = CodeModel()
    gibberish: GibberishModel = GibberishModel()
    invisible_text: InvisibleText = InvisibleText()
    language: LanguageModel = LanguageModel()
    prompt_injection: PromptInjectionModel = PromptInjectionModel()
    regex: RegexModel = RegexModel()
    secrets: SecretsModel = SecretsModel()
    sentiment: SentimentModel = SentimentModel()
    token_limit: TokenLimitModel = TokenLimitModel()
    toxicity: ToxicityModel = ToxicityModel()


class LLMGuardOutputGuardrailParams(BaseDoc):
    ban_code: BanCodeModel = BanCodeModel()
    ban_competitors: BanCompetitorsModel = BanCompetitorsModel()
    ban_substrings: BanSubstringsModel = BanSubstringsModel()
    ban_topics: BanTopicsModel = BanTopicsModel()
    bias: BiasModel = BiasModel()
    code: CodeModel = CodeModel()
    deanonymize: DeanonymizeModel = DeanonymizeModel()
    json_scanner: JSONModel = JSONModel()
    language: LanguageModel = LanguageModel()
    language_same: LanguageSameModel = LanguageSameModel()
    malicious_urls: MaliciousURLsModel = MaliciousURLsModel()
    no_refusal: NoRefusalModel = NoRefusalModel()
    no_refusal_light: NoRefusalLightModel = NoRefusalLightModel()
    reading_time: ReadingTimeModel = ReadingTimeModel()
    factual_consistency: FactualConsistencyModel = FactualConsistencyModel()
    gibberish: GibberishModel = GibberishModel()
    regex: RegexModel = RegexModel()
    relevance: RelevanceModel = RelevanceModel()
    sensitive: SensitiveModel = SensitiveModel()
    sentiment: SentimentModel = SentimentModel()
    toxicity: ToxicityModel = ToxicityModel()
    url_reachability: URLReachabilityModel = URLReachabilityModel()
    anonymize_vault: Optional[List[Tuple]] = (
        None  # the only parameter not available in fingerprint. Used to transmit vault
    )
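# Illustrative sketch (editor's addition, not part of this diff): every scanner
# config above defaults to disabled, so a caller only overrides the scanners it
# wants; the threshold value here is a hypothetical example:
#
#     input_params = LLMGuardInputGuardrailParams(
#         ban_substrings=BanSubstringsModel(enabled=True, substrings=["malware"]),
#         prompt_injection=PromptInjectionModel(enabled=True, threshold=0.8),
#     )
#     output_params = LLMGuardOutputGuardrailParams(
#         toxicity=ToxicityModel(enabled=True),
#     )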


class LLMParamsDoc(BaseDoc):
    model: Optional[str] = None  # for openai and ollama
    query: str
@@ -187,6 +428,8 @@ class LLMParamsDoc(BaseDoc):
    repetition_penalty: NonNegativeFloat = 1.03
    stream: bool = True
    language: str = "auto"  # can be "en", "zh"
    input_guardrail_params: Optional[LLMGuardInputGuardrailParams] = None
    output_guardrail_params: Optional[LLMGuardOutputGuardrailParams] = None

    chat_template: Optional[str] = Field(
        default=None,
@@ -213,6 +456,12 @@ def chat_template_must_contain_variables(cls, v):
        return v


class GeneratedDoc(BaseDoc):
    text: str
    prompt: str
    output_guardrail_params: Optional[LLMGuardOutputGuardrailParams] = None
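# Illustrative sketch (editor's addition, not part of this diff): the new
# output_guardrail_params field lets a generation carry the caller's
# output-scanner config (including the anonymize vault) downstream alongside
# the generated text; the query and text values are hypothetical:
#
#     request = LLMParamsDoc(
#         query="What is OPEA?",
#         output_guardrail_params=LLMGuardOutputGuardrailParams(),
#     )
#     doc = GeneratedDoc(
#         text="OPEA is ...",
#         prompt=request.query,
#         output_guardrail_params=request.output_guardrail_params,
#     )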


class LLMParams(BaseDoc):
    model: Optional[str] = None
    max_tokens: int = 1024