From 82e54a4dd8f5242e9c2d5d99ac05cf7caf4379c1 Mon Sep 17 00:00:00 2001
From: yym68686
Date: Tue, 14 May 2024 16:49:39 +0800
Subject: [PATCH] 🐛 Bug: 1. Fix the bug that gpt-4o cannot answer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

2. Fix the bug where gpt-4o cannot read images.
3. Fix the bug of infinite function calls.
4. Optimize the max-token calculation.

💻 Code:
1. Upgrade the ModelMerge version to 0.3.7.
2. Refactor and streamline the bot.py code.
3. Delete useless test files.
---
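The gpt-4o image fix widens the vision-capable model check in bot.py, which
previously matched only gpt-4-turbo-2024-04-09. A minimal sketch of the new
condition (the helper name is illustrative; the diff keeps the check inline
in command_bot):

    # Image input is accepted for gpt-4-turbo-2024-04-09 and any gpt-4o variant.
    def supports_image_input(engine: str) -> bool:
        return engine == "gpt-4-turbo-2024-04-09" or "gpt-4o" in engine

    assert supports_image_input("gpt-4o")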
 bot.py                                |  82 +++------
 config.py                             | 152 ++++++++++------
 requirements.txt                      |   2 +-
 test/test.py                          |  13 --
 test/test_API.py                      |   6 -
 test/test_Faucet.py                   |  13 --
 test/test_Web_crawler.py              | 191 --------------------
 test/test_claude.py                   | 190 --------------------
 test/test_claude3.py                  | 205 ----------------------
 test/test_claude_zh_char.py           |  26 ---
 test/test_ddg.py                      | 109 ------------
 test/test_dict.py                     |  22 ---
 test/test_download_pdf.py             |  56 ------
 test/test_fstring.py                  |  13 --
 test/test_gpt4free.py                 |  25 ---
 test/test_gpt4free_langchain_agent.py |  57 ------
 test/test_groq.py                     | 228 ------------------------
 test/test_jieba.py                    |  32 ----
 test/test_json.py                     |  22 ---
 test/test_keyword.py                  |  79 ---------
 test/test_langchain_search_old.py     | 235 -------------------------
 test/test_logging.py                  |  32 ----
 test/test_ollama.py                   |  57 ------
 test/test_pdf.py                      |  21 ---
 test/test_re_agent.py                 |   9 -
 test/test_search.py                   |   8 -
 test/test_string_len.py               |   9 -
 test/test_summary.py                  |  43 -----
 test/test_tikitoken.py                |  19 --
 test/test_token.py                    |  94 ----------
 test/test_tools_class.py              | 243 --------------------------
 test/test_url.py                      |  33 ----
 test/test_whisper.py                  |  14 --
 test/test_yield.py                    |  24 ---
 vercel.json                           |  15 --
 35 files changed, 122 insertions(+), 2257 deletions(-)
 delete mode 100644 test/test.py
 delete mode 100644 test/test_API.py
 delete mode 100644 test/test_Faucet.py
 delete mode 100644 test/test_Web_crawler.py
 delete mode 100644 test/test_claude.py
 delete mode 100644 test/test_claude3.py
 delete mode 100644 test/test_claude_zh_char.py
 delete mode 100644 test/test_ddg.py
 delete mode 100644 test/test_dict.py
 delete mode 100644 test/test_download_pdf.py
 delete mode 100644 test/test_fstring.py
 delete mode 100644 test/test_gpt4free.py
 delete mode 100644 test/test_gpt4free_langchain_agent.py
 delete mode 100644 test/test_groq.py
 delete mode 100644 test/test_jieba.py
 delete mode 100644 test/test_json.py
 delete mode 100644 test/test_keyword.py
 delete mode 100644 test/test_langchain_search_old.py
 delete mode 100644 test/test_logging.py
 delete mode 100644 test/test_ollama.py
 delete mode 100644 test/test_pdf.py
 delete mode 100644 test/test_re_agent.py
 delete mode 100644 test/test_search.py
 delete mode 100644 test/test_string_len.py
 delete mode 100644 test/test_summary.py
 delete mode 100644 test/test_tikitoken.py
 delete mode 100644 test/test_token.py
 delete mode 100644 test/test_tools_class.py
 delete mode 100644 test/test_url.py
 delete mode 100644 test/test_whisper.py
 delete mode 100644 test/test_yield.py
 delete mode 100644 vercel.json

diff --git a/bot.py b/bot.py
index ae690c31..05d9e577 100644
--- a/bot.py
+++ b/bot.py
@@ -6,13 +6,23 @@
 import utils.decorators as decorators
 from md2tgmd import escape
-from ModelMerge.models import chatgpt, claude, groq, claude3, gemini
 from ModelMerge.models.config import PLUGINS
 from ModelMerge.utils.prompt import translator_en2zh_prompt, translator_prompt, claude3_doc_assistant_prompt
 from ModelMerge.utils.scripts import Document_extract, get_encode_image, claude_replace
 import config
-from config import WEB_HOOK, PORT, BOT_TOKEN, update_first_buttons_message, update_model_buttons, get_current_lang
+from config import (
+    WEB_HOOK,
+    PORT,
+    BOT_TOKEN,
+    update_first_buttons_message,
+    update_model_buttons,
+    get_current_lang,
+    update_info_message,
+    update_ENGINE,
+    update_language
+)
+
 from utils.i18n import strings
 from telegram.constants import ChatAction
@@ -114,8 +124,6 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
     elif reply_to_message_text and not update_message.reply_to_message.from_user.is_bot:
         message = reply_to_message_text + "\n" + message
-    if "claude-2.1" in config.GPT_ENGINE and config.ClaudeAPI:
-        robot = config.claudeBot
     if "claude-3" in config.GPT_ENGINE and config.ClaudeAPI:
         robot = config.claude3Bot
     if ("mixtral" in config.GPT_ENGINE or "llama" in config.GPT_ENGINE) and config.GROQ_API_KEY:
@@ -124,7 +132,7 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
         robot = config.gemini_Bot
     if "gpt" in config.GPT_ENGINE or (config.ClaudeAPI and "claude-3" in config.GPT_ENGINE):
         message = [{"type": "text", "text": message}]
-        if image_url and config.GPT_ENGINE == "gpt-4-turbo-2024-04-09":
+        if image_url and (config.GPT_ENGINE == "gpt-4-turbo-2024-04-09" or "gpt-4o" in config.GPT_ENGINE):
            base64_image = get_encode_image(image_url)
            message.append(
                {
@@ -303,47 +311,21 @@ async def delete_message(update, context, messageid, delay=10):
         print("error", e)
         print('\033[0m')
 
-
-def replace_with_asterisk(string, start=10, end=45):
-    return string[:start] + '*' * (end - start) + string[end:]
-
-def update_info_message(update):
-    return (
-        f"`Hi, {update.effective_user.username}!`\n\n"
-        f"**Default engine:** `{config.GPT_ENGINE}`\n"
-        f"**Temperature:** `{config.temperature}`\n"
-        f"**API_URL:** `{config.API_URL}`\n\n"
-        f"**API:** `{replace_with_asterisk(config.API)}`\n\n"
-        f"**WEB_HOOK:** `{config.WEB_HOOK}`\n\n"
-    )
-
-
 @decorators.AdminAuthorization
 @decorators.GroupAuthorization
 @decorators.Authorization
 async def button_press(update, context):
     """Function to handle the button press"""
-    info_message = update_info_message(update)
+    info_message = update_info_message()
     callback_query = update.callback_query
     await callback_query.answer()
     data = callback_query.data
     banner = strings['message_banner'][get_current_lang()]
-    if "gpt-" in data or "claude" in data or "mixtral" in data or "llama" in data or "gemini" in data or (config.CUSTOM_MODELS and data in config.CUSTOM_MODELS):
-        config.GPT_ENGINE = data
-        # print("config.GPT_ENGINE", config.GPT_ENGINE)
-        if (config.API and "gpt-" in data) or (config.API and not config.ClaudeAPI) or (config.API and config.CUSTOM_MODELS and data in config.CUSTOM_MODELS):
-            config.ChatGPTbot = chatgpt(api_key=f"{config.API}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
-            config.ChatGPTbot.reset(convo_id=str(update.effective_chat.id), system_prompt=config.systemprompt)
-        if config.ClaudeAPI and "claude-2.1" in data:
-            config.claudeBot = claude(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.claude_systemprompt, temperature=config.temperature)
-        if config.ClaudeAPI and "claude-3" in data:
-            config.claude3Bot = claude3(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.claude_systemprompt, temperature=config.temperature)
-        if config.GROQ_API_KEY and ("mixtral" in data or "llama" in data):
-            config.groqBot = groq(api_key=f"{config.GROQ_API_KEY}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
-        if config.GOOGLE_AI_API_KEY and "gemini" in data:
-            config.gemini_Bot = gemini(api_key=f"{config.GOOGLE_AI_API_KEY}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
+    if data.endswith("ENGINE"):
+        data = data[:-6]
+        update_ENGINE(data)
     try:
-        info_message = update_info_message(update)
+        info_message = update_info_message()
         if info_message + banner != callback_query.message.text:
             message = await callback_query.edit_message_text(
                 text=escape(info_message + banner),
@@ -366,27 +348,9 @@ async def button_press(update, context):
                 parse_mode='MarkdownV2'
             )
     elif "language" in data:
-        if config.LANGUAGE == "Simplified Chinese":
-            config.LANGUAGE = "English"
-            config.systemprompt = config.systemprompt.replace("Simplified Chinese", "English")
-            config.claude_systemprompt = config.claude_systemprompt.replace("Simplified Chinese", "English")
-        else:
-            config.LANGUAGE = "Simplified Chinese"
-            config.systemprompt = config.systemprompt.replace("English", "Simplified Chinese")
-            config.claude_systemprompt = config.claude_systemprompt.replace("English", "Simplified Chinese")
-        # config.systemprompt = f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {config.LANGUAGE}. Knowledge cutoff: 2021-09. Current date: [ {config.Current_Date} ]"
-        if config.API:
-            config.ChatGPTbot = chatgpt(api_key=f"{config.API}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
-            config.ChatGPTbot.reset(convo_id=str(update.effective_chat.id), system_prompt=config.systemprompt)
-        if config.ClaudeAPI:
-            config.claudeBot = claude(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.claude_systemprompt, temperature=config.temperature)
-            config.claude3Bot = claude3(api_key=f"{config.ClaudeAPI}", engine=config.GPT_ENGINE, system_prompt=config.claude_systemprompt, temperature=config.temperature)
-        if config.GROQ_API_KEY:
-            config.groqBot = groq(api_key=f"{config.GROQ_API_KEY}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
-        if config.GOOGLE_AI_API_KEY:
-            config.gemini_Bot = gemini(api_key=f"{config.GOOGLE_AI_API_KEY}", engine=config.GPT_ENGINE, system_prompt=config.systemprompt, temperature=config.temperature)
-
-        info_message = update_info_message(update)
+        update_language()
+        update_ENGINE()
+        info_message = update_info_message()
         message = await callback_query.edit_message_text(
             text=escape(info_message),
             reply_markup=InlineKeyboardMarkup(update_first_buttons_message()),
@@ -397,7 +361,7 @@ async def button_press(update, context):
             PLUGINS[data] = not PLUGINS[data]
         except:
             setattr(config, data, not getattr(config, data))
-        info_message = update_info_message(update)
+        info_message = update_info_message()
         message = await callback_query.edit_message_text(
             text=escape(info_message),
             reply_markup=InlineKeyboardMarkup(update_first_buttons_message()),
@@ -408,7 +372,7 @@ async def button_press(update, context):
 @decorators.GroupAuthorization
 @decorators.Authorization
 async def info(update, context):
-    info_message = update_info_message(update)
+    info_message = update_info_message()
     message = await context.bot.send_message(chat_id=update.message.chat_id, text=escape(info_message), reply_markup=InlineKeyboardMarkup(update_first_buttons_message()), parse_mode='MarkdownV2', disable_web_page_preview=True)
 
 @decorators.GroupAuthorization
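The button handler above now dispatches model switches on a plain "ENGINE"
suffix instead of matching model-name substrings. The suffix is attached in
config.py's create_buttons (next diff) and stripped again in button_press; a
minimal sketch of the round trip, with a hypothetical callback payload:

    # create_buttons: callback_data = model name + "ENGINE"
    data = "gpt-4o" + "ENGINE"
    # button_press: strip the six-character suffix and rebuild the bots
    if data.endswith("ENGINE"):
        engine = data[:-6]   # -> "gpt-4o", handed to update_ENGINE()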
diff --git a/config.py b/config.py
index cb52e263..7922f305 100644
--- a/config.py
+++ b/config.py
@@ -11,51 +11,76 @@
 NICK = os.environ.get('NICK', None)
 PORT = int(os.environ.get('PORT', '8080'))
-WEB_HOOK = os.environ.get('WEB_HOOK', None)
 BOT_TOKEN = os.environ.get('BOT_TOKEN', None)
-API = os.environ.get('API', None)
-API_URL = os.environ.get('API_URL', 'https://api.openai.com/v1/chat/completions')
+def replace_with_asterisk(string, start=10, end=45):
+    return string[:start] + '*' * (end - start) + string[end:]
+
 GPT_ENGINE = os.environ.get('GPT_ENGINE', 'gpt-4o')
-PASS_HISTORY = (os.environ.get('PASS_HISTORY', "True") == "False") == False
+API_URL = os.environ.get('API_URL', 'https://api.openai.com/v1/chat/completions')
+API = os.environ.get('API', None)
+WEB_HOOK = os.environ.get('WEB_HOOK', None)
 
-USE_GOOGLE = (os.environ.get('USE_GOOGLE', "True") == "False") == False
-if os.environ.get('GOOGLE_API_KEY', None) == None and os.environ.get('GOOGLE_CSE_ID', None) == None:
-    USE_GOOGLE = False
+def update_info_message():
+    return (
+        f"**Default engine:** `{GPT_ENGINE}`\n"
+        f"**API_URL:** `{API_URL}`\n\n"
+        f"**API:** `{replace_with_asterisk(API)}`\n\n"
+        f"**WEB_HOOK:** `{WEB_HOOK}`\n\n"
+    )
 
-temperature = float(os.environ.get('temperature', '0.5'))
-LANGUAGE = os.environ.get('LANGUAGE', 'English')
 GROQ_API_KEY = os.environ.get('GROQ_API_KEY', None)
 GOOGLE_AI_API_KEY = os.environ.get('GOOGLE_AI_API_KEY', None)
-CUSTOM_MODELS = os.environ.get('CUSTOM_MODELS', None)
-if CUSTOM_MODELS:
-    CUSTOM_MODELS_LIST = [id for id in CUSTOM_MODELS.split(",")]
-else:
-    CUSTOM_MODELS_LIST = None
 
 current_date = datetime.now()
 Current_Date = current_date.strftime("%Y-%m-%d")
+
+LANGUAGE = os.environ.get('LANGUAGE', 'English')
 systemprompt = os.environ.get('SYSTEMPROMPT', prompt.system_prompt.format(LANGUAGE, Current_Date))
 claude_systemprompt = os.environ.get('SYSTEMPROMPT', prompt.claude_system_prompt.format(LANGUAGE))
 
-if API:
-    ChatGPTbot = chatgpt(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
-
-    translate_bot = chatgpt(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
-    copilot_bot = chatgpt(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=prompt.search_system_prompt.format(LANGUAGE), temperature=temperature)
-    dallbot = dalle3(api_key=f"{API}")
-else:
-    ChatGPTbot = None
+def get_current_lang():
+    if LANGUAGE == "Simplified Chinese":
+        lang = "zh"
+    else:
+        lang = "en"
+    return lang
 
-ClaudeAPI = os.environ.get('claude_api_key', None)
-if ClaudeAPI:
-    claudeBot = claude(api_key=f"{ClaudeAPI}", system_prompt=claude_systemprompt)
-    claude3Bot = claude3(api_key=f"{ClaudeAPI}", system_prompt=claude_systemprompt)
+def update_language():
+    global LANGUAGE, systemprompt, claude_systemprompt
+    try:
+        if LANGUAGE == "Simplified Chinese":
+            LANGUAGE = "English"
+            systemprompt = systemprompt.replace("Simplified Chinese", "English")
+            claude_systemprompt = claude_systemprompt.replace("Simplified Chinese", "English")
+        else:
+            LANGUAGE = "Simplified Chinese"
+            systemprompt = systemprompt.replace("English", "Simplified Chinese")
+            claude_systemprompt = claude_systemprompt.replace("English", "Simplified Chinese")
+    except:
+        pass
 
-if GROQ_API_KEY:
-    groqBot = groq(api_key=f"{GROQ_API_KEY}")
-if GOOGLE_AI_API_KEY:
-    gemini_Bot = gemini(api_key=f"{GOOGLE_AI_API_KEY}")
+temperature = float(os.environ.get('temperature', '0.5'))
+CLAUDE_API = os.environ.get('claude_api_key', None)
+
+def update_ENGINE(data = None):
+    global GPT_ENGINE, ChatGPTbot, translate_bot, dallbot, claudeBot, claude3Bot, groqBot, gemini_Bot
+    if data:
+        GPT_ENGINE = data
+    if API:
+        ChatGPTbot = chatgpt(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
+        translate_bot = chatgpt(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
+        dallbot = dalle3(api_key=f"{API}")
+    if CLAUDE_API and "claude-2.1" in GPT_ENGINE:
+        claudeBot = claude(api_key=f"{CLAUDE_API}", engine=GPT_ENGINE, system_prompt=claude_systemprompt, temperature=temperature)
+    if CLAUDE_API and "claude-3" in GPT_ENGINE:
+        claude3Bot = claude3(api_key=f"{CLAUDE_API}", engine=GPT_ENGINE, system_prompt=claude_systemprompt, temperature=temperature)
+    if GROQ_API_KEY and ("mixtral" in GPT_ENGINE or "llama" in GPT_ENGINE):
+        groqBot = groq(api_key=f"{GROQ_API_KEY}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
+    if GOOGLE_AI_API_KEY and "gemini" in GPT_ENGINE:
+        gemini_Bot = gemini(api_key=f"{GOOGLE_AI_API_KEY}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
+
+update_ENGINE()
 
 whitelist = os.environ.get('whitelist', None)
 if whitelist:
@@ -67,18 +92,35 @@ if GROUP_LIST:
     GROUP_LIST = [int(id) for id in GROUP_LIST.split(",")]
 
-class userConfig:
-    def __init__(self, user_id: int):
+class UserConfig:
+    def __init__(self, user_id: str = "default", language="English", engine="gpt-4o"):
         self.user_id = user_id
-        self.language = LANGUAGE
-        self.temperature = temperature
-        self.engine = GPT_ENGINE
-        self.system_prompt = systemprompt
-        self.search_system_prompt = prompt.search_system_prompt.format(self.language)
-        self.search_model = "gpt-3.5-turbo-1106"
-
-def get_plugins_status(item):
-    return "✅" if PLUGINS[item] else "☑️"
+        self.language = language
+        self.engine = engine
+        self.users = {
+            "default": {
+                "language": self.language,
+                "engine": self.engine,
+            }
+        }
+    def user_init(self, user_id):
+        if user_id not in self.users:
+            self.users[user_id] = {"language": LANGUAGE, "engine": GPT_ENGINE}
+    def get_language(self, user_id):
+        self.user_init(user_id)
+        return self.users[user_id]["language"]
+    def set_language(self, user_id, language):
+        self.user_init(user_id)
+        self.users[user_id]["language"] = language
+
+    def get_engine(self, user_id):
+        self.user_init(user_id)
+        return self.users[user_id]["engine"]
+    def set_engine(self, user_id, engine):
+        self.user_init(user_id)
+        self.users[user_id]["engine"] = engine
+
+Users = UserConfig()
 
 def delete_model_digit_tail(lst):
     for i in range(len(lst) - 1, -1, -1):
@@ -97,7 +139,7 @@ def create_buttons(strings):
     temp = []
     for string in filtered_strings1:
-        button = InlineKeyboardButton(delete_model_digit_tail(string.split("-")), callback_data=string)
+        button = InlineKeyboardButton(delete_model_digit_tail(string.split("-")), callback_data=string + "ENGINE")
         temp.append(button)
 
        # Group the buttons two per row
@@ -110,7 +152,7 @@ def create_buttons(strings):
         buttons.append(temp)
 
     for string in filtered_strings2:
-        button = InlineKeyboardButton(delete_model_digit_tail(string.split("-")), callback_data=string)
+        button = InlineKeyboardButton(delete_model_digit_tail(string.split("-")), callback_data=string + "ENGINE")
         buttons.append([button])
 
     return buttons
@@ -134,6 +176,11 @@ def create_buttons(strings):
     "gemini-1.5-pro-latest",
 ])
 
+CUSTOM_MODELS = os.environ.get('CUSTOM_MODELS', None)
+if CUSTOM_MODELS:
+    CUSTOM_MODELS_LIST = [id for id in CUSTOM_MODELS.split(",")]
+else:
+    CUSTOM_MODELS_LIST = None
 if CUSTOM_MODELS_LIST:
     delete_models = [model[1:] for model in CUSTOM_MODELS_LIST if model[0] == "-"]
     for target in delete_models:
@@ -156,13 +203,10 @@ def update_model_buttons():
     )
     return buttons
 
-def get_current_lang():
-    if LANGUAGE == "Simplified Chinese":
-        lang = "zh"
-    else:
-        lang = "en"
-    return lang
+def get_plugins_status(item):
+    return "✅" if PLUGINS[item] else "☑️"
 
+PASS_HISTORY = (os.environ.get('PASS_HISTORY', "True") == "False") == False
 def update_first_buttons_message():
     history = "✅" if PASS_HISTORY else "☑️"
@@ -176,15 +220,15 @@
         [
             InlineKeyboardButton(strings["button_change_model"][lang], callback_data="MODEL"),
             InlineKeyboardButton(strings['button_language'][lang], callback_data="language"),
-            InlineKeyboardButton(f"{strings['button_history'][lang]} {history}", callback_data="PASS_HISTORY"),
+            InlineKeyboardButton(f"{history} {strings['button_history'][lang]}", callback_data="PASS_HISTORY"),
         ],
         [
-            InlineKeyboardButton(f"{strings['button_search'][lang]} {get_plugins_status('SEARCH')}", callback_data='SEARCH'),
-            InlineKeyboardButton(f"{strings['button_current_time'][lang]} {get_plugins_status('DATE')}", callback_data='DATE'),
+            InlineKeyboardButton(f"{get_plugins_status('SEARCH')}{strings['button_search'][lang]}", callback_data='SEARCH'),
+            InlineKeyboardButton(f"{get_plugins_status('DATE')}{strings['button_current_time'][lang]}", callback_data='DATE'),
         ],
         [
-            InlineKeyboardButton(f"{strings['button_url'][lang]} {get_plugins_status('URL')}", callback_data='URL'),
-            InlineKeyboardButton(f"{strings['button_version'][lang]} {get_plugins_status('VERSION')}", callback_data='VERSION'),
+            InlineKeyboardButton(f"{get_plugins_status('URL')}{strings['button_url'][lang]}", callback_data='URL'),
+            InlineKeyboardButton(f"{get_plugins_status('VERSION')}{strings['button_version'][lang]}", callback_data='VERSION'),
         ],
     ]
     return first_buttons
\ No newline at end of file
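A note on the PASS_HISTORY parsing kept above: the expression
(os.environ.get('PASS_HISTORY', "True") == "False") == False is a double
negation. It behaves the same as the simpler form sketched here (not part of
the commit):

    import os

    # True for any value except the literal string "False";
    # the default "True" therefore enables history.
    PASS_HISTORY = os.environ.get('PASS_HISTORY', 'True') != 'False'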
diff --git a/requirements.txt b/requirements.txt
index e7e5e22c..33738ec4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,6 @@ pytz
 python-dotenv
 md2tgmd==0.1.9
 fake_useragent
-ModelMerge==0.3.6
+ModelMerge==0.3.7
 oauth2client==3.0.0
 python-telegram-bot[webhooks,rate-limiter]==21.0.1
\ No newline at end of file
diff --git a/test/test.py b/test/test.py
deleted file mode 100644
index 3a1cb3f7..00000000
--- a/test/test.py
+++ /dev/null
@@ -1,13 +0,0 @@
-def delete_model_digit_tail(lst):
-    for i in range(len(lst) - 1, -1, -1):
-        if not lst[i].isdigit():
-            if i == len(lst) - 1:
-                return "-".join(lst)
-            else:
-                return "-".join(lst[:i + 1])
-
-
-# Example
-lst = ["hello", "123", "world", "456"]
-index = find_last_string_index(lst)
-print("Index of the last numeric string:", index)
\ No newline at end of file
diff --git a/test/test_API.py b/test/test_API.py
deleted file mode 100644
index f548b178..00000000
--- a/test/test_API.py
+++ /dev/null
@@ -1,6 +0,0 @@
-def replace_with_asterisk(string, start=15, end=40):
-    return string[:start] + '*' * (end - start) + string[end:]
-
-original_string = "sk-zIuWeeuWY8vNCVhhHCXLroNmA6QhBxnv0ARMFcODVQwwqGRg"
-result = replace_with_asterisk(original_string)
-print(result)
diff --git a/test/test_Faucet.py b/test/test_Faucet.py
deleted file mode 100644
index 4a93e22b..00000000
--- a/test/test_Faucet.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from langchain.chat_models import ChatOpenAI
-from langchain.schema import HumanMessage
-
-def gptsearch(result, llm):
-    response =
llm([HumanMessage(content=result)]) - response = response.content - return response - -chainllm = ChatOpenAI(temperature=0.5, openai_api_base="https://openkey.cloud/v1", model_name="gpt-3.5-turbo", openai_api_key="sk-ucUnnmqI9DdtsAXG8OKxOFxD5dnSrU3E3ZQh4PJa1dgQ7KzE") -# chainllm = ChatOpenAI(temperature=0.5, openai_api_base="https://openkey.cloud/v1", model_name="gpt-4-1106-preview", openai_api_key="sk-ucUnnmqI9DdtsAXG8OKxOFxD5dnSrU3E3ZQh4PJa1dgQ7KzE") -# chainllm = ChatOpenAI(temperature=0.5, openai_api_base="https://openkey.cloud/v1", model_name="gpt-4", openai_api_key="sk-ucUnnmqI9DdtsAXG8OKxOFxD5dnSrU3E3ZQh4PJa1dgQ7KzE") - -print(gptsearch("鲁迅和周树人为什么打架", chainllm)) \ No newline at end of file diff --git a/test/test_Web_crawler.py b/test/test_Web_crawler.py deleted file mode 100644 index 5e66cd34..00000000 --- a/test/test_Web_crawler.py +++ /dev/null @@ -1,191 +0,0 @@ -import re -import os -os.system('cls' if os.name == 'nt' else 'clear') -import time -import chardet -import requests -from bs4 import BeautifulSoup -from requests.adapters import HTTPAdapter - -# def Web_crawler(url: str) -> str: -# """返回链接网址url正文内容,必须是合法的网址""" -# headers = { -# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -# } -# result = '' -# try: -# requests.packages.urllib3.disable_warnings() -# response = requests.get(url, headers=headers, verify=False) -# soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') -# body = "".join(soup.find('body').get_text().split('\n')) -# result = body -# except Exception as e: -# print('\033[31m') -# print("error", e) -# print('\033[0m') -# return result - -# def Web_crawler(url: str) -> str: -# """返回链接网址url正文内容,必须是合法的网址""" -# headers = { -# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -# } -# result = '' -# try: -# requests.packages.urllib3.disable_warnings() -# response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True) -# content_length = int(response.headers.get('Content-Length', 0)) -# if content_length > 500000: -# print("Skipping large file:", url) -# return result -# # detected_encoding = chardet.detect(response.content)['encoding'] -# # decoded_content = response.content.decode(detected_encoding, errors='replace') -# # # soup = BeautifulSoup(response.text, 'html.parser') -# # soup = BeautifulSoup(decoded_content, 'lxml') -# # # soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') -# # body = "".join(soup.find('body').get_text().split('\n')) -# # result = body - -# detected_encoding = chardet.detect(response.content)['encoding'] -# decoded_content = response.content.decode(detected_encoding, errors='ignore') -# decoded_content = re.sub(r'[^\u0000-\uFFFF]', ' ', decoded_content) -# soup = BeautifulSoup(decoded_content, 'lxml') -# body = soup.find('body').get_text() -# body = body.replace('\n', ' ') -# body = re.sub(r'http[s]?://\S+', ' ', body) -# body = re.sub(r'\s+', ' ', body) -# result = body - -# except Exception as e: -# print('\033[31m') -# print("error url", url) -# print("error", e) -# print('\033[0m') -# return result - -def Web_crawler(url: str, isSearch=False) -> str: - """返回链接网址url正文内容,必须是合法的网址""" - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - } - result = '' - try: - 
requests.packages.urllib3.disable_warnings() - response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True) - if response.status_code == 404: - print("Page not found:", url) - return "抱歉,网页不存在,目前无法访问该网页。@Trash@" - content_length = int(response.headers.get('Content-Length', 0)) - if content_length > 5000000: - print("Skipping large file:", url) - return result - soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') - - table_contents = "" - tables = soup.find_all('table') - for table in tables: - table_contents += table.get_text() - table.decompose() - body = "".join(soup.find('body').get_text().split('\n')) - result = table_contents + body - if result == '' and not isSearch: - result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@" - if result.count("\"") > 1000: - result = "" - except Exception as e: - print('\033[31m') - print("error url", url) - print("error", e) - print('\033[0m') - print("url content", result + "\n\n") - return result - -def jina_ai_Web_crawler(url: str, isSearch=False) -> str: - """返回链接网址url正文内容,必须是合法的网址""" - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - } - result = '' - try: - requests.packages.urllib3.disable_warnings() - url = "https://r.jina.ai/" + url - response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True) - if response.status_code == 404: - print("Page not found:", url) - return "抱歉,网页不存在,目前无法访问该网页。@Trash@" - content_length = int(response.headers.get('Content-Length', 0)) - if content_length > 5000000: - print("Skipping large file:", url) - return result - soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') - - table_contents = "" - tables = soup.find_all('table') - for table in tables: - table_contents += table.get_text() - table.decompose() - body = "".join(soup.find('body').get_text().split('\n')) - result = table_contents + body - if result == '' and not isSearch: - result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@" - if result.count("\"") > 1000: - result = "" - except Exception as e: - print('\033[31m') - print("error url", url) - print("error", e) - print('\033[0m') - print(result + "\n\n") - return result - -# def Web_crawler(url: str) -> str: -# """返回链接网址url正文内容,必须是合法的网址""" -# headers = { -# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -# } -# result = '' -# try: -# requests.packages.urllib3.disable_warnings() -# response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True) -# content_length = int(response.headers.get('Content-Length', 0)) -# if content_length > 500000: -# print("Skipping large file:", url) -# return result -# soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') -# body = "".join(soup.find('body').get_text().split('\n')) -# result = body -# except Exception as e: -# print('\033[31m') -# print("error url", url) -# print("error", e) -# print('\033[0m') -# return result - -start_time = time.time() -# for url in ['https://www.zhihu.com/question/557257320', 'https://job.achi.idv.tw/2021/12/05/what-is-the-403-forbidden-error-how-to-fix-it-8-methods-explained/', 'https://www.lifewire.com/403-forbidden-error-explained-2617989']: -# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403', 'https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it', 
'https://beebom.com/what-is-403-forbidden-error-how-to-fix/']: -# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']: -# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']: -# for url in ['https://github.com/EAimTY/tuic/issues/107']: -# TODO 没办法访问 -# for url in ['https://s.weibo.com/top/summary?cate=realtimehot']: -# for url in ['https://tophub.today/n/KqndgxeLl9']: -# for url in ['https://support.apple.com/zh-cn/HT213931']: -for url in ["https://zeta.zeabur.app", "https://www.anthropic.com/research/probes-catch-sleeper-agents"]: -# for url in ['https://finance.sina.com.cn/stock/roll/2023-06-26/doc-imyyrexk4053724.shtml', 'https://s.weibo.com/top/summary?cate=realtimehot', 'https://tophub.today/n/KqndgxeLl9', 'https://www.whatsonweibo.com/', 'https://www.trendingonweibo.com/?ref=producthunt', 'https://www.trendingonweibo.com/', 'https://www.statista.com/statistics/1377073/china-most-popular-news-on-weibo/']: -# for url in ['https://www.usnews.com/news/entertainment/articles/2023-12-22/china-drafts-new-rules-proposing-restrictions-on-online-gaming']: -# for url in ['https://developer.aliyun.com/article/721836']: -# for url in ['https://cn.aliyun.com/page-source/price/detail/machinelearning_price']: -# for url in ['https://mp.weixin.qq.com/s/Itad7Y-QBcr991JkF3SrIg']: -# for url in ['https://zhidao.baidu.com/question/317577832.html']: -# for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']: -# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']: -# for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']: -# for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']: - Web_crawler(url) - # jina_ai_Web_crawler(url) - print('-----------------------------') -end_time = time.time() -run_time = end_time - start_time -# 打印运行时间 -print(f"程序运行时间:{run_time}秒") diff --git a/test/test_claude.py b/test/test_claude.py deleted file mode 100644 index 8d707257..00000000 --- a/test/test_claude.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -import requests -# from ..utils import typings as t -import json -import tiktoken -# class claudeConversation(dict): -# def __getitem__(self, index): -# conversation_list = super().__getitem__(index) -# return "\n\n" + "\n\n".join([f"{item['role']}:{item['content']}" for item in conversation_list]) + "\n\nAssistant:" - -# c = claudeConversation() -# c['1'] = [{'role': 'A', 'content': 'hello'}, {'role': 'B', 'content': 'hi'}] -# print(repr(c['1'])) - -import platform -python_version = list(platform.python_version_tuple()) -SUPPORT_ADD_NOTES = int(python_version[0]) >= 3 and int(python_version[1]) >= 11 - -class ChatbotError(Exception): - """ - Base class for all Chatbot errors in this Project - """ - - def __init__(self, *args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check that the input is correct, or you can resolve this issue by filing an issue", - ) - super().add_note("Project URL: https://github.com/acheong08/ChatGPT") - 
super().__init__(*args) - -class APIConnectionError(ChatbotError): - """ - Subclass of ChatbotError - - An exception object thrown when an API connection fails or fails to connect due to network or - other miscellaneous reasons - """ - - def __init__(self, *args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check if there is a problem with your network connection", - ) - super().__init__(*args) - -class claudeConversation(dict): - def Conversation(self, index): - conversation_list = super().__getitem__(index) - return "\n\n" + "\n\n".join([f"{item['role']}:{item['content']}" for item in conversation_list]) + "\n\nAssistant:" - - -class claudebot: - def __init__( - self, - api_key: str, - engine: str = os.environ.get("GPT_ENGINE") or "claude-2.1", - temperature: float = 0.5, - top_p: float = 0.7, - chat_url: str = "https://api.anthropic.com/v1/complete", - timeout: float = None, - ): - self.api_key: str = api_key - self.engine: str = engine - self.temperature = temperature - self.top_p = top_p - self.chat_url = chat_url - self.timeout = timeout - self.session = requests.Session() - self.conversation = claudeConversation() - - def add_to_conversation( - self, - message: str, - role: str, - convo_id: str = "default", - - ) -> None: - """ - Add a message to the conversation - """ - self.conversation[convo_id].append({"role": role, "content": message}) - - def reset(self, convo_id: str = "default") -> None: - """ - Reset the conversation - """ - self.conversation[convo_id] = list() - - def __truncate_conversation(self, convo_id: str = "default") -> None: - """ - Truncate the conversation - """ - while True: - if ( - self.get_token_count(convo_id) > self.truncate_limit - and len(self.conversation[convo_id]) > 1 - ): - # Don't remove the first message - self.conversation[convo_id].pop(1) - else: - break - - def get_token_count(self, convo_id: str = "default") -> int: - """ - Get token count - """ - if self.engine not in ENGINES: - raise NotImplementedError( - f"Engine {self.engine} is not supported. 
Select from {ENGINES}", - ) - tiktoken.get_encoding("cl100k_base") - tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base" - - encoding = tiktoken.encoding_for_model(self.engine) - - num_tokens = 0 - for message in self.conversation[convo_id]: - # every message follows {role/name}\n{content}\n - num_tokens += 5 - for key, value in message.items(): - if value: - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += 5 # role is always required and always 1 token - num_tokens += 5 # every reply is primed with assistant - return num_tokens - - def ask_stream( - self, - prompt: str, - role: str = "Human", - convo_id: str = "default", - model: str = None, - pass_history: bool = True, - model_max_tokens: int = 4096, - **kwargs, - ): - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.add_to_conversation(prompt, role, convo_id=convo_id) - # self.__truncate_conversation(convo_id=convo_id) - # print(self.conversation[convo_id]) - - url = self.chat_url - headers = { - "accept": "application/json", - "anthropic-version": "2023-06-01", - "content-type": "application/json", - "x-api-key": f"{kwargs.get('api_key', self.api_key)}", - } - - json_post = { - "model": os.environ.get("MODEL_NAME") or model or self.engine, - "prompt": self.conversation.Conversation(convo_id) if pass_history else f"\n\nHuman:{prompt}\n\nAssistant:", - "stream": True, - "temperature": kwargs.get("temperature", self.temperature), - "top_p": kwargs.get("top_p", self.top_p), - "max_tokens_to_sample": model_max_tokens, - } - - response = self.session.post( - url, - headers=headers, - json=json_post, - timeout=kwargs.get("timeout", self.timeout), - stream=True, - ) - if response.status_code != 200: - raise BaseException(f"{response.status_code} {response.reason} {response.text}") - response_role: str = "Assistant" - full_response: str = "" - for line in response.iter_lines(): - if not line or line.decode("utf-8") == "event: completion" or line.decode("utf-8") == "event: ping" or line.decode("utf-8") == "data: {}": - continue - line = line.decode("utf-8")[6:] - # print(line) - resp: dict = json.loads(line) - content = resp.get("completion") - full_response += content - yield content - self.add_to_conversation(full_response, response_role, convo_id=convo_id) - print(repr(self.conversation.Conversation(convo_id))) - # print("total tokens:", self.get_token_count(convo_id)) - - -bot = claudebot(api_key=os.environ.get("claude_api_key")) - -for i in bot.ask_stream("python怎么自定义错误"): - print(i, end="") \ No newline at end of file diff --git a/test/test_claude3.py b/test/test_claude3.py deleted file mode 100644 index fab13210..00000000 --- a/test/test_claude3.py +++ /dev/null @@ -1,205 +0,0 @@ -import os -import requests -# from ..utils import typings as t -import json -import tiktoken -# class claudeConversation(dict): -# def __getitem__(self, index): -# conversation_list = super().__getitem__(index) -# return "\n\n" + "\n\n".join([f"{item['role']}:{item['content']}" for item in conversation_list]) + "\n\nAssistant:" - -# c = claudeConversation() -# c['1'] = [{'role': 'A', 'content': 'hello'}, {'role': 'B', 'content': 'hi'}] -# print(repr(c['1'])) - -import platform -python_version = list(platform.python_version_tuple()) -SUPPORT_ADD_NOTES = int(python_version[0]) >= 3 and int(python_version[1]) >= 11 - -class ChatbotError(Exception): - """ - Base class for all Chatbot errors in this Project - """ - - def __init__(self, 
*args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check that the input is correct, or you can resolve this issue by filing an issue", - ) - super().add_note("Project URL: https://github.com/acheong08/ChatGPT") - super().__init__(*args) - -class APIConnectionError(ChatbotError): - """ - Subclass of ChatbotError - - An exception object thrown when an API connection fails or fails to connect due to network or - other miscellaneous reasons - """ - - def __init__(self, *args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check if there is a problem with your network connection", - ) - super().__init__(*args) - -class claudeConversation(dict): - def Conversation(self, index): - conversation_list = super().__getitem__(index) - return "\n\n" + "\n\n".join([f"{item['role']}:{item['content']}" for item in conversation_list]) + "\n\nAssistant:" - -class claude3bot: - def __init__( - self, - api_key: str, - engine: str = os.environ.get("GPT_ENGINE") or "claude-3-opus-20240229", - temperature: float = 0.5, - top_p: float = 0.7, - chat_url: str = "https://api.anthropic.com/v1/messages", - timeout: float = None, - system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", - **kwargs, - ): - self.api_key: str = api_key - self.engine: str = engine - self.temperature = temperature - self.top_p = top_p - self.chat_url = chat_url - self.timeout = timeout - self.session = requests.Session() - self.conversation: dict[str, list[dict]] = { - "default": [], - } - self.system_prompt = system_prompt - - def add_to_conversation( - self, - message: str, - role: str, - convo_id: str = "default", - pass_history: bool = True, - ) -> None: - """ - Add a message to the conversation - """ - - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.conversation[convo_id].append({"role": role, "content": message}) - - def reset(self, convo_id: str = "default", system_prompt: str = None) -> None: - """ - Reset the conversation - """ - self.conversation[convo_id] = list() - - def __truncate_conversation(self, convo_id: str = "default") -> None: - """ - Truncate the conversation - """ - while True: - if ( - self.get_token_count(convo_id) > self.truncate_limit - and len(self.conversation[convo_id]) > 1 - ): - # Don't remove the first message - self.conversation[convo_id].pop(1) - else: - break - - def get_token_count(self, convo_id: str = "default") -> int: - """ - Get token count - """ - if self.engine not in ENGINES: - raise NotImplementedError( - f"Engine {self.engine} is not supported. 
Select from {ENGINES}", - ) - tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base" - encoding = tiktoken.encoding_for_model(self.engine) - - num_tokens = 0 - for message in self.conversation[convo_id]: - # every message follows {role/name}\n{content}\n - num_tokens += 5 - for key, value in message.items(): - if value: - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += 5 # role is always required and always 1 token - num_tokens += 5 # every reply is primed with assistant - return num_tokens - - def ask_stream( - self, - prompt: str, - role: str = "user", - convo_id: str = "default", - model: str = None, - pass_history: bool = True, - model_max_tokens: int = 4096, - **kwargs, - ): - pass_history = True - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.add_to_conversation(prompt, role, convo_id=convo_id) - # self.__truncate_conversation(convo_id=convo_id) - # print(self.conversation[convo_id]) - - url = self.chat_url - headers = { - "x-api-key": f"{kwargs.get('api_key', self.api_key)}", - "anthropic-version": "2023-06-01", - "content-type": "application/json", - } - - json_post = { - "model": os.environ.get("MODEL_NAME") or model or self.engine, - "messages": self.conversation[convo_id] if pass_history else [{ - "role": "user", - "content": prompt - }], - "system": self.system_prompt, - "stream": True, - "temperature": kwargs.get("temperature", self.temperature), - "top_p": kwargs.get("top_p", self.top_p), - "max_tokens": model_max_tokens, - "stream": True, - } - - response = self.session.post( - url, - headers=headers, - json=json_post, - timeout=kwargs.get("timeout", self.timeout), - stream=True, - ) - if response.status_code != 200: - print(response.text) - raise BaseException(f"{response.status_code} {response.reason} {response.text}") - response_role: str = "assistant" - full_response: str = "" - for line in response.iter_lines(): - if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}": - continue - line = line.decode("utf-8")[6:] - # print(line) - resp: dict = json.loads(line) - delta = resp.get("delta") - if not delta: - continue - if "text" in delta: - content = delta["text"] - full_response += content - yield content - self.add_to_conversation(full_response, response_role, convo_id=convo_id) - # print(repr(self.conversation.Conversation(convo_id))) - # print("total tokens:", self.get_token_count(convo_id)) - - -bot = claude3bot(api_key=os.environ.get("claude_api_key")) - -for i in bot.ask_stream("hi"): - print(i, end="") \ No newline at end of file diff --git a/test/test_claude_zh_char.py b/test/test_claude_zh_char.py deleted file mode 100644 index 38def1dc..00000000 --- a/test/test_claude_zh_char.py +++ /dev/null @@ -1,26 +0,0 @@ -def is_surrounded_by_chinese(text, index): - left_char = text[index - 1] - if 0 < index < len(text) - 1: - right_char = text[index + 1] - return '\u4e00' <= left_char <= '\u9fff' or '\u4e00' <= right_char <= '\u9fff' - if index == len(text) - 1: - return '\u4e00' <= left_char <= '\u9fff' - return False - -def replace_char(string, index, new_char): - return string[:index] + new_char + string[index+1:] - -def claude_replace(text): - Punctuation_mapping = {",": ",", ":": ":", "!": "!", "?": "?", ";": ";"} - key_list = list(Punctuation_mapping.keys()) - for i in range(len(text)): - if is_surrounded_by_chinese(text, i) and (text[i] in key_list): - text = replace_char(text, i, 
Punctuation_mapping[text[i]]) - return text - -text = ''' -你好!我是一名人工智能助手,很高兴见到你。有什么我可以帮助你的吗?无论是日常问题还是专业领域,我都会尽我所能为你解答。让我们开始愉快的交流吧!''' - -if __name__ == '__main__': - new_text = claude_replace(text) - print(new_text) \ No newline at end of file diff --git a/test/test_ddg.py b/test/test_ddg.py deleted file mode 100644 index ae869227..00000000 --- a/test/test_ddg.py +++ /dev/null @@ -1,109 +0,0 @@ -import re -import time -import requests -import json -import os -from bs4 import BeautifulSoup -from langchain.tools import DuckDuckGoSearchResults -from duckduckgo_search import DDGS -def getddgsearchurl1(result, numresults=3): - requrl = f"https://html.duckduckgo.com/html?q={result}&kl=us-en&s=0&dc=0" - try: - response = requests.get(requrl) - soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') - print(soup) - urls = [] - for link in soup.findAll('a', attrs={'href': re.compile("^http://")}): - urls.append(link.get('href')) - urls = urls[:numresults] - except Exception as e: - print('\033[31m') - print("duckduckgo error", e) - print('\033[0m') - urls = [] - return urls - -def search_duckduckgo(query): - url = 'https://duckduckgo.com/html/' - params = { - 'q': query, - 'ia': 'web' - } - - response = requests.get(url, params=params) - soup = BeautifulSoup(response.text, 'html.parser') - print(soup) - for link in soup.find_all('a', class_='result__url'): - print(link.get('href')) - - - -from duckduckgo_search import DDGS - -def getddg(result, numresults=3): - with DDGS(timeout=2) as ddgs: - results = [r["href"] for r in ddgs.text(result, max_results=numresults)] - # print(json.dumps(results, ensure_ascii=False, indent=4)) - return results - -def getddgsearchurl(result, numresults=3): - try: - # webresult = getddg(result, numresults) - search = DuckDuckGoSearchResults(num_results=numresults) - webresult = search.run(result) - print(webresult) - if webresult == None: - return [] - urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE) - except Exception as e: - print('\033[31m') - print("duckduckgo error", e) - print('\033[0m') - urls = [] - return urls - - -def Web_crawler(url: str) -> str: - """返回链接网址url正文内容,必须是合法的网址""" - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" - } - result = '' - try: - requests.packages.urllib3.disable_warnings() - response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True) - if response.status_code == 404: - print("Page not found:", url) - return "" - content_length = int(response.headers.get('Content-Length', 0)) - if content_length > 5000000: - print("Skipping large file:", url) - return result - soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8') - body = "".join(soup.find('body').get_text().split('\n')) - result = body - except Exception as e: - print('\033[31m') - print("error url", url) - print("error", e) - print('\033[0m') - return result - -if __name__ == '__main__': - start_time = time.time() - - # search_duckduckgo('python programming') - # print(getddg("尊嘟假嘟 含义")) - # urls = getddgsearchurl("python programming") - # urls = getddgsearchurl1("test") - # urls = getddgsearchurl("你知道今天有什么热点新闻吗") - # urls = getddg("尊嘟假嘟 含义") - urls = getddgsearchurl("你知道今天有什么热点新闻吗") - print(urls) - # for url in urls: - # print(Web_crawler(url)) - # print('-----------------------------') - end_time = time.time() - run_time = end_time - start_time - # 打印运行时间 - 
print(f"程序运行时间:{run_time}秒") \ No newline at end of file diff --git a/test/test_dict.py b/test/test_dict.py deleted file mode 100644 index 5e233dc6..00000000 --- a/test/test_dict.py +++ /dev/null @@ -1,22 +0,0 @@ -# # 假设你的列表如下: -# lst = [{"name": "张三", "age": 20}, {"name": "李四", "age": {"url": "wwww"}}, {"name": "王五", "age": 40}] - -# # 使用列表解析和items()方法取出所有值 -# values = [value for dic in lst for value in dic.values()] - -# # 打印结果 -# print(values) - -def extract_values(obj): - if isinstance(obj, dict): - for value in obj.values(): - yield from extract_values(value) - elif isinstance(obj, list): - for item in obj: - yield from extract_values(item) - else: - yield obj - -lst = [{"name": "张三", "age": 20}, {"name": "李四", "age": {"url": "wwww"}}, {"name": "王五", "age": 40}] -values = list(extract_values(lst)) -print(values) diff --git a/test/test_download_pdf.py b/test/test_download_pdf.py deleted file mode 100644 index b5dc7298..00000000 --- a/test/test_download_pdf.py +++ /dev/null @@ -1,56 +0,0 @@ -# import requests -# import urllib.parse -# import os -# import sys -# sys.path.append(os.getcwd()) -# import config - -# from langchain.chat_models import ChatOpenAI -# from langchain.embeddings.openai import OpenAIEmbeddings -# from langchain.vectorstores import Chroma -# from langchain.text_splitter import CharacterTextSplitter -# from langchain.document_loaders import UnstructuredPDFLoader -# from langchain.chains import RetrievalQA - - -# def get_doc_from_url(url): -# filename = urllib.parse.unquote(url.split("/")[-1]) -# response = requests.get(url, stream=True) -# with open(filename, 'wb') as f: -# for chunk in response.iter_content(chunk_size=1024): -# f.write(chunk) -# return filename - -# def pdf_search(docurl, query_message, model="gpt-3.5-turbo"): -# chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.API_URL.split("chat")[0], model_name=model, openai_api_key=os.environ.get('API', None)) -# embeddings = OpenAIEmbeddings(openai_api_base=config.API_URL.split("chat")[0], openai_api_key=os.environ.get('API', None)) -# filename = get_doc_from_url(docurl) -# docpath = os.getcwd() + "/" + filename -# loader = UnstructuredPDFLoader(docpath) -# print(docpath) -# documents = loader.load() -# os.remove(docpath) -# # 初始化加载器 -# text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25) -# # 切割加载的 document -# split_docs = text_splitter.split_documents(documents) -# vector_store = Chroma.from_documents(split_docs, embeddings) -# # 创建问答对象 -# qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(),return_source_documents=True) -# # 进行问答 -# result = qa({"query": query_message}) -# return result['result'] - -# pdf_search("https://www.nsfc.gov.cn/csc/20345/22468/pdf/2001/%E5%86%BB%E7%BB%93%E8%A3%82%E9%9A%99%E7%A0%82%E5%B2%A9%E4%BD%8E%E5%91%A8%E5%BE%AA%E7%8E%AF%E5%8A%A8%E5%8A%9B%E7%89%B9%E6%80%A7%E8%AF%95%E9%AA%8C%E7%A0%94%E7%A9%B6.pdf", "端水实验的目的是什么?") - -from PyPDF2 import PdfReader - -def has_text(pdf_path): - with open(pdf_path, 'rb') as file: - pdf = PdfReader(file) - page = pdf.pages[0] - text = page.extract_text() - return text - -pdf_path = '/Users/yanyuming/Downloads/GitHub/ChatGPT-Telegram-Bot/冻结裂隙砂岩低周循环动力特性试验研究.pdf' -print(has_text(pdf_path)) \ No newline at end of file diff --git a/test/test_fstring.py b/test/test_fstring.py deleted file mode 100644 index 35754ec4..00000000 --- a/test/test_fstring.py +++ /dev/null @@ -1,13 +0,0 @@ -username = "efwef" -def create_info_message(): - return ( - f"`Hi, {username}!`\n\n" - ) - -# 假设 
update 和 config 是已经定义好的对象 -# 当你需要更新 info_message 时,只需调用这个函数 -info_message = create_info_message() -print(info_message) -username = "e111111" -info_message = create_info_message() -print(info_message) \ No newline at end of file diff --git a/test/test_gpt4free.py b/test/test_gpt4free.py deleted file mode 100644 index 7f769f8f..00000000 --- a/test/test_gpt4free.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import g4f -from rich.console import Console -from rich.markdown import Markdown - -def get_response(message, model="gpt-3.5-turbo"): - response = g4f.ChatCompletion.create( - model=model, - messages=[{"role": "user", "content": message}], - stream=True, - ) - for message in response: - yield message - -if __name__ == "__main__": - console = Console() - message = r""" -李雪主是谁? - """ - answer = "" - for result in get_response(message, "gpt-4"): - os.system("clear") - answer += result - md = Markdown(answer) - console.print(md, no_wrap=False) \ No newline at end of file diff --git a/test/test_gpt4free_langchain_agent.py b/test/test_gpt4free_langchain_agent.py deleted file mode 100644 index 7be8cfd6..00000000 --- a/test/test_gpt4free_langchain_agent.py +++ /dev/null @@ -1,57 +0,0 @@ -import g4f -from typing import Optional, List -from langchain.llms.base import LLM -from langchain.agents import AgentType, load_tools, initialize_agent - -class EducationalLLM(LLM): - - @property - def _llm_type(self) -> str: - return "custom" - - def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: - out = g4f.ChatCompletion.create( - model=g4f.models.gpt_4, - messages=[{"role": "user", "content": prompt}], - ) # - if stop: - stop_indexes = (out.find(s) for s in stop if s in out) - min_stop = min(stop_indexes, default=-1) - if min_stop > -1: - out = out[:min_stop] - return out - - -llm = EducationalLLM() -# print(llm("今天的微博热搜有哪些?")) -tools = load_tools(["ddg-search", "llm-math"], llm=llm) -agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True) -agent.run("今天的微博热搜有哪些?") - - -# def duckduckgo_search(searchtext, model="gpt-3.5-turbo", temperature=0.5): -# llm = ChatOpenAI(temperature=temperature, openai_api_base='https://api.ohmygpt.com/v1/', model_name=model, openai_api_key=API) -# tools = load_tools(["ddg-search", "llm-math"], llm=llm) -# agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION) -# result = agent.run(searchtext) - -# en2zh_prompt = PromptTemplate( -# input_variables=["targetlang", "text"], -# template="You are a translation engine, you can only translate text and cannot interpret it, and do not explain. Translate the text to {targetlang}, please do not explain any sentences, just translate or leave them as they are.: {text}", -# ) -# chain = LLMChain(llm=llm, prompt=en2zh_prompt) -# return chain.run({"targetlang": "simplified chinese", "text": result}) - -# if __name__ == "__main__": -# os.system("clear") -# print(duckduckgo_search("夏威夷火灾死了多少人?")) - - -# prompt = PromptTemplate( -# input_variables=["product"], -# template="What is a good name for a company that makes {product}? 
Just tell one and only the name", -# ) - -# chain = LLMChain(llm=llm, prompt=prompt) - -# print(chain.run("colorful socks")) \ No newline at end of file diff --git a/test/test_groq.py b/test/test_groq.py deleted file mode 100644 index c9be6307..00000000 --- a/test/test_groq.py +++ /dev/null @@ -1,228 +0,0 @@ -import os -import requests -# from ..utils import typings as t -import json -import tiktoken -# class claudeConversation(dict): -# def __getitem__(self, index): -# conversation_list = super().__getitem__(index) -# return "\n\n" + "\n\n".join([f"{item['role']}:{item['content']}" for item in conversation_list]) + "\n\nAssistant:" - -# c = claudeConversation() -# c['1'] = [{'role': 'A', 'content': 'hello'}, {'role': 'B', 'content': 'hi'}] -# print(repr(c['1'])) - -import platform -python_version = list(platform.python_version_tuple()) -SUPPORT_ADD_NOTES = int(python_version[0]) >= 3 and int(python_version[1]) >= 11 - -class ChatbotError(Exception): - """ - Base class for all Chatbot errors in this Project - """ - - def __init__(self, *args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check that the input is correct, or you can resolve this issue by filing an issue", - ) - super().add_note("Project URL: https://github.com/acheong08/ChatGPT") - super().__init__(*args) - -class APIConnectionError(ChatbotError): - """ - Subclass of ChatbotError - - An exception object thrown when an API connection fails or fails to connect due to network or - other miscellaneous reasons - """ - - def __init__(self, *args: object) -> None: - if SUPPORT_ADD_NOTES: - super().add_note( - "Please check if there is a problem with your network connection", - ) - super().__init__(*args) - - -class groqbot: - def __init__( - self, - api_key: str, - engine: str = os.environ.get("GPT_ENGINE") or "mixtral-8x7b-32768", - temperature: float = 0.5, - top_p: float = 1, - chat_url: str = "https://api.groq.com/openai/v1/chat/completions", - timeout: float = None, - system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", - **kwargs, - ): - self.api_key: str = api_key - self.engine: str = engine - self.temperature = temperature - self.top_p = top_p - self.chat_url = chat_url - self.timeout = timeout - self.session = requests.Session() - self.conversation: dict[str, list[dict]] = { - "default": [ - { - "role": "system", - "content": system_prompt, - }, - ], - } - self.system_prompt = system_prompt - - def add_to_conversation( - self, - message: str, - role: str, - convo_id: str = "default", - pass_history: bool = True, - ) -> None: - """ - Add a message to the conversation - """ - - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.conversation[convo_id].append({"role": role, "content": message}) - - def reset(self, convo_id: str = "default", system_prompt: str = None) -> None: - """ - Reset the conversation - """ - self.conversation[convo_id] = list() - - def __truncate_conversation(self, convo_id: str = "default") -> None: - """ - Truncate the conversation - """ - while True: - if ( - self.get_token_count(convo_id) > self.truncate_limit - and len(self.conversation[convo_id]) > 1 - ): - # Don't remove the first message - self.conversation[convo_id].pop(1) - else: - break - - def get_token_count(self, convo_id: str = "default") -> int: - """ - Get token count - """ - if self.engine not in ENGINES: - raise NotImplementedError( - f"Engine {self.engine} is not supported. 
Select from {ENGINES}", - ) - # tiktoken.model.MODEL_TO_ENCODING["mixtral-8x7b-32768"] = "cl100k_base" - encoding = tiktoken.get_encoding("cl100k_base") - - num_tokens = 0 - for message in self.conversation[convo_id]: - # every message follows {role/name}\n{content}\n - num_tokens += 5 - for key, value in message.items(): - if value: - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += 5 # role is always required and always 1 token - num_tokens += 5 # every reply is primed with assistant - return num_tokens - - def ask_stream( - self, - prompt: str, - role: str = "user", - convo_id: str = "default", - model: str = None, - pass_history: bool = True, - model_max_tokens: int = 1024, - **kwargs, - ): - pass_history = True - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.add_to_conversation(prompt, role, convo_id=convo_id) - # self.__truncate_conversation(convo_id=convo_id) - # print(self.conversation[convo_id]) - - url = self.chat_url - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {kwargs.get('GROQ_API_KEY', self.api_key)}", - } - - json_post = { - "messages": self.conversation[convo_id] if pass_history else [{ - "role": "user", - "content": prompt - }], - "model": os.environ.get("GPT_ENGINE") or model or self.engine, - "temperature": kwargs.get("temperature", self.temperature), - "max_tokens": model_max_tokens, - "top_p": kwargs.get("top_p", self.top_p), - "stop": None, - "stream": True, - } - - # json_post = { - # "model": os.environ.get("MODEL_NAME") or model or self.engine, - # "prompt": self.conversation.Conversation(convo_id) if pass_history else f"\n\nHuman:{prompt}\n\nAssistant:", - # "stream": True, - # "temperature": kwargs.get("temperature", self.temperature), - # "top_p": kwargs.get("top_p", self.top_p), - # "max_tokens_to_sample": model_max_tokens, - # } - - response = self.session.post( - url, - headers=headers, - json=json_post, - timeout=kwargs.get("timeout", self.timeout), - stream=True, - ) - if response.status_code != 200: - raise BaseException(f"{response.status_code} {response.reason} {response.text}") - response_role: str = "assistant" - full_response: str = "" - print("11111111") - for line in response.iter_lines(): - if not line: - continue - # Remove "data: " - print(line.decode("utf-8")) - if line.decode("utf-8")[:6] == "data: ": - line = line.decode("utf-8")[6:] - else: - print(line.decode("utf-8")) - full_response = json.loads(line.decode("utf-8"))["choices"][0]["message"]["content"] - yield full_response - break - if line == "[DONE]": - break - resp: dict = json.loads(line) - # print("resp", resp) - choices = resp.get("choices") - if not choices: - continue - delta = choices[0].get("delta") - if not delta: - continue - if "role" in delta: - response_role = delta["role"] - if "content" in delta and delta["content"]: - content = delta["content"] - full_response += content - yield content - self.add_to_conversation(full_response, response_role, convo_id=convo_id) - # print(repr(self.conversation.Conversation(convo_id))) - # print("total tokens:", self.get_token_count(convo_id)) - - -bot = groqbot(api_key=os.environ.get("GROQ_API_KEY")) - -for i in bot.ask_stream("python怎么自定义错误"): - print(i, end="") \ No newline at end of file diff --git a/test/test_jieba.py b/test/test_jieba.py deleted file mode 100644 index ee29e1d3..00000000 --- a/test/test_jieba.py +++ /dev/null @@ -1,32 +0,0 @@ -import jieba -import 
jieba.analyse
-
-# Load the sample text
-# text = "话说葬送的芙莉莲动漫是半年番还是季番?完结没?"
-# text = "民进党当初为什么支持柯文哲选台北市长?"
-text = "今天的微博热搜有哪些?"
-# text = "How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?"
-
-# Extract keywords with the TF-IDF algorithm
-keywords_tfidf = jieba.analyse.extract_tags(text, topK=10, withWeight=False, allowPOS=())
-
-# Extract keywords with the TextRank algorithm
-keywords_textrank = jieba.analyse.textrank(text, topK=10, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))
-
-print("TF-IDF算法提取的关键词:", keywords_tfidf)
-print("TextRank算法提取的关键词:", keywords_textrank)
-
-
-seg_list = jieba.cut(text, cut_all=True)
-print("Full Mode: " + " ".join(seg_list))  # full mode
-
-seg_list = jieba.cut(text, cut_all=False)
-print("Default Mode: " + " ".join(seg_list))  # accurate mode
-
-seg_list = jieba.cut(text)  # accurate mode is the default
-print(" ".join(seg_list))
-
-seg_list = jieba.cut_for_search(text)  # search-engine mode
-result = " ".join(seg_list)
-
-print([result] * 3)
\ No newline at end of file
diff --git a/test/test_json.py b/test/test_json.py
deleted file mode 100644
index 2d810f71..00000000
--- a/test/test_json.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import json
-
-json_data = '爱'
-# json_data = '爱的主人,我会尽快为您规划一个走线到美国的安全路线。请您稍等片刻。\n\n首先,我会检查免签国家并为您提供相应的信息。接下来,我会搜索有关旅行到美国的安全建议和路线规划。{}'
-def check_json(json_data):
-    while True:
-        try:
-            json.loads(json_data)
-            break
-        except json.decoder.JSONDecodeError as e:
-            print("JSON error:", e)
-            print("JSON body", repr(json_data))
-            if "Invalid control character" in str(e):
-                json_data = json_data.replace("\n", "\\n")
-            if "Unterminated string starting" in str(e):
-                json_data += '"}'
-            if "Expecting ',' delimiter" in str(e):
-                json_data += '}'
-            if "Expecting value: line 1 column 1" in str(e):
-                json_data = '{"prompt": ' + json.dumps(json_data) + '}'
-    return json_data
-print(json.loads(check_json(json_data)))
diff --git a/test/test_keyword.py b/test/test_keyword.py
deleted file mode 100644
index e4c0fca5..00000000
--- a/test/test_keyword.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import time as record_time
-import config
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from langchain.chat_models import ChatOpenAI
-from utils.googlesearch import GoogleSearchAPIWrapper
-
-def getgooglesearchurl(result, numresults=1):
-    google_search = GoogleSearchAPIWrapper()
-    urls = []
-    googleresult = google_search.results(result, numresults)
-    for i in googleresult:
-        if "No good Google Search Result was found" in i:
-            continue
-        urls.append(i["link"])
-    return urls
-
-
-if __name__ == "__main__":
-    # os.system("clear")
-    start_time = record_time.time()
-
-    chainllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=config.GPT_ENGINE, openai_api_key=config.API)
-    keyword_prompt = PromptTemplate(
-        input_variables=["source"],
-        template=(
-            "根据我的问题,总结关键词概括问题,输出要求如下:"
-            "1. 给出三行不同的关键词组合,每行的关键词用空格连接。每行关键词可以是一个或者多个。"
-            "2. 至少有一行关键词里面有中文,至少有一行关键词里面有英文。"
-            "3. 只要直接给出这三行关键词,不需要其他任何解释,不要出现其他符号和内容。"
-            "4. 如果问题有关于日漫,至少有一行关键词里面有日文。"
-            "下面是一些根据问题提取关键词的示例:"
-            "问题 1:How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?"
-            "三行关键词是:"
-            "zeabur price"
-            "zeabur documentation"
-            "zeabur 价格"
-            "问题 2:pplx API 怎么使用?"
-            "三行关键词是:"
-            "pplx API demo"
-            "pplx API"
-            "pplx API 使用方法"
-            "问题 3:以色列哈马斯的最新情况"
-            "三行关键词是:"
-            "以色列 哈马斯 最新情况"
-            "Israel Hamas situation"
-            "哈马斯 以色列 冲突"
-            "问题 4:话说葬送的芙莉莲动漫是半年番还是季番?完结没?"
-            "三行关键词是:"
-            "葬送的芙莉莲"
-            "葬送のフリーレン"
-            "Frieren: Beyond Journey's End"
-            "问题 5:周海媚最近发生了什么"
-            "三行关键词是:"
-            "周海媚"
-            "周海媚 事件"
-            "Kathy Chau Hoi Mei news"
-            "这是我的问题:{source}"
-        ),
-    )
-    key_chain = LLMChain(llm=chainllm, prompt=keyword_prompt)
-    # result = key_chain.run("以色列哈马斯的最新情况")
-    # result = key_chain.run("今天的微博热搜有哪些?")
-    result = key_chain.run("中国最新公布的游戏政策,对游戏行业和其他相关行业有什么样的影响?")
-    # result = key_chain.run("鸿蒙是安卓套壳吗?")
-    # result = key_chain.run("How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?")
-
-    end_time = record_time.time()
-    run_time = end_time - start_time
-
-    print(result)
-    print("Run time: {}".format(run_time))
-    # print(getgooglesearchurl("zeabur price"))
-    # for i in result:
-    #     print(getgooglesearchurl(i))
\ No newline at end of file
diff --git a/test/test_langchain_search_old.py b/test/test_langchain_search_old.py
deleted file mode 100644
index d6a67740..00000000
--- a/test/test_langchain_search_old.py
+++ /dev/null
@@ -1,235 +0,0 @@
-import os
-import re
-
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-import config
-
-from langchain.chat_models import ChatOpenAI
-
-
-from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
-
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.text_splitter import CharacterTextSplitter
-
-from langchain.document_loaders import UnstructuredPDFLoader
-
-def getmd5(string):
-    import hashlib
-    md5_hash = hashlib.md5()
-    md5_hash.update(string.encode('utf-8'))
-    md5_hex = md5_hash.hexdigest()
-    return md5_hex
-
-from utils.sitemap import SitemapLoader
-async def get_doc_from_sitemap(url):
-    # https://www.langchain.asia/modules/indexes/document_loaders/examples/sitemap#%E8%BF%87%E6%BB%A4%E7%AB%99%E7%82%B9%E5%9C%B0%E5%9B%BE-url-
-    sitemap_loader = SitemapLoader(web_path=url)
-    docs = await sitemap_loader.load()
-    return docs
-
-async def get_doc_from_local(docpath, doctype="md"):
-    from langchain.document_loaders import DirectoryLoader
-    # Load all txt-type files in the folder
-    loader = DirectoryLoader(docpath, glob='**/*.' + doctype)
-    # Convert the data into document objects; each file becomes one document
-    documents = loader.load()
-    return documents
-
-system_template="""Use the following pieces of context to answer the users question.
-If you don't know the answer, just say "Hmm..., I'm not sure.", don't try to make up an answer.
-ALWAYS return a "Sources" part in your answer.
-The "Sources" part should be a reference to the source of the document from which you got your answer.
-
-Example of your response should be:
-
-```
-The answer is foo
-
-Sources:
-1. abc
-2. xyz
-```
-Begin!
-----------------
-{summaries}
-"""
-messages = [
-    SystemMessagePromptTemplate.from_template(system_template),
-    HumanMessagePromptTemplate.from_template("{question}")
-]
-prompt = ChatPromptTemplate.from_messages(messages)
-
-def get_chain(store, llm):
-    chain_type_kwargs = {"prompt": prompt}
-    chain = RetrievalQAWithSourcesChain.from_chain_type(
-        llm,
-        chain_type="stuff",
-        retriever=store.as_retriever(),
-        chain_type_kwargs=chain_type_kwargs,
-        reduce_k_below_max_tokens=True
-    )
-    return chain
-
-async def docQA(docpath, query_message, persist_db_path="db", model = "gpt-3.5-turbo"):
-    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=config.API)
-    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=config.API)
-
-    sitemap = "sitemap.xml"
-    match = re.match(r'^(https?|ftp)://[^\s/$.?#].[^\s]*$', docpath)
-    if match:
-        doc_method = get_doc_from_sitemap
-        docpath = os.path.join(docpath, sitemap)
-    else:
-        doc_method = get_doc_from_local
-
-    persist_db_path = getmd5(docpath)
-    if not os.path.exists(persist_db_path):
-        documents = await doc_method(docpath)
-        # Initialize the text splitter
-        text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=50)
-        # Persist the data
-        split_docs = text_splitter.split_documents(documents)
-        vector_store = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_db_path)
-        vector_store.persist()
-    else:
-        # Load the persisted data
-        vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)
-
-    # Build the QA object
-    qa = get_chain(vector_store, chatllm)
-    # qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
-    # Run the question answering
-    result = qa({"question": query_message})
-    return result
-
-
-def persist_emdedding_pdf(docurl, persist_db_path):
-    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
-    filename = get_doc_from_url(docurl)
-    docpath = os.getcwd() + "/" + filename
-    loader = UnstructuredPDFLoader(docpath)
-    documents = loader.load()
-    # Initialize the text splitter
-    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25)
-    # Split the loaded documents
-    split_docs = text_splitter.split_documents(documents)
-    vector_store = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_db_path)
-    vector_store.persist()
-    os.remove(docpath)
-    return vector_store
-
-async def pdfQA(docurl, docpath, query_message, model="gpt-3.5-turbo"):
-    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
-    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
-    persist_db_path = getmd5(docpath)
-    if not os.path.exists(persist_db_path):
-        vector_store = persist_emdedding_pdf(docurl, persist_db_path)
-    else:
-        vector_store = Chroma(persist_directory=persist_db_path, embedding_function=embeddings)
-    qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
-    result = qa({"query": query_message})
-    return result['result']
-
-
-def pdf_search(docurl, query_message, model="gpt-3.5-turbo"):
-    chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.bot_api_url.v1_url, model_name=model, openai_api_key=os.environ.get('API', None))
-    embeddings = OpenAIEmbeddings(openai_api_base=config.bot_api_url.v1_url, openai_api_key=os.environ.get('API', None))
-    filename = get_doc_from_url(docurl)
-    docpath = os.getcwd() + "/" + filename
-    loader = UnstructuredPDFLoader(docpath)
-    try:
-        documents = loader.load()
-    except:
-        print("pdf load error! docpath:", docpath)
-        return ""
-    os.remove(docpath)
-    # Initialize the text splitter
-    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25)
-    # Split the loaded documents
-    split_docs = text_splitter.split_documents(documents)
-    vector_store = Chroma.from_documents(split_docs, embeddings)
-    # Build the QA object
-    qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
-    # Run the question answering
-    result = qa({"query": query_message})
-    return result['result']
-
-def summary_each_url(threads, chainllm, prompt):
-    summary_prompt = PromptTemplate(
-        input_variables=["web_summary", "question", "language"],
-        template=(
-            "You need to response the following question: {question}."
-            "Your task is answer the above question in {language} based on the Search results provided. Provide a detailed and in-depth response"
-            "If there is no relevant content in the search results, just answer None, do not make any explanations."
-            "Search results: {web_summary}."
-        ),
-    )
-    summary_threads = []
-
-    for t in threads:
-        tmp = t.join()
-        print(tmp)
-        chain = LLMChain(llm=chainllm, prompt=summary_prompt)
-        chain_thread = ThreadWithReturnValue(target=chain.run, args=({"web_summary": tmp, "question": prompt, "language": config.LANGUAGE},))
-        chain_thread.start()
-        summary_threads.append(chain_thread)
-
-    url_result = ""
-    for t in summary_threads:
-        tmp = t.join()
-        print("summary", tmp)
-        if tmp != "None":
-            url_result += "\n\n" + tmp
-    return url_result
-
-def get_search_results(prompt: str, context_max_tokens: int):
-
-    url_text_list = get_url_text_list(prompt)
-    useful_source_text = "\n\n".join(url_text_list)
-    # useful_source_text = summary_each_url(threads, chainllm, prompt)
-
-    useful_source_text, search_tokens_len = cut_message(useful_source_text, context_max_tokens)
-    print("search tokens len", search_tokens_len, "\n\n")
-
-    return useful_source_text
-
-from typing import Any
-from langchain.schema.output import LLMResult
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-class ChainStreamHandler(StreamingStdOutCallbackHandler):
-    def __init__(self):
-        self.tokens = []
-        # remember to set this to True once generation finishes
-        self.finish = False
-        self.answer = ""
-
-    def on_llm_new_token(self, token: str, **kwargs):
-        # print(token)
-        self.tokens.append(token)
-        # yield ''.join(self.tokens)
-        # print(''.join(self.tokens))
-
-    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
-        self.finish = True
-
-    def on_llm_error(self, error: Exception, **kwargs: Any) -> None:
-        print(str(error))
-        self.tokens.append(str(error))
-
-    def generate_tokens(self):
-        while not self.finish or self.tokens:
-            if self.tokens:
-                data = self.tokens.pop(0)
-                self.answer += data
-                yield data
-            else:
-                pass
-        return self.answer
\ No newline at end of file
diff --git a/test/test_logging.py b/test/test_logging.py
deleted file mode 100644
index 5780336a..00000000
--- a/test/test_logging.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import logging
-
-class SpecificStringFilter(logging.Filter):
-    def __init__(self, specific_string):
-        super().__init__()
-        self.specific_string = specific_string
-
-    def filter(self, record):
-        return self.specific_string not in record.getMessage()
-
-# Create a logger
-logger = logging.getLogger('my_logger')
-logger.setLevel(logging.DEBUG)
-
-# Create a console handler and set its level to DEBUG
-ch = logging.StreamHandler()
-# ch.setLevel(logging.DEBUG)
-
-# Create a filter instance
-specific_string = "httpx.RemoteProtocolError: Server disconnected without sending a response."
-my_filter = SpecificStringFilter(specific_string)
-
-# Add the filter to the handler
-ch.addFilter(my_filter)
-
-# Add the handler to the logger
-logger.addHandler(ch)
-
-# Emit some test log messages
-logger.debug("This is a debug message.")
-logger.error("This message will be ignored: ignore me.httpx.RemoteProtocolError: Server disconnected without sending a response.")
-logger.info("Another info message.")
\ No newline at end of file
diff --git a/test/test_ollama.py b/test/test_ollama.py
deleted file mode 100644
index 8412432f..00000000
--- a/test/test_ollama.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import os
-from rich.console import Console
-from rich.markdown import Markdown
-import json
-import requests
-
-def query_ollama(prompt, model):
-    # Set the request URL and payload
-    url = 'http://localhost:11434/api/generate'
-    data = {
-        "model": model,
-        "prompt": prompt,
-        "stream": True,
-    }
-
-    response = requests.Session().post(
-        url,
-        json=data,
-        stream=True,
-    )
-    full_response: str = ""
-    for line in response.iter_lines():
-        if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}":
-            continue
-        line = line.decode("utf-8")
-        # print(line)
-        resp: dict = json.loads(line)
-        content = resp.get("response")
-        if not content:
-            continue
-        full_response += content
-        yield content
-
-if __name__ == "__main__":
-    console = Console()
-    # model = 'llama2'
-    # model = 'mistral'
-    # model = 'llama3:8b'
-    model = 'qwen:14b'
-    # model = 'wizardlm2:7b'
-    # model = 'codeqwen:7b-chat'
-    # model = 'phi'
-
-    # Run the query
-    prompt = r'''
-
-The Space-Time Video Super-Resolution (STVSR) task aims to enhance the visual quality of videos, by simultaneously performing video frame interpolation (VFI) and video super-resolution (VSR). However, facing the challenge of the additional temporal dimension and scale inconsistency, most existing STVSR methods are complex and inflexible in dynamically modeling different motion amplitudes. In this work, we find that choosing an appropriate processing scale achieves remarkable benefits in flow-based feature propagation. We propose a novel Scale-Adaptive Feature Aggregation (SAFA) network that adaptively selects sub-networks with different processing scales for individual samples. Experiments on four public STVSR benchmarks demonstrate that SAFA achieves state-of-the-art performance. Our SAFA network outperforms recent state-of-the-art methods such as TMNet [83] and VideoINR [10] by an average improvement of over 0.5dB on PSNR, while requiring less than half the number of parameters and only 1/3 computational costs.
- -上面的文字翻译成中文 - -''' - answer = "" - for result in query_ollama(prompt, model): - os.system("clear") - answer += result - md = Markdown(answer) - console.print(md, no_wrap=False) diff --git a/test/test_pdf.py b/test/test_pdf.py deleted file mode 100644 index 03a447e2..00000000 --- a/test/test_pdf.py +++ /dev/null @@ -1,21 +0,0 @@ -from pdfminer.high_level import extract_text -text = extract_text('/Users/yanyuming/Desktop/中国计算机学会推荐中文科技期刊目录.pdf') -# text = extract_text('/Users/yanyuming/Library/Mobile Documents/iCloud~QReader~MarginStudy/Documents/论文/VersatileGait- A Large-Scale Synthetic Gait Dataset with Fine-Grained Attributes and Complicated Scenarios.pdf') -# print(repr(text)) -print(text) - -# from io import StringIO -# from pdfminer.high_level import extract_text_to_fp -# from pdfminer.layout import LAParams -# output_string = StringIO() -# with open('/Users/yanyuming/Desktop/Gait review.pdf', 'rb') as fin: -# extract_text_to_fp(fin, output_string, laparams=LAParams(), -# output_type='html', codec=None) -# print(output_string.getvalue().strip()) - -# from io import StringIO -# from pdfminer.high_level import extract_text_to_fp -# output_string = StringIO() -# with open('/Users/yanyuming/Library/Mobile Documents/iCloud~QReader~MarginStudy/Documents/论文/VersatileGait- A Large-Scale Synthetic Gait Dataset with Fine-Grained Attributes and Complicated Scenarios.pdf', 'rb') as fin: -# extract_text_to_fp(fin, output_string) -# print(output_string.getvalue().strip()) \ No newline at end of file diff --git a/test/test_re_agent.py b/test/test_re_agent.py deleted file mode 100644 index acd346be..00000000 --- a/test/test_re_agent.py +++ /dev/null @@ -1,9 +0,0 @@ -import re -matches = re.finditer(r"answer: (.*)", test_str, re.MULTILINE) -result = [] -for matchNum, match in enumerate(matches, start=1): - for groupNum in range(0, len(match.groups())): - groupNum = groupNum + 1 - result.append(match.group(groupNum)) - -print("\n\n".join(result)) diff --git a/test/test_search.py b/test/test_search.py deleted file mode 100644 index 4b993e66..00000000 --- a/test/test_search.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from utils.plugins import get_search_results - -for i in get_search_results("今天的微博热搜有哪些?"): - print(i) \ No newline at end of file diff --git a/test/test_string_len.py b/test/test_string_len.py deleted file mode 100644 index 3cb75f0d..00000000 --- a/test/test_string_len.py +++ /dev/null @@ -1,9 +0,0 @@ -print(len(a), len(repr(a)) - 2) -print(len(b), len(repr(b)) - 2) -print(len(c), len(repr(c)) - 2) -print(len(d), len(repr(d)) - 2) -print(len(e), len(repr(e)) - 2) -print(len(f), len(repr(f)) - 2) -print(len(g), len(repr(g)) - 2) -print(len(h), len(repr(h)) - 2) -print(len(i), len(repr(i)) - 2) \ No newline at end of file diff --git a/test/test_summary.py b/test/test_summary.py deleted file mode 100644 index 14e98863..00000000 --- a/test/test_summary.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -os.system("clear") -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import config -import threading -from langchain.prompts import PromptTemplate -from langchain.chains import LLMChain -from langchain.chat_models import ChatOpenAI -from langchain.chat_models import ChatOpenAI - -class ThreadWithReturnValue(threading.Thread): - def run(self): - if self._target is not None: - self._return = self._target(*self._args, **self._kwargs) - - def join(self): - super().join() - 
return self._return - - -text = """ -HomeReviewsAppleShould You Wait for the iPhone 15?Should You Install the Latest Version of iOS 16?iOS 16 ProblemsHow to Fix iOS 16 ProblemsThings the Apple Watch Can DoHow to Hide iPhone LocationAndroidBest Android Phones Under $200 (2023)Samsung Galaxy S22 ProblemsSamsung Galaxy Android 13 Update Info (2023)How ToHow to Fix iPhone 14 ProblemsHow to Fix iPhone 14 Battery Life ProblemsHow to Fix iPhone 14 Performance ProblemsGamingHow to Fix NBA 2K23 ProblemsBest Offline GamesEditor’s ChoiceAbout Connect with us Gotta Be Mobile5 Things to Know About the macOS Ventura 13.6.1 UpdateHomeReviewsAppleShould You Wait for the iPhone 15?Should You Install the Latest Version of iOS 16?iOS 16 ProblemsHow to Fix iOS 16 ProblemsThings the Apple Watch Can DoHow to Hide iPhone LocationAndroidBest Android Phones Under $200 (2023)Samsung Galaxy S22 ProblemsSamsung Galaxy Android 13 Update Info (2023)How ToHow to Fix iPhone 14 ProblemsHow to Fix iPhone 14 Battery Life ProblemsHow to Fix iPhone 14 Performance ProblemsGamingHow to Fix NBA 2K23 ProblemsBest Offline GamesEditor’s ChoiceAbout Apple5 Things to Know About the macOS Ventura 13.6.1 UpdatePublished 2 weeks ago on 11/16/2023By Adam Mills Apple’s pushed a brand new macOS Ventura 13.6.1 to Mac users.macOS Ventura 13.6.1 is a point release and it’s a small upgrade for Mac lingering on macOS Ventura. The software brings under-the-hood improvements to these devices.With that in mind, we want to take you through everything there you need to know about the latest macOS Ventura update for Mac.Table of ContentsmacOS Ventura 13.6.1 ReviewmacOS Ventura 13.6.1 ProblemsmacOS Ventura 13.6.1 FeaturesmacOS Ventura 13.6.1 DowngradeWhat’s NextWe’ll start out with some quick impressions regarding macOS Ventura 13.6.1 update’s performance.macOS Ventura 13.6.1 ReviewIf your Mac is currently running macOS Ventura 13.6, you’ll see a fairly small download. After the download, the installation should take around 10 or so minutes to complete.For more on the macOS Ventura 13.6.1 download and installation, take a look at our guide. It’ll walk you through everything you need to know before you install the new firmware on your Mac.We’ve been using the macOS Ventura 13.6.1 update on a MacBook Pro (2017) for several days now and performance is holding up nicely. 
Here’s what we’ve learned about its performance:macOS Ventura 13.6.1 Battery Life & ConnectivityBattery life is stable right now and we haven’t noticed any abnormal drainWi-Fi connectivity is currently fast and reliableBluetooth is working normallyApp PerformanceThird-party apps, including Twitter, Slack, Chrome, and Spotify are working properlyFirst party apps like Safari and Podcasts are also stable at the momentSpeedmacOS Ventura 13.6.1 feels fast and we haven’t noticed any UI lagIf your Mac is struggling on macOS Ventura 13.6 or an older version of macOS, you might want to install macOS Ventura 13.6.1 on your Mac right now.If you’re feeling leery, we’ve put together a list of the best reasons to, and not to, install the latest version of macOS Ventura on you Mac today.macOS Ventura 13.6.1 ProblemsApple tested macOS Ventura 13.6.1 for issues behind the scenes, but problems have unsurprisingly made their way into the public releaseWe haven’t noticed any major bugs or performance issues yet, but some users are complaining about performance issues and bugs.The current list of problems includes abnormal battery drain, Wi-Fi issues, Bluetooth problems, and problems with various first and third-party applications.If you run into issues on macOS Ventura 13.6.1, here are a few resources that could help.Before you get in touch with Apple’s customer support, have a look at our list of fixes for the most common macOS Ventura problems.If your Mac’s battery life is struggling after the upgrade, check out our fixes for macOS Ventura battery life problems.If you can’t find what you’re looking for in our guides, head over to Apple’s discussion forums. If the users there can’t help you, you might want to get in touch Apple customer support via the company’s website.If you still can’t fix your issue(s), you’ll probably want to schedule an appointment at your local Apple Store.macOS Ventura 13.6.1 FeaturesApple’s maintenance updates typically bring under-the-hood improvements to Mac devices and that’s exactly what macOS Ventura 13.6.1 brings to the table.The software doesn’t have any new features or bug fixes on board, but instead delivers security patches to Mac devices. You can learn more about the patches over on Apple’s security site.macOS Ventura 13.6.1 DowngradeIf you run into problems on macOS Ventura 13.6.1, you can try moving your Mac’s software back to an older version if you think it could help.If you’re interested in moving back to older software, have a look at our downgrade guide. It will help you get familiar with the process and make the move back.What’s NextIf your Mac is struggling on macOS Ventura 13.6.1 or an older version of macOS, you’ll be on your own for a bit.macOS Ventura 13.6.1 won’t be the last version of macOS Ventura, but it’s unclear when the next version will roll out to those still utilizing the operating system.We don’t expect Apple to bring new features to macOS Ventura down the line, but we do expect the company to continue offering security updates.These upgrades will probably roll out alongside new versions of macOS Sonoma so be on the lookout in the weeks ahead. 
The next version of macOS Sonoma should roll out in December.4 Reasons Not to Install macOS Ventura 13.6.1 & 11 Reasons You ShouldInstall macOS Ventura 13.6.1 for Better Security>1 / 15If you want to protect your Mac and its data from harm, you'll probably want to download the newest version of macOS Ventura.macOS Ventura 13.6.1's focus is on security and the firmware brings important security patches to Macs. The software patches up issues with FindMy, PassKeys, and more.As for older updates, Apple's macOS Ventura 13.6 update had a trio of security patches on board. If you're interested in the particulars, check out the company's security site.macOS Ventura 13.5.2 brought one security patch with it and it was an important one. If you want more information about the fix, head over to Apple.macOS Ventura 13.5 brought 29 security patches to Mac. To learn more about these patches, head over to Apple's website. macOS Ventura 13.4.1 had two essential security patches on board. If you want to learn more about them, head on over to Apple's website.Apple's macOS Security Response 13.4.1 (c) update also included a security update and you can learn more about it right here.macOS Ventura 13.4 had numerous security updates on board and you can read more about them on Apple's security site.macOS Ventura 13.3.1 patched up two actively exploited vulnerabilities which made it an important download. You can learn about the two patches right here.The macOS Ventura 13.3 update brought numerous security patches to Macs. If you want to learn more about these changes, check out Apple's guide.macOS Ventura 13.2.1 brought four security patches to compatible Mac models. You can read more about those over on Apple's security site.The macOS Ventura 13.2 update brought 25 security patches to Mac devices. You can read about those right here.In addition, the software came with support for physical security keys for Apple ID. These will beef up your account security by requiring a physical security key as part of the two factor authentication sign in process. Learn more about the change right here.macOS Ventura 13.1 delivered 35 security patches and it also brought upgrades to Advanced Data Protection for iCloud.macOS Ventura 13.0.1 brought two security patches and the first version of macOS Ventura, macOS Ventura 13.0, had a ton of patches on board. >1 / 15Related Topics:AppleMacBookMacBook PromacOSmacOS VenturamacOS Ventura 13.6.1macOS Ventura 13.6.1 changesmacOS Ventura 13.6.1 featuresmacOS Ventura 13.6.1 issuesmacOS Ventura 13.6.1 performancemacOS Ventura 13.6.1 problemsmacOS Ventura 13.6.1 reviewmacOS Ventura 13.6.1 reviewsmacOS Ventura 13.6.1 softwaremacOS Ventura 13.6.1 updatemacOS Ventura 13.6.1 upgradeSoftwareTechnologyClick to commentLeave a Reply Cancel replyYour email address will not be published. Required fields are marked *Comment * Name * Email * Website Save my name, email, and website in this browser for the next time I comment. Δ As an Amazon Associate I earn from qualifying purchases. Privacy Policy Copyright © 2023 SXL Media Group, Inc. -""" - -summary_prompt = PromptTemplate( - input_variables=["web_summary", "question", "language"], - template=( - "You need to response the following question: {question}." - "Your task is answer the above question in {language} based on the Search results provided." - "Search results: {web_summary}." - # "If there is no relevant content in the search results, just answer None, do not make any explanations." 
- ), -) -chainllm = ChatOpenAI(temperature=config.temperature, openai_api_base=config.bot_api_url.v1_url, model_name=config.GPT_ENGINE, openai_api_key=config.API) - -chain = LLMChain(llm=chainllm, prompt=summary_prompt) - -# result = chain.run({"web_summary": text, "question": "macos 13.6 有什么新功能", "language": config.LANGUAGE}) -chain_thread = ThreadWithReturnValue(target=chain.run, args=({"web_summary": text, "question": "macos 13.6 有什么新功能", "language": config.LANGUAGE},)) -chain_thread.start() -result = chain_thread.join() -print(result) \ No newline at end of file diff --git a/test/test_tikitoken.py b/test/test_tikitoken.py deleted file mode 100644 index 38d3f5ec..00000000 --- a/test/test_tikitoken.py +++ /dev/null @@ -1,19 +0,0 @@ -import tiktoken -tiktoken.get_encoding("cl100k_base") -tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base" -tiktoken.get_encoding("cl100k_base") -encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-16k") -# encoding = tiktoken.encoding_for_model("claude-2.1") -encode_web_text_list = [] -if encode_web_text_list == []: - encode_web_text_list = encoding.encode("Hello, my dog is cute") - print("len", len(encode_web_text_list)) -function_response = encoding.decode(encode_web_text_list[:2]) -print(function_response) -encode_web_text_list = encode_web_text_list[2:] -print(encode_web_text_list) -encode_web_text_list = [856, 5679, 374, 19369] -tiktoken.get_encoding("cl100k_base") -encoding1 = tiktoken.encoding_for_model("gpt-3.5-turbo-16k") -function_response = encoding1.decode(encode_web_text_list[:2]) -print(function_response) \ No newline at end of file diff --git a/test/test_token.py b/test/test_token.py deleted file mode 100644 index e182f7f7..00000000 --- a/test/test_token.py +++ /dev/null @@ -1,94 +0,0 @@ -import os -import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import tiktoken -from utils.function_call import function_call_list -import config -import requests -import json -import re - -from dotenv import load_dotenv -load_dotenv() - -def get_token_count(messages) -> int: - tiktoken.get_encoding("cl100k_base") - encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") - - num_tokens = 0 - for message in messages: - # every message follows {role/name}\n{content}\n - num_tokens += 5 - for key, value in message.items(): - if value: - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += 5 # role is always required and always 1 token - num_tokens += 5 # every reply is primed with assistant - return num_tokens -# print(get_token_count(message_list)) - - - -def get_message_token(url, json_post): - headers = {"Authorization": f"Bearer {os.environ.get('API', None)}"} - response = requests.Session().post( - url, - headers=headers, - json=json_post, - timeout=None, - ) - if response.status_code != 200: - json_response = json.loads(response.text) - string = json_response["error"]["message"] - print(string) - string = re.findall(r"\((.*?)\)", string)[0] - numbers = re.findall(r"\d+\.?\d*", string) - numbers = [int(i) for i in numbers] - if len(numbers) == 2: - return { - "messages": numbers[0], - "total": numbers[0], - } - elif len(numbers) == 3: - return { - "messages": numbers[0], - "functions": numbers[1], - "total": numbers[0] + numbers[1], - } - else: - raise Exception("Unknown error") - - -if __name__ == "__main__": - # message_list = [{'role': 'system', 'content': 'You are ChatGPT, a large language model trained by OpenAI. 
Respond conversationally in Simplified Chinese. Knowledge cutoff: 2021-09. Current date: [ 2023-12-12 ]'}, {'role': 'user', 'content': 'hi'}]
-    messages = [{'role': 'system', 'content': 'You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in Simplified Chinese. Knowledge cutoff: 2021-09. Current date: [ 2023-12-12 ]'}, {'role': 'user', 'content': 'hi'}, {'role': 'assistant', 'content': '你好!有什么我可以帮助你的吗?'}]
-
-    model = "gpt-3.5-turbo"
-    temperature = 0.5
-    top_p = 0.7
-    presence_penalty = 0.0
-    frequency_penalty = 0.0
-    reply_count = 1
-    role = "user"
-    model_max_tokens = 5000
-    url = config.bot_api_url.chat_url
-
-    json_post = {
-        "model": model,
-        "messages": messages,
-        "stream": True,
-        "temperature": temperature,
-        "top_p": top_p,
-        "presence_penalty": presence_penalty,
-        "frequency_penalty": frequency_penalty,
-        "n": reply_count,
-        "user": role,
-        "max_tokens": model_max_tokens,
-    }
-    # json_post.update(function_call_list["base"])
-    # if config.PLUGINS["SEARCH_USE_GPT"]:
-    #     json_post["functions"].append(function_call_list["SEARCH_USE_GPT"])
-    # json_post["functions"].append(function_call_list["URL"])
-    # print(get_token_count(message_list))
-    print(get_message_token(url, json_post))
diff --git a/test/test_tools_class.py b/test/test_tools_class.py
deleted file mode 100644
index 9fc98238..00000000
--- a/test/test_tools_class.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import json
-
-class ToolsBase:
-    def __init__(self, data):
-        if not isinstance(data, dict):
-            raise ValueError("Input should be a dictionary.")
-        for key, value in data.items():
-            # store values on underscore-prefixed attributes so the read-only properties below resolve
-            setattr(self, "_" + key, value)
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def description(self):
-        return self._description
-
-    def to_json(self):
-        return json.dumps({k.lstrip("_"): getattr(self, k) for k in vars(self) if not k.startswith("__")}, ensure_ascii=False)
-
-class GPTFunctionCall(ToolsBase):
-
-    @ToolsBase.name.setter
-    def name(self, value):
-        if not isinstance(value, str):
-            raise ValueError("Name should be a string.")
-        self._name = value
-
-    @property
-    def description(self):
-        return self._description
-
-    @property
-    def parameters(self):
-        return self._parameters
-
-    def to_json(self):
-        return json.dumps({'name': self.name, 'description': self.description, 'parameters': self.parameters}, ensure_ascii=False)
-
-class CLAUDEToolsUse(ToolsBase):
-
-    @ToolsBase.name.setter
-    def name(self, value):
-        if not isinstance(value, str):
-            raise ValueError("Name should be a string.")
-        self._name = value
-
-    @property
-    def description(self):
-        return self._description
-
-    @property
-    def input_schema(self):
-        return self._input_schema
-
-    def to_json(self):
-        props = self.input_schema['properties']
-        required = self.input_schema['required']
-        return json.dumps({'name': self.name, 'description': self.description, 'input_schema': {'type': 'object', 'properties': props, 'required': required}}, ensure_ascii=False)
-# Example usage
-gpt_function_call = GPTFunctionCall({"name": "get_search_results", "description": "Search Google to enhance knowledge.", "parameters": {"type": "object", "properties": {"prompt": {"type": "string", "description": "The prompt to search."}}, "required": ["prompt"]}})
-print(gpt_function_call.to_json())
-claude_tools_use = CLAUDEToolsUse({"name": "get_stock_price", "description": "Get the current stock price for a given ticker symbol.", 
"input_schema": {"type": "object", "properties": {"ticker": {"type": "string","description": "The stock ticker symbol, e.g. AAPL for Apple Inc."}}, "required": ["ticker"]}}) -print(claude_tools_use.to_json()) - -class claude3bot: - def __init__( - self, - api_key: str, - engine: str = os.environ.get("GPT_ENGINE") or "claude-3-opus-20240229", - temperature: float = 0.5, - top_p: float = 0.7, - chat_url: str = "https://api.anthropic.com/v1/messages", - timeout: float = 20, - system_prompt: str = "You are ChatGPT, a large language model trained by OpenAI. Respond conversationally", - **kwargs, - ): - self.api_key: str = api_key - self.engine: str = engine - self.temperature = temperature - self.top_p = top_p - self.chat_url = chat_url - self.timeout = timeout - self.session = requests.Session() - self.conversation: dict[str, list[dict]] = { - "default": [], - } - self.system_prompt = system_prompt - - def add_to_conversation( - self, - message: str, - role: str, - convo_id: str = "default", - pass_history: bool = True, - ) -> None: - """ - Add a message to the conversation - """ - - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - # print("message", message) - self.conversation[convo_id].append({"role": role, "content": message}) - index = len(self.conversation[convo_id]) - 2 - if index >= 0 and self.conversation[convo_id][index]["role"] == self.conversation[convo_id][index + 1]["role"]: - self.conversation[convo_id][index]["content"] += self.conversation[convo_id][index + 1]["content"] - self.conversation[convo_id].pop(index + 1) - - def reset(self, convo_id: str = "default", system_prompt: str = None) -> None: - """ - Reset the conversation - """ - self.conversation[convo_id] = list() - - def __truncate_conversation(self, convo_id: str = "default") -> None: - """ - Truncate the conversation - """ - while True: - if ( - self.get_token_count(convo_id) > self.truncate_limit - and len(self.conversation[convo_id]) > 1 - ): - # Don't remove the first message - self.conversation[convo_id].pop(1) - else: - break - - def get_token_count(self, convo_id: str = "default") -> int: - """ - Get token count - """ - if self.engine not in ENGINES: - raise NotImplementedError( - f"Engine {self.engine} is not supported. 
Select from {ENGINES}", - ) - tiktoken.model.MODEL_TO_ENCODING["claude-2.1"] = "cl100k_base" - encoding = tiktoken.encoding_for_model(self.engine) - - num_tokens = 0 - for message in self.conversation[convo_id]: - # every message follows {role/name}\n{content}\n - num_tokens += 5 - for key, value in message.items(): - if value: - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens += 5 # role is always required and always 1 token - num_tokens += 5 # every reply is primed with assistant - return num_tokens - - def ask_stream( - self, - prompt: str, - role: str = "user", - convo_id: str = "default", - model: str = None, - pass_history: bool = True, - model_max_tokens: int = 4096, - **kwargs, - ): - pass_history = True - if convo_id not in self.conversation or pass_history == False: - self.reset(convo_id=convo_id) - self.add_to_conversation(prompt, role, convo_id=convo_id) - # self.__truncate_conversation(convo_id=convo_id) - # print(self.conversation[convo_id]) - - url = self.chat_url - headers = { - "x-api-key": f"{kwargs.get('api_key', self.api_key)}", - "anthropic-version": "2023-06-01", - "content-type": "application/json", - "anthropic-beta": "tools-2024-04-04" - } - - json_post = { - "model": os.environ.get("MODEL_NAME") or model or self.engine, - "messages": self.conversation[convo_id] if pass_history else [{ - "role": "user", - "content": prompt - }], - "temperature": kwargs.get("temperature", self.temperature), - "top_p": kwargs.get("top_p", self.top_p), - "max_tokens": model_max_tokens, - # "stream": True, - } - - # json_post.update(copy.deepcopy(json_post)) - json_post.update(claude_tools_list["base"]) - for item in config.PLUGINS.keys(): - try: - if config.PLUGINS[item]: - json_post["tools"].append(claude_tools_list[item]) - except: - pass - - if self.system_prompt: - json_post["system"] = self.system_prompt - print(json.dumps(json_post, indent=4, ensure_ascii=False)) - - try: - response = self.session.post( - url, - headers=headers, - json=json_post, - timeout=kwargs.get("timeout", self.timeout), - stream=True, - ) - except ConnectionError: - print("连接错误,请检查服务器状态或网络连接。") - return - except Exception as e: - print(f"发生了未预料的错误: {e}") - return - - if response.status_code != 200: - print(response.text) - raise BaseException(f"{response.status_code} {response.reason} {response.text}") - response_role: str = "assistant" - full_response: str = "" - for line in response.iter_lines(): - if not line or line.decode("utf-8")[:6] == "event:" or line.decode("utf-8") == "data: {}": - continue - print(line.decode("utf-8")) - if "tool_use" in line.decode("utf-8"): - tool_input = json.loads(line.decode("utf-8")["content"][1]["input"]) - else: - line = line.decode("utf-8")[6:] - resp: dict = json.loads(line) - delta = resp.get("delta") - if not delta: - continue - if "text" in delta: - content = delta["text"] - full_response += content - yield content - self.add_to_conversation(full_response, response_role, convo_id=convo_id) - # print(repr(self.conversation.Conversation(convo_id))) - # print("total tokens:", self.get_token_count(convo_id)) \ No newline at end of file diff --git a/test/test_url.py b/test/test_url.py deleted file mode 100644 index e63c8b44..00000000 --- a/test/test_url.py +++ /dev/null @@ -1,33 +0,0 @@ -import re -import datetime - -def sort_by_time(urls): - def extract_date(url): - match = re.search(r'[12]\d{3}.\d{1,2}.\d{1,2}', url) - if match is not None: - match = re.sub(r'([12]\d{3}).(\d{1,2}).(\d{1,2})', 
"\\1/\\2/\\3", match.group()) - print(match) - if int(match[:4]) > datetime.datetime.now().year: - match = "1000/01/01" - else: - match = "1000/01/01" - try: - return datetime.datetime.strptime(match, '%Y/%m/%d') - except: - match = "1000/01/01" - return datetime.datetime.strptime(match, '%Y/%m/%d') - - # 提取日期并创建一个包含日期和URL的元组列表 - date_url_pairs = [(extract_date(url), url) for url in urls] - - # 按日期排序 - date_url_pairs.sort(key=lambda x: x[0], reverse=True) - - # 获取排序后的URL列表 - sorted_urls = [url for _, url in date_url_pairs] - - return sorted_urls - -if __name__ == "__main__": - urls = ['https://www.bbc.com/zhongwen/simp/chinese-news-58392571', 'https://glginc.cn/articles/china-gaming-regulation-impact/', 'https://www.gov.cn/zhengce/2021-08/30/content_5634208.htm', 'https://zwgk.mct.gov.cn/zfxxgkml/zcfg/zcjd/202012/t20201205_915382.html', 'https://www.aljazeera.com/news/2023/12/23/china-considers-revising-gaming-rules-after-tech-giants-lose-billions', 'https://www.reuters.com/world/china/china-issues-draft-rules-online-game-management-2023-12-22/', 'https://www.cnn.com/2023/12/22/business/chinese-tech-giants-shares-plunge-online-gaming-ban-intl-hnk/index.html', 'https://www.bbc.com/news/technology-67801091', 'https://news.cctv.com/2023/12/22/ARTIUFZFQtfoBp1tfwsq1w1B231222.shtml', 'https://news.sina.com.cn/c/2023-12-22/doc-imzywncy6795505.shtml', 'https://www.thepaper.cn/newsDetail_forward_25728500', 'https://new.qq.com/rain/a/20230907A01LKT00'] - print(sort_by_time(urls)) \ No newline at end of file diff --git a/test/test_whisper.py b/test/test_whisper.py deleted file mode 100644 index be81eacf..00000000 --- a/test/test_whisper.py +++ /dev/null @@ -1,14 +0,0 @@ -import requests -import os - -headers = { - "Authorization": f"Bearer {os.environ.get('API', None)}", - "Content-Type": "multipart/form-data" -} -files = { - 'file': ('filename', open('/path/to/file/audio.mp3', 'rb'), 'audio/mpeg'), - 'model': (None, 'whisper-1') -} - -response = requests.post(os.environ.get('API_URL', None), headers=headers, files=files) -print(response.text) \ No newline at end of file diff --git a/test/test_yield.py b/test/test_yield.py deleted file mode 100644 index f7eb846c..00000000 --- a/test/test_yield.py +++ /dev/null @@ -1,24 +0,0 @@ -# def my_generator(): -# r = 0 -# for i in range(5): -# r += 1 -# yield i - -# def a(): -# yield from my_generator() -# print(1) - -def my_generator(): - r = 0 - for i in range(5): - r += 1 - yield i - return r - -def a(): - r = yield from my_generator() - print(r) - print(1) - -for item in a(): - print(item) \ No newline at end of file diff --git a/vercel.json b/vercel.json deleted file mode 100644 index c8126cf5..00000000 --- a/vercel.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "version": 2, - "builds": [ - { - "src": "bot.py", - "use": "@vercel/python" - } - ], - "routes": [ - { - "src": "/(.*)", - "dest": "bot.py" - } - ] - } \ No newline at end of file