
Commit

1. Revised the error message shown when DALL·E 3 image generation fails.
2. Optimized the keyword extraction prompt.

3. Fixed an issue where the gpt-4-1106-preview function-call search API could return Unicode escape sequences instead of the Unicode characters themselves (a minimal decoding sketch follows this list).

4. Removed the Wikipedia dependency.
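
A minimal illustration of item 3 (not part of this commit): when the function-call arguments come back containing literal \uXXXX sequences, the affected keyword lines can either be dropped, as the `"\\x" not in item` filter in utils/agent.py below does, or decoded back into characters. The helper name normalize_keyword is hypothetical.

def normalize_keyword(line: str) -> str:
    # Hypothetical sketch: turn a literal escape sequence such as
    # "\u4ee5\u8272\u5217" back into the characters "以色列".
    if ("\\u" in line or "\\x" in line) and line.isascii():
        return line.encode("ascii").decode("unicode_escape")
    return line

print(normalize_keyword("\\u4ee5\\u8272\\u5217 \\u54c8\\u9a6c\\u65af"))  # -> 以色列 哈马斯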
yym68686 committed Dec 9, 2023
1 parent 63e698a commit 47e1731
Showing 5 changed files with 66 additions and 10 deletions.
bot.py: 2 changes (1 addition, 1 deletion)
@@ -244,7 +244,7 @@ async def image(update, context):
start_messageid = ''
config.API = ''
if "content_policy_violation" in str(e):
await context.bot.edit_message_text(chat_id=chatid, message_id=start_messageid, text="当前 prompt 未能成功生成图片,可能涉及版权等违规内容😣,换句话试试吧~", parse_mode='MarkdownV2', disable_web_page_preview=True)
await context.bot.edit_message_text(chat_id=chatid, message_id=start_messageid, text="当前 prompt 未能成功生成图片,可能因为版权,政治,色情,暴力,种族歧视等违反 OpenAI 的内容政策😣,换句话试试吧~", parse_mode='MarkdownV2', disable_web_page_preview=True)
if "server is busy" in str(e):
await context.bot.edit_message_text(chat_id=chatid, message_id=start_messageid, text="当前服务器繁忙,请稍后再试~", parse_mode='MarkdownV2', disable_web_page_preview=True)
result += f"`出错啦!{e}`"
requirements.txt: 3 changes (2 additions, 1 deletion)
@@ -1,14 +1,15 @@
--index-url https://pypi.python.org/simple/
tiktoken
requests
# jieba
python-dotenv
python-telegram-bot[webhooks,rate-limiter]==20.6

# langchain
# chromadb
# unstructured[md,pdf]
# unstructured[md,pdf]
wikipedia
# wikipedia
fake_useragent
openai==0.28.1
google-api-python-client
test/test_jieba.py: 32 changes (32 additions, 0 deletions)
@@ -0,0 +1,32 @@
import jieba
import jieba.analyse

# Load the text
# text = "话说葬送的芙莉莲动漫是半年番还是季番?完结没?"
# text = "民进党当初为什么支持柯文哲选台北市长?"
text = "今天的微博热搜有哪些?"
# text = "How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?"

# Extract keywords with the TF-IDF algorithm
keywords_tfidf = jieba.analyse.extract_tags(text, topK=10, withWeight=False, allowPOS=())

# Extract keywords with the TextRank algorithm
keywords_textrank = jieba.analyse.textrank(text, topK=10, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'))

print("TF-IDF算法提取的关键词:", keywords_tfidf)
print("TextRank算法提取的关键词:", keywords_textrank)


seg_list = jieba.cut(text, cut_all=True)
print("Full Mode: " + " ".join(seg_list)) # 全模式

seg_list = jieba.cut(text, cut_all=False)
print("Default Mode: " + " ".join(seg_list)) # 精确模式

seg_list = jieba.cut(text)  # precise mode is the default
print(" ".join(seg_list))

seg_list = jieba.cut_for_search(text)  # search-engine mode
result = " ".join(seg_list)

print([result] * 3)
utils/agent.py: 30 changes (23 additions, 7 deletions)
Expand Up @@ -4,6 +4,7 @@
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import config
# import jieba

import asyncio
import tiktoken
@@ -296,7 +297,7 @@ def getddgsearchurl(result, numresults=3):
urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
except Exception as e:
print('\033[31m')
print("error", e)
print("duckduckgo error", e)
print('\033[0m')
urls = []
return urls
@@ -307,7 +308,7 @@ def getgooglesearchurl(result, numresults=3):
try:
googleresult = google_search.results(result, numresults)
for i in googleresult:
if "No good Google Search Result was found" in i:
if "No good Google Search Result was found" in i or "google.com" in i["link"]:
continue
urls.append(i["link"])
except Exception as e:
@@ -336,23 +337,32 @@ def get_search_url(prompt, chainllm):
keyword_prompt = PromptTemplate(
input_variables=["source"],
template=(
"根据我的问题,总结最少的关键词概括,给出三行不同的关键词组合,每行的关键词用空格连接,至少有一行关键词里面有中文,至少有一行关键词里面有英文。只要直接给出这三行关键词,不需要其他任何解释,不要出现其他符号。"
"下面是示例:"
"问题1:How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?"
"根据我的问题,总结最少的关键词概括问题,输出要求如下:"
"1. 给出三行不同的关键词组合,每行的关键词用空格连接。"
"2. 至少有一行关键词里面有中文,至少有一行关键词里面有英文。"
"3. 只要直接给出这三行关键词,不需要其他任何解释,不要出现其他符号和内容。"
"4. 如果问题有关于日漫,至少有一行关键词里面有日文。"
"下面是一些根据问题提取关键词的示例:"
"问题 1:How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?"
"三行关键词是:"
"zeabur price"
"zeabur documentation"
"zeabur 价格"
"问题2:pplx API 怎么使用?"
"问题 2:pplx API 怎么使用?"
"三行关键词是:"
"pplx API demo"
"pplx API"
"pplx API 使用方法"
"问题3:以色列哈马斯的最新情况"
"问题 3:以色列哈马斯的最新情况"
"三行关键词是:"
"以色列 哈马斯 最新情况"
"Israel Hamas situation"
"哈马斯 以色列 冲突"
"问题 4:话说葬送的芙莉莲动漫是半年番还是季番?完结没?"
"三行关键词是:"
"葬送的芙莉莲"
"葬送のフリーレン"
"Frieren: Beyond Journey's End"
"这是我的问题:{source}"
),
)
@@ -364,6 +374,12 @@ def get_search_url(prompt, chainllm):
keywords = [item.replace("三行关键词是:", "") for item in keywords if "\\x" not in item]
print("select keywords", keywords)

# # seg_list = jieba.cut_for_search(prompt) # search-engine mode
# seg_list = jieba.cut(prompt, cut_all=True)
# result = " ".join(seg_list)
# keywords = [result] * 3
# print("keywords", keywords)

search_threads = []
urls_set = []
if config.USE_GOOGLE:
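
The code in get_search_url that splits the model's reply into the three keyword lines sits outside the hunks shown above. Purely as an illustration of how that parsing step might look (raw_output is a made-up stand-in for the chain's reply; the middle line mirrors the filter visible in the diff):

raw_output = "三行关键词是:\nzeabur price\nzeabur documentation\nzeabur 价格"

keywords = [line.strip() for line in raw_output.splitlines() if line.strip()]
keywords = [item.replace("三行关键词是:", "") for item in keywords if "\\x" not in item]
keywords = [k for k in keywords if k][:3]  # keep at most three keyword lines
print(keywords)  # ['zeabur price', 'zeabur documentation', 'zeabur 价格']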
utils/chatgpt2api.py: 9 changes (8 additions, 1 deletion)
@@ -508,7 +508,14 @@ def ask_stream(
max_context_tokens = self.truncate_limit - self.get_token_count(convo_id) - 500
response_role = "function"
if function_call_name == "get_search_results":
prompt = json.loads(full_response)["prompt"]
# the prompt that g4t extracts is unreliable
# prompt = json.loads(full_response)["prompt"]
for index in range(len(self.conversation[convo_id])):
if self.conversation[convo_id][-1 - index]["role"] == "user":
prompt = self.conversation[convo_id][-1 - index]["content"]
print("prompt", prompt)
break
# prompt = self.conversation[convo_id][-1]["content"]
# print(self.truncate_limit, self.get_token_count(convo_id), max_context_tokens)
function_response = eval(function_call_name)(prompt, max_context_tokens)
function_response = "web search results: \n" + function_response
