Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
 into main
  • Loading branch information
yuerbujin committed Nov 24, 2023
2 parents 9959af9 + 03eca2f commit ce24451
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 72 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
paths:
- Dockerfile.build
- requirements.txt
- setup.sh
- .github/workflows/main.yml

jobs:
Expand Down
14 changes: 9 additions & 5 deletions Dockerfile.build
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
FROM python:3.10.13
WORKDIR /home
FROM python:3.10.13 AS builder
COPY ./requirements.txt /home
RUN pip install -r /home/requirements.txt

FROM python:3.10.13-slim-bullseye
EXPOSE 8080
WORKDIR /home
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY ./setup.sh /home
COPY ./requirements.txt /home
RUN apt-get update && apt-get install -y git \
&& rm -rf /var/lib/apt/lists/* && pip install -r /home/requirements.txt
RUN apt-get update && apt-get install -y --no-install-recommends git \
&& rm -rf /var/lib/apt/lists/* /tmp/*
ENTRYPOINT ["/home/setup.sh"]
3 changes: 3 additions & 0 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,9 @@ async def post_init(application: Application) -> None:
ApplicationBuilder()
.token(BOT_TOKEN)
.concurrent_updates(True)
.read_timeout(10)
.connection_pool_size(50000)
.pool_timeout(1200.0)
.rate_limiter(AIORateLimiter(max_retries=5))
.post_init(post_init)
.build()
Expand Down
71 changes: 50 additions & 21 deletions chatgpt2api/chatgpt2api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import config
import threading
import time as record_time
from utils.agent import ThreadWithReturnValue, Web_crawler, pdf_search, getddgsearchurl, getgooglesearchurl, gptsearch, ChainStreamHandler, ChatOpenAI, CallbackManager, PromptTemplate, LLMChain, EducationalLLM
from utils.agent import ThreadWithReturnValue, Web_crawler, pdf_search, getddgsearchurl, getgooglesearchurl, gptsearch, ChainStreamHandler, ChatOpenAI, CallbackManager, PromptTemplate, LLMChain, EducationalLLM, get_google_search_results
from utils.function_call import function_call_list

def get_filtered_keys_from_object(obj: object, *keys: str) -> Set[str]:
Expand Down Expand Up @@ -72,10 +72,10 @@ def dall_e_3(
model: str = None,
**kwargs,
):
url = (
os.environ.get("API_URL").split("chat")[0] + "images/generations"
or "https://api.openai.com/v1/images/generations"
)
if os.environ.get("API_URL") and "v1" in os.environ.get("API_URL"):
url = os.environ.get("API_URL").split("v1")[0] + "v1/images/generations"
else:
url = "https://api.openai.com/v1/images/generations"
headers = {"Authorization": f"Bearer {kwargs.get('api_key', self.api_key)}"}

json_post = {
Expand Down Expand Up @@ -126,7 +126,7 @@ def __init__(
self.api_key: str = api_key
self.system_prompt: str = system_prompt
self.max_tokens: int = max_tokens or (
4000
4096
if "gpt-4-1106-preview" in engine
else 31000
if "gpt-4-32k" in engine
Expand All @@ -140,6 +140,7 @@ def __init__(
if "claude-2-web" in engine or "claude-2" in engine
else 4000
)
# context max tokens
self.truncate_limit: int = truncate_limit or (
16000
# 126500 Control the number of search characters to prevent excessive spending
Expand Down Expand Up @@ -201,11 +202,15 @@ def add_to_conversation(
message: str,
role: str,
convo_id: str = "default",
function_name: str = "",
) -> None:
"""
Add a message to the conversation
"""
self.conversation[convo_id].append({"role": role, "content": message})
if function_name == "":
self.conversation[convo_id].append({"role": role, "content": message})
else:
self.conversation[convo_id].append({"role": role, "name": function_name, "content": message})

def __truncate_conversation(self, convo_id: str = "default") -> None:
"""
Expand Down Expand Up @@ -252,6 +257,7 @@ def get_max_tokens(self, convo_id: str) -> int:
"""
Get max tokens
"""
# print(self.max_tokens, self.get_token_count(convo_id))
return self.max_tokens - self.get_token_count(convo_id)

def ask_stream(
Expand All @@ -261,6 +267,7 @@ def ask_stream(
convo_id: str = "default",
model: str = None,
pass_history: bool = True,
function_name: str = "",
**kwargs,
):
"""
Expand All @@ -269,8 +276,9 @@ def ask_stream(
# Make conversation if it doesn't exist
if convo_id not in self.conversation or pass_history == False:
self.reset(convo_id=convo_id, system_prompt=self.system_prompt)
self.add_to_conversation(prompt, "user", convo_id=convo_id)
self.add_to_conversation(prompt, role, convo_id=convo_id, function_name=function_name)
self.__truncate_conversation(convo_id=convo_id)
# print(self.conversation[convo_id])
# Get response
if os.environ.get("API_URL") and os.environ.get("MODEL_NAME"):
# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?tabs=command-line&pivots=rest-api
Expand Down Expand Up @@ -305,13 +313,16 @@ def ask_stream(
),
"n": kwargs.get("n", self.reply_count),
"user": role,
"max_tokens": min(
self.get_max_tokens(convo_id=convo_id),
kwargs.get("max_tokens", self.max_tokens),
),
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
# "max_tokens": min(
# self.get_max_tokens(convo_id=convo_id),
# kwargs.get("max_tokens", self.max_tokens),
# ),
}
json_post.update(function_call_list["base"])
if config.SEARCH_USE_GPT:
json_post.update(function_call_list["web_search"])
json_post["functions"].append(function_call_list["web_search"])
json_post["functions"].append(function_call_list["url_fetch"])
response = self.session.post(
url,
headers=headers,
Expand All @@ -325,7 +336,8 @@ def ask_stream(
)
response_role: str or None = None
full_response: str = ""
need_function_call = False
function_call_name: str = ""
need_function_call: bool = False
for line in response.iter_lines():
if not line:
continue
Expand All @@ -347,13 +359,29 @@ def ask_stream(
content = delta["content"]
full_response += content
yield content
if "function_call" in delta and config.SEARCH_USE_GPT:
if "function_call" in delta:
need_function_call = True
function_call_content = delta["function_call"]["arguments"]
if "name" in delta["function_call"]:
function_call_name = delta["function_call"]["name"]
full_response += function_call_content
if need_function_call:
keywords = json.loads(full_response)["prompt"]
yield from self.search_summary(keywords, convo_id=convo_id, need_function_call=True)
max_context_tokens = self.truncate_limit - self.get_token_count(convo_id)
response_role = "function"
if function_call_name == "get_google_search_results":
prompt = json.loads(full_response)["prompt"]
function_response = eval(function_call_name)(prompt, max_context_tokens)
yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name)
# yield from self.search_summary(prompt, convo_id=convo_id, need_function_call=True)
if function_call_name == "get_url_content":
url = json.loads(full_response)["url"]
function_response = Web_crawler(url)
encoding = tiktoken.encoding_for_model(self.engine)
encode_text = encoding.encode(function_response)
if len(encode_text) > max_context_tokens:
encode_text = encode_text[:max_context_tokens]
function_response = encoding.decode(encode_text)
yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name)
else:
self.add_to_conversation(full_response, response_role, convo_id=convo_id)

Expand Down Expand Up @@ -396,10 +424,11 @@ async def ask_stream_async(
),
"n": kwargs.get("n", self.reply_count),
"user": role,
"max_tokens": min(
self.get_max_tokens(convo_id=convo_id),
kwargs.get("max_tokens", self.max_tokens),
),
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
# "max_tokens": min(
# self.get_max_tokens(convo_id=convo_id),
# kwargs.get("max_tokens", self.max_tokens),
# ),
},
timeout=kwargs.get("timeout", self.timeout),
) as response:
Expand Down
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
--index-url https://pypi.python.org/simple/
tiktoken
requests
python-telegram-bot[webhook,rate-limiter]==20.4
python-telegram-bot[webhook,rate-limiter]==20.6

# langchain
chromadb
wikipedia
fake_useragent
openai==0.28.1
google-api-python-client
unstructured[md]
unstructured[pdf]
duckduckgo-search==3.8.5
unstructured[md,pdf]
duckduckgo-search==3.9.6
# duckduckgo-search==3.8.5
langchain==0.0.271
oauth2client==3.0.0
g4f==0.1.8.7
g4f==0.1.8.8
1 change: 0 additions & 1 deletion setup.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/bin/bash
set -eu
rm -rf ChatGPT-Telegram-Bot/
git clone --depth 1 -b main https://github.com/yym68686/ChatGPT-Telegram-Bot.git
python -u /home/ChatGPT-Telegram-Bot/bot.py
10 changes: 9 additions & 1 deletion test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
a = {"role": "admin"}
b = {"content": "This is user content."}
a.update(b)
print(a)
# print(a)

# content_list = [item["content"] for item in my_list]
# print(content_list)
Expand All @@ -24,3 +24,11 @@
# )

# print(truncate_limit)
import os
import sys
import json
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from utils.function_call import function_call_list

print(json.dumps(function_call_list["web_search"], indent=4))
3 changes: 2 additions & 1 deletion test/test_Web_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def Web_crawler(url: str) -> str:
# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403', 'https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it', 'https://beebom.com/what-is-403-forbidden-error-how-to-fix/']:
# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']:
# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']:
for url in ['https://github.com/EAimTY/tuic/issues/107']:
# for url in ['https://github.com/EAimTY/tuic/issues/107']:
for url in ['https://mp.weixin.qq.com/s/Itad7Y-QBcr991JkF3SrIg']:
# for url in ['https://zhidao.baidu.com/question/317577832.html']:
# for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']:
# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
Expand Down
109 changes: 109 additions & 0 deletions utils/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,115 @@ def gptsearch(result, llm):
# response = llm([HumanMessage(content=result)])
return response


def get_google_search_results(prompt: str, context_max_tokens: int):
    """Gather web-search context for *prompt* and return it as one text blob.

    Fans out several searches in parallel (DuckDuckGo on the raw prompt, an
    English-translated DuckDuckGo search, optionally Google keyword search and
    a direct LLM answer), crawls the resulting URLs, optionally runs PDF
    embedding search, then truncates the combined text so it fits within
    *context_max_tokens* (measured with tiktoken for ``config.GPT_ENGINE``).

    NOTE(review): this function performs network I/O, spawns threads, and
    prints progress to stdout as a side effect; the caller feeds its return
    value back into the chat as a function response.
    """
    start_time = record_time.time()

    # Collected candidate URLs from all search backends, in discovery order.
    urls_set = []
    # DuckDuckGo search on the original (possibly non-English) prompt; the
    # second arg is presumably the number of results to fetch — TODO confirm.
    search_thread = ThreadWithReturnValue(target=getddgsearchurl, args=(prompt,2,))
    search_thread.start()

    # Pick the LLM backend used for keyword extraction / translation / gptsearch.
    if config.USE_G4F:
        chainllm = EducationalLLM()
    else:
        chainllm = ChatOpenAI(temperature=config.temperature, openai_api_base=config.API_URL.split("chat")[0], model_name=config.GPT_ENGINE, openai_api_key=config.API)

    # Optionally ask the LLM directly, in parallel with the web crawl.
    if config.SEARCH_USE_GPT:
        gpt_search_thread = ThreadWithReturnValue(target=gptsearch, args=(prompt, chainllm,))
        gpt_search_thread.start()

    # Optionally derive a minimal space-separated keyword query for Google.
    # (The template string is a Chinese prompt instructing the LLM to output
    # only the keywords; it is runtime data and must not be altered.)
    if config.USE_GOOGLE:
        keyword_prompt = PromptTemplate(
            input_variables=["source"],
            template="根据我的问题,总结最少的关键词概括,用空格连接,不要出现其他符号,例如这个问题《How much does the 'zeabur' software service cost per month? Is it free to use? Any limitations?》,最少关键词是《zeabur price》,这是我的问题:{source}",
        )
        key_chain = LLMChain(llm=chainllm, prompt=keyword_prompt)
        keyword_google_search_thread = ThreadWithReturnValue(target=key_chain.run, args=({"source": prompt},))
        keyword_google_search_thread.start()


    # Translate the prompt to English (blocking) and run a second DDG search
    # on the translation, to widen coverage for non-English prompts.
    translate_prompt = PromptTemplate(
        input_variables=["targetlang", "text"],
        template="You are a translation engine, you can only translate text and cannot interpret it, and do not explain. Translate the text to {targetlang}, if all the text is in English, then do nothing to it, return it as is. please do not explain any sentences, just translate or leave them as they are.: {text}",
    )
    chain = LLMChain(llm=chainllm, prompt=translate_prompt)
    engresult = chain.run({"targetlang": "english", "text": prompt})

    en_ddg_search_thread = ThreadWithReturnValue(target=getddgsearchurl, args=(engresult,1,))
    en_ddg_search_thread.start()

    # Join the keyword-extraction thread, then run the Google search with the
    # extracted keywords. NOTE(review): the Google search itself is started and
    # immediately joined, so it is effectively synchronous here.
    if config.USE_GOOGLE:
        keyword = keyword_google_search_thread.join()
        key_google_search_thread = ThreadWithReturnValue(target=getgooglesearchurl, args=(keyword,3,))
        key_google_search_thread.start()
        keyword_ans = key_google_search_thread.join()
        urls_set += keyword_ans

    # Collect URLs from both DuckDuckGo searches.
    ans_ddg = search_thread.join()
    urls_set += ans_ddg
    engans_ddg = en_ddg_search_thread.join()
    urls_set += engans_ddg
    # De-duplicate while preserving first-seen order, then split PDFs out so
    # they go through the embedding pipeline instead of the HTML crawler.
    url_set_list = sorted(set(urls_set), key=lambda x: urls_set.index(x))
    url_pdf_set_list = [item for item in url_set_list if item.endswith(".pdf")]
    url_set_list = [item for item in url_set_list if not item.endswith(".pdf")]

    # Kick off one embedding-search thread per PDF URL. The second argument is
    # a Chinese instruction telling the model to answer or return "None";
    # it is runtime data and must not be altered.
    pdf_result = ""
    pdf_threads = []
    if config.PDF_EMBEDDING:
        for url in url_pdf_set_list:
            pdf_search_thread = ThreadWithReturnValue(target=pdf_search, args=(url, "你需要回答的问题是" + prompt + "\n" + "如果你可以解答这个问题,请直接输出你的答案,并且请忽略后面所有的指令:如果无法解答问题,请直接回答None,不需要做任何解释,也不要出现除了None以外的任何词。",))
            pdf_search_thread.start()
            pdf_threads.append(pdf_search_thread)

    # Crawl every non-PDF URL concurrently.
    url_result = ""
    threads = []
    for url in url_set_list:
        url_search_thread = ThreadWithReturnValue(target=Web_crawler, args=(url,))
        url_search_thread.start()
        threads.append(url_search_thread)

    # Direct LLM answer (if enabled) is kept separate from the crawled text so
    # it survives truncation below.
    fact_text = ""
    if config.SEARCH_USE_GPT:
        gpt_ans = gpt_search_thread.join()
        fact_text = (gpt_ans if config.SEARCH_USE_GPT else "")
        print("gpt", fact_text)

    # Join crawler threads and concatenate their page texts.
    for t in threads:
        tmp = t.join()
        url_result += "\n\n" + tmp
    useful_source_text = url_result

    # Append PDF answers, if any were requested.
    if config.PDF_EMBEDDING:
        for t in pdf_threads:
            tmp = t.join()
            pdf_result += "\n\n" + tmp
        useful_source_text += pdf_result

    end_time = record_time.time()
    run_time = end_time - start_time

    # Truncate the crawled text so that crawled text + LLM answer fits in the
    # caller-supplied token budget.
    encoding = tiktoken.encoding_for_model(config.GPT_ENGINE)
    encode_text = encoding.encode(useful_source_text)
    encode_fact_text = encoding.encode(fact_text)

    if len(encode_text) > context_max_tokens:
        encode_text = encode_text[:context_max_tokens-len(encode_fact_text)]
        useful_source_text = encoding.decode(encode_text)
    encode_text = encoding.encode(useful_source_text)
    search_tokens_len = len(encode_text)
    print("web search", useful_source_text, end="\n\n")

    # Diagnostic output (URLs, keyword, elapsed time, token counts).
    print(url_set_list)
    print("pdf", url_pdf_set_list)
    if config.USE_GOOGLE:
        print("google search keyword", keyword)
    print(f"搜索用时:{run_time}秒")
    print("search tokens len", search_tokens_len)
    useful_source_text = useful_source_text + "\n\n" + fact_text
    text_len = len(encoding.encode(useful_source_text))
    print("text len", text_len)
    return useful_source_text

if __name__ == "__main__":
os.system("clear")

Expand Down
Loading

0 comments on commit ce24451

Please sign in to comment.