Skip to content

Commit

Permalink
Add automatic summarization of web pages based on URL
Browse files (browse the repository at this point in the history)
  • Loading branch information
yym68686 committed Nov 23, 2023
1 parent 84c9347 commit d277f5b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 16 deletions.
50 changes: 35 additions & 15 deletions chatgpt2api/chatgpt2api.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def __init__(
self.api_key: str = api_key
self.system_prompt: str = system_prompt
self.max_tokens: int = max_tokens or (
4000
4096
if "gpt-4-1106-preview" in engine
else 31000
if "gpt-4-32k" in engine
Expand All @@ -140,6 +140,7 @@ def __init__(
if "claude-2-web" in engine or "claude-2" in engine
else 4000
)
# context max tokens
self.truncate_limit: int = truncate_limit or (
16000
# 126500 Control the number of search characters to prevent excessive spending
Expand Down Expand Up @@ -201,11 +202,15 @@ def add_to_conversation(
message: str,
role: str,
convo_id: str = "default",
function_name: str = "",
) -> None:
"""
Add a message to the conversation
"""
self.conversation[convo_id].append({"role": role, "content": message})
if function_name == "":
self.conversation[convo_id].append({"role": role, "content": message})
else:
self.conversation[convo_id].append({"role": role, "name": function_name, "content": message})

def __truncate_conversation(self, convo_id: str = "default") -> None:
"""
Expand Down Expand Up @@ -252,6 +257,7 @@ def get_max_tokens(self, convo_id: str) -> int:
"""
Get max tokens
"""
# print(self.max_tokens, self.get_token_count(convo_id))
return self.max_tokens - self.get_token_count(convo_id)

def ask_stream(
Expand All @@ -261,6 +267,7 @@ def ask_stream(
convo_id: str = "default",
model: str = None,
pass_history: bool = True,
function_name: str = "",
**kwargs,
):
"""
Expand All @@ -269,8 +276,9 @@ def ask_stream(
# Make conversation if it doesn't exist
if convo_id not in self.conversation or pass_history == False:
self.reset(convo_id=convo_id, system_prompt=self.system_prompt)
self.add_to_conversation(prompt, "user", convo_id=convo_id)
self.add_to_conversation(prompt, role, convo_id=convo_id, function_name=function_name)
self.__truncate_conversation(convo_id=convo_id)
# print(self.conversation[convo_id])
# Get response
if os.environ.get("API_URL") and os.environ.get("MODEL_NAME"):
# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?tabs=command-line&pivots=rest-api
Expand Down Expand Up @@ -305,13 +313,15 @@ def ask_stream(
),
"n": kwargs.get("n", self.reply_count),
"user": role,
"max_tokens": min(
self.get_max_tokens(convo_id=convo_id),
kwargs.get("max_tokens", self.max_tokens),
),
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
# "max_tokens": min(
# self.get_max_tokens(convo_id=convo_id),
# kwargs.get("max_tokens", self.max_tokens),
# ),
}
if config.SEARCH_USE_GPT:
json_post.update(function_call_list["web_search"])
json_post.update(function_call_list["url_fetch"])
response = self.session.post(
url,
headers=headers,
Expand All @@ -325,7 +335,8 @@ def ask_stream(
)
response_role: str or None = None
full_response: str = ""
need_function_call = False
function_call_name: str = ""
need_function_call: bool = False
for line in response.iter_lines():
if not line:
continue
Expand All @@ -347,13 +358,21 @@ def ask_stream(
content = delta["content"]
full_response += content
yield content
if "function_call" in delta and config.SEARCH_USE_GPT:
if "function_call" in delta:
need_function_call = True
function_call_content = delta["function_call"]["arguments"]
if "name" in delta["function_call"]:
function_call_name = delta["function_call"]["name"]
full_response += function_call_content
if need_function_call:
keywords = json.loads(full_response)["prompt"]
yield from self.search_summary(keywords, convo_id=convo_id, need_function_call=True)
response_role = "function"
if function_call_name == "get_web_search_results":
keywords = json.loads(full_response)["prompt"]
yield from self.search_summary(keywords, convo_id=convo_id, need_function_call=True)
if function_call_name == "get_url_content":
url = json.loads(full_response)["url"]
function_response = Web_crawler(url)
yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name)
else:
self.add_to_conversation(full_response, response_role, convo_id=convo_id)

Expand Down Expand Up @@ -396,10 +415,11 @@ async def ask_stream_async(
),
"n": kwargs.get("n", self.reply_count),
"user": role,
"max_tokens": min(
self.get_max_tokens(convo_id=convo_id),
kwargs.get("max_tokens", self.max_tokens),
),
"max_tokens": kwargs.get("max_tokens", self.max_tokens),
# "max_tokens": min(
# self.get_max_tokens(convo_id=convo_id),
# kwargs.get("max_tokens", self.max_tokens),
# ),
},
timeout=kwargs.get("timeout", self.timeout),
) as response:
Expand Down
3 changes: 2 additions & 1 deletion test/test_Web_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def Web_crawler(url: str) -> str:
# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403', 'https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it', 'https://beebom.com/what-is-403-forbidden-error-how-to-fix/']:
# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']:
# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']:
for url in ['https://github.com/EAimTY/tuic/issues/107']:
# for url in ['https://github.com/EAimTY/tuic/issues/107']:
for url in ['https://mp.weixin.qq.com/s/Itad7Y-QBcr991JkF3SrIg']:
# for url in ['https://zhidao.baidu.com/question/317577832.html']:
# for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']:
# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
Expand Down
19 changes: 19 additions & 0 deletions utils/function_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@
],
"function_call": "auto"
},
"url_fetch": {
"functions": [
{
"name": "get_url_content",
"description": "Get the webpage content of a URL",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The url to get the webpage content"
}
},
"required": ["url"]
}
}
],
"function_call": "auto"
},
# "web_search": {
# "functions": [
# {
Expand Down

0 comments on commit d277f5b

Please sign in to comment.