diff --git a/bot.py b/bot.py
index e7417cf2..2e93abbc 100644
--- a/bot.py
+++ b/bot.py
@@ -143,6 +143,16 @@ async def getChatGPT(update, context, title, robot, message, use_search=config.S
 
 async def search(update, context, title, robot):
     message = update.message.text if config.NICK is None else update.message.text[botNicKLength:].strip() if update.message.text[:botNicKLength].lower() == botNick else None
+    print("\033[32m", update.effective_user.username, update.effective_user.id, update.message.text, "\033[0m")
+    if (len(context.args) == 0):
+        message = (
+            f"格式错误哦~,示例:\n\n"
+            f"`/search 今天的微博热搜有哪些?`\n\n"
+            f"👆点击上方命令复制格式\n\n"
+        )
+        await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
+        return
+    message = ' '.join(context.args)
     result = title
     text = message
     modifytime = 0
diff --git a/test/test.py b/test/test.py
index ece1770d..9071bf12 100644
--- a/test/test.py
+++ b/test/test.py
@@ -48,4 +48,13 @@ def __init__(
 
 
 a = openaiAPI()
-print(a.v1_url)
\ No newline at end of file
+print(a.v1_url)
+
+def getddgsearchurl(result, numresults=3):
+    # print("ddg-search", result)
+    search = DuckDuckGoSearchResults(num_results=numresults)
+    webresult = search.run(result)
+    # print("ddgwebresult", webresult)
+    urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
+    # print("duckduckgo urls", urls)
+    return urls
\ No newline at end of file
diff --git a/test/test_ddg.py b/test/test_ddg.py
new file mode 100644
index 00000000..31ae94b9
--- /dev/null
+++ b/test/test_ddg.py
@@ -0,0 +1,50 @@
+import re
+import time
+import requests
+import os
+from bs4 import BeautifulSoup
+from langchain.tools import DuckDuckGoSearchResults
+def getddgsearchurl(result, numresults=3):
+    search = DuckDuckGoSearchResults(num_results=numresults)
+    webresult = search.run(result)
+    urls = re.findall(r"(https?://\S+)\]", webresult, re.MULTILINE)
+    return urls
+
+urls = getddgsearchurl("你知道今天有什么热点新闻吗")
+print(urls)
+
+def Web_crawler(url: str) -> str:
+    """Return the body text of the page at url; url must be a valid address."""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return ""
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
+        body = "".join(soup.find('body').get_text().split('\n'))
+        result = body
+    except Exception as e:
+        print('\033[31m')
+        print("error url", url)
+        print("error", e)
+        print('\033[0m')
+    return result
+
+start_time = time.time()
+
+for url in urls:
+    print(Web_crawler(url))
+    print('-----------------------------')
+end_time = time.time()
+run_time = end_time - start_time
+# Print the elapsed run time
+print(f"程序运行时间:{run_time}秒")
\ No newline at end of file