Fixed bug: the /search command's message could be None

yym68686 · Nov 26, 2023 · 09faed6 · 09faed6
1 parent 2663bf0
commit 09faed6
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 1 deletion.
diff --git a/bot.py b/bot.py
@@ -143,6 +143,16 @@ async def getChatGPT(update, context, title, robot, message, use_search=config.S
 
 async def search(update, context, title, robot):
     message = update.message.text if config.NICK is None else update.message.text[botNicKLength:].strip() if update.message.text[:botNicKLength].lower() == botNick else None
+    print("\033[32m", update.effective_user.username, update.effective_user.id, update.message.text, "\033[0m")
+    if (len(context.args) == 0):
+        message = (
+            f"格式错误哦~，示例：\n\n"
+            f"`/search 今天的微博热搜有哪些？`\n\n"
+            f"👆点击上方命令复制格式\n\n"
+        )
+        await context.bot.send_message(chat_id=update.effective_chat.id, text=escape(message), parse_mode='MarkdownV2', disable_web_page_preview=True)
+        return
+    message = ' '.join(context.args)
     result = title
     text = message
     modifytime = 0

diff --git a/test/test.py b/test/test.py
@@ -48,4 +48,13 @@ def __init__(
 
 
 a = openaiAPI()
-print(a.v1_url)
+print(a.v1_url)
+
+def getddgsearchurl(result, numresults=3):
+    """Run a DuckDuckGo search for ``result`` and return the URLs it reports.
+
+    ``numresults`` is passed to ``DuckDuckGoSearchResults`` as ``num_results``.
+    The URLs are pulled out of the tool's text output: each ``http(s)://``
+    run of non-whitespace characters that is directly followed by ``]``.
+    """
+    ddg_tool = DuckDuckGoSearchResults(num_results=numresults)
+    raw_output = ddg_tool.run(result)
+    url_pattern = re.compile(r"(https?://\S+)\]", re.MULTILINE)
+    return url_pattern.findall(raw_output)
diff --git a/test/test_ddg.py b/test/test_ddg.py
@@ -0,0 +1,50 @@
+import re
+import time
+import requests
+import os
+from bs4 import BeautifulSoup
+from langchain.tools import DuckDuckGoSearchResults
+def getddgsearchurl(result, numresults=3):
+    """Return the list of URLs found in a DuckDuckGo search for ``result``.
+
+    ``numresults`` is forwarded as ``num_results`` to the search tool; the
+    URLs are extracted from the tool's text output with a regular expression
+    that captures every ``http(s)://...`` token ending right before a ``]``.
+    """
+    searcher = DuckDuckGoSearchResults(num_results=numresults)
+    search_output = searcher.run(result)
+    link_regex = re.compile(r"(https?://\S+)\]", re.MULTILINE)
+    found_links = link_regex.findall(search_output)
+    return found_links
+
+# Run one sample query when the script starts; `urls` is read again by the
+# crawl loop at the end of this file.
+query = "你知道今天有什么热点新闻吗"
+urls = getddgsearchurl(query)
+print(urls)
+
+def Web_crawler(url: str) -> str:
+    """Return the text content of the <body> of the page at ``url``.
+
+    ``url`` must be a valid URL. The fetch is best-effort: a 404 response, a
+    declared Content-Length above 5,000,000 bytes, a document without a
+    <body>, or any exception raised while fetching or parsing all yield an
+    empty string (a short diagnostic is printed in each case).
+    """
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        # Certificate verification is disabled below, so silence the matching
+        # urllib3 warning.
+        # NOTE(review): verify=False accepts any certificate -- confirm this is
+        # acceptable anywhere outside a throwaway test script.
+        requests.packages.urllib3.disable_warnings()
+        # stream=True postpones the body download so the size check can run
+        # first; the `with` block releases the connection on every exit path.
+        with requests.get(url, headers=headers, verify=False, timeout=5, stream=True) as response:
+            if response.status_code == 404:
+                print("Page not found:", url)
+                return ""
+            content_length = int(response.headers.get('Content-Length', 0))
+            if content_length > 5000000:
+                print("Skipping large file:", url)
+                return result
+            # Parse the raw bytes directly. Re-encoding response.text with
+            # response.encoding fails with TypeError when that encoding is
+            # None, and otherwise only reproduces these same bytes.
+            soup = BeautifulSoup(response.content, 'lxml', from_encoding='utf-8')
+        body_tag = soup.find('body')
+        if body_tag is None:
+            # Nothing to extract (e.g. an empty document).
+            print("No body found:", url)
+            return result
+        # Drop every newline so the page text comes back as a single line.
+        result = body_tag.get_text().replace('\n', '')
+    except Exception as e:
+        # Deliberately broad: one bad page must not abort the whole crawl.
+        print('\033[31m')
+        print("error url", url)
+        print("error", e)
+        print('\033[0m')
+    return result
+
+# Crawl every search hit and measure how long the whole run takes.
+start_time = time.time()
+
+for url in urls:
+    page_text = Web_crawler(url)
+    print(page_text)
+    print('-----------------------------')
+
+end_time = time.time()
+run_time = end_time - start_time
+# Print the elapsed running time.
+print(f"程序运行时间：{run_time}秒")