Skip to content

Commit

Permalink
Add GitHub Actions automatic Docker image build; downgrade oauth2…
Browse files Browse the repository at this point in the history
…client version to 3.0.0.
  • Loading branch information
yym68686 committed Sep 17, 2023
1 parent 2e2e513 commit de7f8b9
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 55 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/fly_deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Deploy the app to Fly.io on every push / pull request to main.
name: Fly Deploy
on:
  push:
    branches: [ main ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ main ]
env:
  # Secrets forwarded to deploy.sh / flyctl at deploy time.
  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  APP_NAME: ${{ secrets.APP_NAME }}
  WEB_HOOK: ${{ secrets.WEB_HOOK }}
  BOT_TOKEN: ${{ secrets.BOT_TOKEN }}
  NICK: ${{ secrets.NICK }}
  API: ${{ secrets.API }}
  COOKIES: ${{ secrets.COOKIES }}
jobs:
  deploy:
    name: Deploy app
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        # v3 (Node 16) — v2 runs on Node 12, which GitHub has deprecated;
        # also matches the @v3 used by the docker publish workflow.
        uses: actions/checkout@v3
      - name: Deploy
        run: |
          sh ./deploy.sh
45 changes: 26 additions & 19 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
# Build the Docker image and push it to Docker Hub.
# NOTE(review): this span was a rendered diff (old + new lines interleaved,
# producing duplicate `name:` keys); reconstructed here as the post-change file.
name: Publish Docker Image

on:
  push:
    branches: [ main ]
  release:
    types: [ published ]

jobs:
  build-and-push:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        # TODO(review): action version pins were mangled by the page scrape
        # ("[email protected]") — confirm the exact tags against the repository.
        uses: docker/setup-buildx-action@v2

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile.build
          push: true
          tags: yym68686/chatgpt:1.0
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ google-api-python-client
unstructured[md]
unstructured[pdf]
duckduckgo-search==3.8.5
langchain==0.0.271
oauth2client==3.0.0
66 changes: 31 additions & 35 deletions test/test_Web_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter

def Web_crawler(url: str) -> str:
    """Fetch *url* and return the text content of its <body>, or '' on failure.

    The URL must be well-formed. TLS verification is disabled on purpose
    (verify=False) so sites with broken certificates can still be scraped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    result = ''
    try:
        # Silence the InsecureRequestWarning triggered by verify=False.
        requests.packages.urllib3.disable_warnings()
        # timeout added so a stalled server cannot hang the caller forever.
        response = requests.get(url, headers=headers, verify=False, timeout=5)
        # response.encoding can be None when the server sends no charset;
        # fall back to utf-8 instead of letting str.encode(None) raise.
        encoding = response.encoding or 'utf-8'
        soup = BeautifulSoup(response.text.encode(encoding), 'lxml', from_encoding='utf-8')
        body_tag = soup.find('body')
        # Pages without a <body> (e.g. bare XML) yield '' instead of crashing.
        result = "".join(body_tag.get_text().split('\n')) if body_tag else ''
    except Exception as e:
        print('\033[31m')
        print("error", e)
        print('\033[0m')
    return result

# def Web_crawler(url: str) -> str:
# """返回链接网址url正文内容,必须是合法的网址"""
# headers = {
Expand All @@ -14,49 +32,26 @@
# result = ''
# try:
# requests.packages.urllib3.disable_warnings()
# # session = requests.Session()
# # session.mount('http://', HTTPAdapter(max_retries=5))
# # session.mount('https://', HTTPAdapter(max_retries=5))
# # response = session.get(url, headers=headers, verify=False)
# response = requests.get(url, headers=headers, verify=False)
# response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
# content_length = int(response.headers.get('Content-Length', 0))
# if content_length > 500000:
# print("Skipping large file:", url)
# return result
# content = response.content
# detected_encoding = chardet.detect(response.content)['encoding']
# decoded_content = response.content.decode(detected_encoding, errors='replace')
# # soup = BeautifulSoup(response.text, 'html.parser')
# soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
# soup = BeautifulSoup(decoded_content, 'lxml')
# # soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
# body = "".join(soup.find('body').get_text().split('\n'))
# result = body
# except Exception as e:
# print('\033[31m')
# print("error url", url)
# print("error", e)
# print('\033[0m')
# return result

def Web_crawler(url: str) -> str:
    """Fetch *url* and return the text content of its <body>, or '' on failure.

    Responses advertising more than ~500 kB via Content-Length are skipped;
    the character encoding is detected with chardet before parsing.
    NOTE(review): this redefinition shadows the earlier Web_crawler in this file.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    result = ''
    try:
        # Silence the InsecureRequestWarning triggered by verify=False.
        requests.packages.urllib3.disable_warnings()
        # stream=True defers the body download until .content is accessed,
        # letting us check Content-Length first.
        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
        content_length = int(response.headers.get('Content-Length', 0))
        if content_length > 500000:
            print("Skipping large file:", url)
            return result
        # chardet may fail to detect an encoding (returns None); fall back
        # to utf-8 so .decode() cannot raise TypeError on None.
        detected_encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
        decoded_content = response.content.decode(detected_encoding, errors='replace')
        soup = BeautifulSoup(decoded_content, 'lxml')
        body_tag = soup.find('body')
        # Pages without a <body> yield '' instead of raising AttributeError.
        result = "".join(body_tag.get_text().split('\n')) if body_tag else ''
    except Exception as e:
        print('\033[31m')
        print("error url", url)
        print("error", e)
        print('\033[0m')
    return result

# def Web_crawler(url: str) -> str:
# """返回链接网址url正文内容,必须是合法的网址"""
# headers = {
Expand Down Expand Up @@ -85,7 +80,8 @@ def Web_crawler(url: str) -> str:
# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403', 'https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it', 'https://beebom.com/what-is-403-forbidden-error-how-to-fix/']:
# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']:
# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']:
for url in ['https://zhidao.baidu.com/question/317577832.html']:
# for url in ['https://zhidao.baidu.com/question/317577832.html']:
for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']:
# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
# for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']:
# for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']:
Expand Down

0 comments on commit de7f8b9

Please sign in to comment.