Skip to content

Commit

Permalink
Add GitHub Actions automatic Docker image build; downgrade oauth2…
Browse files Browse the repository at this point in the history
…client version to 3.0.0.
  • Loading branch information
yym68686 committed Sep 17, 2023
1 parent 2e2e513 commit de7f8b9
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 55 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/fly_deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Deploy the app to Fly.io on every push / pull request to main.
name: Fly Deploy
on:
  push:
    branches: [ main ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ main ]
env:
  # Secrets forwarded to deploy.sh / flyctl at deploy time.
  FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
  APP_NAME: ${{ secrets.APP_NAME }}
  WEB_HOOK: ${{ secrets.WEB_HOOK }}
  BOT_TOKEN: ${{ secrets.BOT_TOKEN }}
  NICK: ${{ secrets.NICK }}
  API: ${{ secrets.API }}
  COOKIES: ${{ secrets.COOKIES }}
jobs:
  deploy:
    name: Deploy app
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        # v3 (Node 16) — v2 runs on Node 12, which GitHub has deprecated;
        # also matches the @v3 used by the docker publish workflow.
        uses: actions/checkout@v3
      - name: Deploy
        run: |
          sh ./deploy.sh
45 changes: 26 additions & 19 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
# Build the Docker image and push it to Docker Hub.
# NOTE(review): this span was a rendered diff (old + new lines interleaved,
# producing duplicate `name:` keys); reconstructed here as the post-change file.
name: Publish Docker Image

on:
  push:
    branches: [ main ]
  release:
    types: [ published ]

jobs:
  build-and-push:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Docker Buildx
        # TODO(review): action version pins were mangled by the page scrape
        # ("[email protected]") — confirm the exact tags against the repository.
        uses: docker/setup-buildx-action@v2

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile.build
          push: true
          tags: yym68686/chatgpt:1.0
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ google-api-python-client
unstructured[md]
unstructured[pdf]
duckduckgo-search==3.8.5
langchain==0.0.271
oauth2client==3.0.0
66 changes: 31 additions & 35 deletions test/test_Web_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter

def Web_crawler(url: str) -> str:
    """Fetch *url* and return the text content of its <body>, or '' on failure.

    The URL must be well-formed. TLS verification is disabled on purpose
    (verify=False) so sites with broken certificates can still be scraped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    result = ''
    try:
        # Silence the InsecureRequestWarning triggered by verify=False.
        requests.packages.urllib3.disable_warnings()
        # timeout added so a stalled server cannot hang the caller forever.
        response = requests.get(url, headers=headers, verify=False, timeout=5)
        # response.encoding can be None when the server sends no charset;
        # fall back to utf-8 instead of letting str.encode(None) raise.
        encoding = response.encoding or 'utf-8'
        soup = BeautifulSoup(response.text.encode(encoding), 'lxml', from_encoding='utf-8')
        body_tag = soup.find('body')
        # Pages without a <body> (e.g. bare XML) yield '' instead of crashing.
        result = "".join(body_tag.get_text().split('\n')) if body_tag else ''
    except Exception as e:
        print('\033[31m')
        print("error", e)
        print('\033[0m')
    return result

# def Web_crawler(url: str) -> str:
# """返回链接网址url正文内容,必须是合法的网址"""
# headers = {
Expand All @@ -14,49 +32,26 @@
# result = ''
# try:
# requests.packages.urllib3.disable_warnings()
# # session = requests.Session()
# # session.mount('http://', HTTPAdapter(max_retries=5))
# # session.mount('https://', HTTPAdapter(max_retries=5))
# # response = session.get(url, headers=headers, verify=False)
# response = requests.get(url, headers=headers, verify=False)
# response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
# content_length = int(response.headers.get('Content-Length', 0))
# if content_length > 500000:
# print("Skipping large file:", url)
# return result
# content = response.content
# detected_encoding = chardet.detect(response.content)['encoding']
# decoded_content = response.content.decode(detected_encoding, errors='replace')
# # soup = BeautifulSoup(response.text, 'html.parser')
# soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
# soup = BeautifulSoup(decoded_content, 'lxml')
# # soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
# body = "".join(soup.find('body').get_text().split('\n'))
# result = body
# except Exception as e:
# print('\033[31m')
# print("error url", url)
# print("error", e)
# print('\033[0m')
# return result

def Web_crawler(url: str) -> str:
    """Fetch *url* and return the text content of its <body>, or '' on failure.

    Responses advertising more than ~500 kB via Content-Length are skipped;
    the character encoding is detected with chardet before parsing.
    NOTE(review): this redefinition shadows the earlier Web_crawler in this file.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    result = ''
    try:
        # Silence the InsecureRequestWarning triggered by verify=False.
        requests.packages.urllib3.disable_warnings()
        # stream=True defers the body download until .content is accessed,
        # letting us check Content-Length first.
        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
        content_length = int(response.headers.get('Content-Length', 0))
        if content_length > 500000:
            print("Skipping large file:", url)
            return result
        # chardet may fail to detect an encoding (returns None); fall back
        # to utf-8 so .decode() cannot raise TypeError on None.
        detected_encoding = chardet.detect(response.content)['encoding'] or 'utf-8'
        decoded_content = response.content.decode(detected_encoding, errors='replace')
        soup = BeautifulSoup(decoded_content, 'lxml')
        body_tag = soup.find('body')
        # Pages without a <body> yield '' instead of raising AttributeError.
        result = "".join(body_tag.get_text().split('\n')) if body_tag else ''
    except Exception as e:
        print('\033[31m')
        print("error url", url)
        print("error", e)
        print('\033[0m')
    return result

# def Web_crawler(url: str) -> str:
# """返回链接网址url正文内容,必须是合法的网址"""
# headers = {
Expand Down Expand Up @@ -85,7 +80,8 @@ def Web_crawler(url: str) -> str:
# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403', 'https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it', 'https://beebom.com/what-is-403-forbidden-error-how-to-fix/']:
# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']:
# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']:
for url in ['https://zhidao.baidu.com/question/317577832.html']:
# for url in ['https://zhidao.baidu.com/question/317577832.html']:
for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']:
# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
# for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']:
# for url in ['https://zhuanlan.zhihu.com/p/646786536', 'https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD', 'https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']:
Expand Down

0 comments on commit de7f8b9

Please sign in to comment.