Commit
update g4f version to 0.1.6.7. fixed bug: pdf load error
Showing 4 changed files with 74 additions and 3 deletions.
@@ -16,4 +16,4 @@ unstructured[pdf]
 duckduckgo-search==3.8.5
 langchain==0.0.271
 oauth2client==3.0.0
-g4f==0.1.6.6
+g4f==0.1.6.7
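After installing from the updated requirements file, the version bump can be sanity-checked at runtime. A minimal sketch (not part of the commit), assuming Python 3.8+ so that importlib.metadata is available:

# Sketch: confirm the installed g4f package matches the new pin.
from importlib.metadata import version

installed = version("g4f")
print("installed g4f:", installed)
assert installed == "0.1.6.7", "g4f version mismatch; reinstall from the requirements file"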
@@ -0,0 +1,10 @@
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage


def gptsearch(result, llm):
    response = llm([HumanMessage(content=result)])
    response = response.content
    return response


print(gptsearch("鲁迅和周树人为什么打架", chainllm))
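The snippet above references chainllm without defining it, so it only runs in a context that already provides an LLM client. A minimal sketch of such a setup (an assumption for illustration, not part of the commit; it mirrors the ChatOpenAI arguments used in the commented-out code further down):

# Hypothetical setup for chainllm: a ChatOpenAI client with the API key taken
# from the environment, matching the pattern used elsewhere in this repository.
import os
from langchain.chat_models import ChatOpenAI

chainllm = ChatOpenAI(
    temperature=0.5,
    model_name="gpt-3.5-turbo",
    openai_api_key=os.environ.get("API", None),
)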
@@ -0,0 +1,56 @@
# import requests
# import urllib.parse
# import os
# import sys
# sys.path.append(os.getcwd())
# import config

# from langchain.chat_models import ChatOpenAI
# from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.document_loaders import UnstructuredPDFLoader
# from langchain.chains import RetrievalQA


# def get_doc_from_url(url):
#     filename = urllib.parse.unquote(url.split("/")[-1])
#     response = requests.get(url, stream=True)
#     with open(filename, 'wb') as f:
#         for chunk in response.iter_content(chunk_size=1024):
#             f.write(chunk)
#     return filename

# def pdf_search(docurl, query_message, model="gpt-3.5-turbo"):
#     chatllm = ChatOpenAI(temperature=0.5, openai_api_base=config.API_URL.split("chat")[0], model_name=model, openai_api_key=os.environ.get('API', None))
#     embeddings = OpenAIEmbeddings(openai_api_base=config.API_URL.split("chat")[0], openai_api_key=os.environ.get('API', None))
#     filename = get_doc_from_url(docurl)
#     docpath = os.getcwd() + "/" + filename
#     loader = UnstructuredPDFLoader(docpath)
#     print(docpath)
#     documents = loader.load()
#     os.remove(docpath)
#     # Initialize the text splitter
#     text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=25)
#     # Split the loaded documents
#     split_docs = text_splitter.split_documents(documents)
#     vector_store = Chroma.from_documents(split_docs, embeddings)
#     # Create the question-answering chain
#     qa = RetrievalQA.from_chain_type(llm=chatllm, chain_type="stuff", retriever=vector_store.as_retriever(), return_source_documents=True)
#     # Run the query
#     result = qa({"query": query_message})
#     return result['result']

# pdf_search("https://www.nsfc.gov.cn/csc/20345/22468/pdf/2001/%E5%86%BB%E7%BB%93%E8%A3%82%E9%9A%99%E7%A0%82%E5%B2%A9%E4%BD%8E%E5%91%A8%E5%BE%AA%E7%8E%AF%E5%8A%A8%E5%8A%9B%E7%89%B9%E6%80%A7%E8%AF%95%E9%AA%8C%E7%A0%94%E7%A9%B6.pdf", "端水实验的目的是什么?")

from PyPDF2 import PdfReader

def has_text(pdf_path):
    with open(pdf_path, 'rb') as file:
        pdf = PdfReader(file)
        page = pdf.pages[0]
        text = page.extract_text()
        return text

pdf_path = '/Users/yanyuming/Downloads/GitHub/ChatGPT-Telegram-Bot/冻结裂隙砂岩低周循环动力特性试验研究.pdf'
print(has_text(pdf_path))
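The active part of this test file checks whether the first page of a local PDF yields any extractable text, which is a common way to tell a text-layer PDF from a scanned one before handing it to a text-based loader, and may relate to the "pdf load error" mentioned in the commit message. A slightly more defensive sketch along the same lines (an illustration, not the commit's code):

# Sketch: report whether any page of the PDF has an extractable text layer,
# treating empty or whitespace-only results as "no text".
from PyPDF2 import PdfReader

def pdf_has_text_layer(pdf_path):
    reader = PdfReader(pdf_path)  # PdfReader also accepts a file path directly
    for page in reader.pages:
        text = page.extract_text()
        if text and text.strip():
            return True
    return False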