From 25220cc60501792dcb68c68a5b1fdc6873f9b611 Mon Sep 17 00:00:00 2001 From: oldwinter Date: Wed, 9 Aug 2023 17:35:41 +0800 Subject: [PATCH 1/3] aa --- ...n\347\232\204\345\211\257\346\234\2542.py" | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 "tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" diff --git "a/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" "b/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" new file mode 100644 index 00000000..3ebb80c7 --- /dev/null +++ "b/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" @@ -0,0 +1,62 @@ +## 1.终端里设置环境变量 + +# export OPENAI_API_TYPE=azure +# export OPENAI_API_VERSION=2023-05-15 +# export OPENAI_API_BASE=https://ingtubeopenai.openai.azure.com +# export OPENAI_API_KEY=<your-azure-openai-api-key> + +# export PINECONE_API_KEY=<your-pinecone-api-key> +# export PINECONE_ENV=gcp-starter + +## 2.加载原始csv数据 + +# llm(documents1[0].page_content) + + +from langchain.document_loaders import ObsidianLoader + +loader = ObsidianLoader("/Users/yingtu/知识库/ingtube") +documents = loader.load() + +## 3.embeddings对象模型初始化,实际调用在后面。 +from langchain.embeddings import OpenAIEmbeddings +embeddings = OpenAIEmbeddings( + client="", + model="text-embedding-ada-002", + deployment="ingtube-ada", + # input="texts", + # chunk_size=1 + show_progress_bar=True, +) + +## 4.pinecone初始化 +import pinecone +import os +# PINECONE_API_KEY="d0e32935-ca46-4a82-be38-34cc17dbdcce" +# PINECONE_ENV="gcp-starter" + +# initialize pinecone + +pinecone.init( + api_key=os.getenv('PINECONE_API_KEY'), # find at app.pinecone.io + environment=os.getenv('PINECONE_ENV'), # next to api key in console +) + +index_name = "ingtube-test" + +if index_name not in pinecone.list_indexes(): + pinecone.create_index( + name=index_name, + metric='cosine', + dimension=1536 +) + + +# 将documents按照每16个元素为一组进行分割 +chunks = [documents[i:i + 16] for i in range(0, len(documents), 16)] + 
+from langchain.vectorstores import Pinecone +## 5.循环调用Pinecone.from_documents方法,从embedding接口生成数据,同时存储向量数据至pinecone +for chunk in chunks: + Pinecone.from_documents(chunk, embeddings, index_name=index_name) + From 93dc4aabee6e4d3952c9fbdd24ce3e306cd3a57b Mon Sep 17 00:00:00 2001 From: oldwinter Date: Wed, 9 Aug 2023 17:42:06 +0800 Subject: [PATCH 2/3] aab --- ...n\347\232\204\345\211\257\346\234\2542.py" | 32 ------------------- 1 file changed, 32 deletions(-) diff --git "a/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" "b/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" index 3ebb80c7..b0462f90 100644 --- "a/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" +++ "b/tmp-test-gpt-cr/obsidian\347\232\204\345\211\257\346\234\2542.py" @@ -28,35 +28,3 @@ # chunk_size=1 show_progress_bar=True, ) - -## 4.pinecone初始化 -import pinecone -import os -# PINECONE_API_KEY="d0e32935-ca46-4a82-be38-34cc17dbdcce" -# PINECONE_ENV="gcp-starter" - -# initialize pinecone - -pinecone.init( - api_key=os.getenv('PINECONE_API_KEY'), # find at app.pinecone.io - environment=os.getenv('PINECONE_ENV'), # next to api key in console -) - -index_name = "ingtube-test" - -if index_name not in pinecone.list_indexes(): - pinecone.create_index( - name=index_name, - metric='cosine', - dimension=1536 -) - - -# 将documents按照每16个元素为一组进行分割 -chunks = [documents[i:i + 16] for i in range(0, len(documents), 16)] - -from langchain.vectorstores import Pinecone -## 5.循环调用Pinecone.from_documents方法,从embedding接口生成数据,同时存储向量数据至pinecone -for chunk in chunks: - Pinecone.from_documents(chunk, embeddings, index_name=index_name) - From 53213e02a490897306294b0d0fcee6bb2c5d065a Mon Sep 17 00:00:00 2001 From: oldwinter Date: Wed, 9 Aug 2023 17:43:59 +0800 Subject: [PATCH 3/3] aabd --- .github/workflows/chatgpt-cr.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chatgpt-cr.yaml b/.github/workflows/chatgpt-cr.yaml index 9926310a..a381bbc1 
100644 --- a/.github/workflows/chatgpt-cr.yaml +++ b/.github/workflows/chatgpt-cr.yaml @@ -23,5 +23,5 @@ jobs: PROMPT: "请检查以下代码差异是否有混淆或不规范之处:" # example: Please check if there are any confusions or irregularities in the following code diff: top_p: 1 # https://platform.openai.com/docs/api-reference/chat/create#chat/create-top_p temperature: 1 # https://platform.openai.com/docs/api-reference/chat/create#chat/create-temperature - max_tokens: 10000 - MAX_PATCH_LENGTH: 10000 # if the patch/diff length is large than MAX_PATCH_LENGTH, will be ignored and won't review. By default, with no MAX_PATCH_LENGTH set, there is also no limit for the patch/diff length. \ No newline at end of file + max_tokens: 1024 + MAX_PATCH_LENGTH: 1024 # if the patch/diff length is larger than MAX_PATCH_LENGTH, it will be ignored and won't be reviewed. By default, with no MAX_PATCH_LENGTH set, there is also no limit for the patch/diff length.