Merge pull request #78 from oldwinter/temp
test3
oldwinter committed Aug 9, 2023
2 parents d299793 + dd104e1 commit cf22e04
Showing 6 changed files with 62 additions and 78 deletions.

This file was deleted.

22 changes: 0 additions & 22 deletions tmp-test-gpt-cr/2022年度网络流行语.md

This file was deleted.

This file was deleted.

15 changes: 0 additions & 15 deletions tmp-test-gpt-cr/80%时间输入,20%时间输出.md

This file was deleted.

62 changes: 62 additions & 0 deletions tmp-test-gpt-cr/obsidian.py
@@ -0,0 +1,62 @@
## 1. Set the environment variables in the terminal

# export OPENAI_API_TYPE=azure
# export OPENAI_API_VERSION=2023-05-15
# export OPENAI_API_BASE=https://ingtubeopenai.openai.azure.com
# export OPENAI_API_KEY=ea31775d794e47beb2f6cd479817ce81

# export PINECONE_API_KEY=d0e32935-ca46-4a82-be38-34cc17dbdcce
# export PINECONE_ENV=gcp-starter

## 2. Load the raw source documents (the Obsidian vault)

# llm(documents1[0].page_content)


from langchain.document_loaders import ObsidianLoader

loader = ObsidianLoader("/Users/yingtu/知识库/ingtube")
documents = loader.load()

## 3. Initialize the embeddings model object; the actual API calls happen later.
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(
    client="",
    model="text-embedding-ada-002",
    deployment="ingtube-ada",
    # input="texts",
    # chunk_size=1,
    show_progress_bar=True,
)

## 4. Initialize Pinecone
import pinecone
import os
# PINECONE_API_KEY="d0e32935-ca46-4a82-be38-34cc17dbdcce"
# PINECONE_ENV="gcp-starter"

# initialize pinecone

pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),  # find at app.pinecone.io
    environment=os.getenv('PINECONE_ENV'),  # next to api key in console
)

index_name = "ingtube-test"

if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name,
        metric='cosine',
        dimension=1536,
    )


# Split documents into chunks of 16 items each
chunks = [documents[i:i + 16] for i in range(0, len(documents), 16)]

from langchain.vectorstores import Pinecone
## 5. Loop over the chunks, calling Pinecone.from_documents to generate embeddings via the embedding API and store the resulting vectors in Pinecone.
for chunk in chunks:
    Pinecone.from_documents(chunk, embeddings, index_name=index_name)
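
Once the loop above has populated the index, the stored vectors can be read back through the same langchain wrapper. The snippet below is a minimal sketch rather than part of this commit: it assumes the embeddings object, the Pinecone initialization, and index_name from the script above, and the query string is only an example.

from langchain.vectorstores import Pinecone

# Reconnect to the already-populated index and run a similarity search.
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings)
results = docsearch.similarity_search("how are the notes in this vault organized?", k=4)
for doc in results:
    # ObsidianLoader records the note's file name under the "source" metadata key.
    print(doc.metadata.get("source"), doc.page_content[:80])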

Empty file.

