-
Notifications
You must be signed in to change notification settings - Fork 0
/
interntest.py
82 lines (70 loc) · 1.98 KB
/
interntest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
import os
import re

import requests
import weaviate
# Endpoint of the Weaviate instance this script talks to.
weaviate_url = 'https://intern-test-7m4gez13.weaviate.network'

# Credentials are read from the environment instead of being committed to the
# repository. Set WEAVIATE_API_KEY and OPENAI_API_KEY before running; a missing
# variable raises KeyError naming the variable.
# NOTE(review): the keys that used to be hard-coded here were exposed in source
# control and should be treated as leaked and rotated.
client = weaviate.Client(
    url=weaviate_url,
    auth_client_secret=weaviate.AuthApiKey(api_key=os.environ["WEAVIATE_API_KEY"]),
    additional_headers={
        # Forwarded to Weaviate so the text2vec-openai vectorizer can call OpenAI.
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
    },
)
# Schema for the "testsArticle1" class: three string-typed properties,
# vectorized with the text2vec-openai module.
class_obj = {
    "class": "testsArticle1",
    "vectorizer": "text2vec-openai",
    "properties": [
        {"name": field_name, "dataType": ["string"]}
        for field_name in ("title", "author", "text")
    ],
}
def import_data_to_weaviate(data):
    """Upload parsed article records to the ``testsArticle1`` class.

    Previously each record was converted to a dict and then discarded, so
    nothing was ever sent to Weaviate. Records are now added through the
    client's batch interface.

    Args:
        data: Iterable of dicts. The ``title``, ``author`` and ``full_text``
            keys are read from each dict; a missing key is stored as an
            empty string. ``full_text`` is stored under the ``text`` property.

    Returns:
        None.
    """
    # The batch context manager sends any still-pending objects on exit.
    with client.batch as batch:
        for item in data:
            properties = {
                "title": item.get("title", ""),
                "author": item.get("author", ""),
                "text": item.get("full_text", ""),
            }
            batch.add_data_object(
                data_object=properties,
                class_name="testsArticle1",
            )
# Register the class described by class_obj with the Weaviate instance.
# NOTE(review): this runs on every execution of the script; presumably the
# server rejects a class that already exists — confirm and guard if needed.
client.schema.create_class(class_obj)
def read_and_parse_md_file(md_file_path):
    """Parse a file of ``key: value`` records separated by ``---`` lines.

    A line starting with ``---`` ends the current record. Any other line
    that matches ``<word>: <value>`` adds a field to the current record
    (a repeated key overwrites the earlier value); lines that do not match
    are ignored. Empty records are never emitted.

    Args:
        md_file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A list of dicts, one per non-empty record, in file order.
    """
    field_pattern = re.compile(r'^(\w+): (.+)$')
    data = []
    current_item = {}
    with open(md_file_path, 'r', encoding='utf-8') as md_file:
        for line in md_file:
            if line.startswith('---'):
                if current_item:
                    data.append(current_item)
                current_item = {}
                continue
            match = field_pattern.match(line)
            if match:
                current_item[match.group(1)] = match.group(2)
    # Flush the final record: a file that does not end with a '---' line
    # would otherwise silently lose its last entry.
    if current_item:
        data.append(current_item)
    return data
# Parse the first and the second Markdown dataset files, in that order.
data1, data2 = (
    read_and_parse_md_file(dataset_path)
    for dataset_path in ('test-dataset-1.md', 'test-dataset-2.md')
)
# Build the query step by step: fetch title and text from testsArticle1,
# apply a hybrid search restricted to the "text" property, keep 3 results.
hybrid_query = client.query.get("testsArticle1", ["title", "text"])
hybrid_query = hybrid_query.with_hybrid(query="创新", properties=["text"])
hybrid_query = hybrid_query.with_limit(3)
response = hybrid_query.do()

# Dump the raw response as indented JSON for inspection.
print(json.dumps(response, indent=2))