-
Notifications
You must be signed in to change notification settings - Fork 6
/
answer.py
128 lines (103 loc) · 3.98 KB
/
answer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import json
import openai
import csv
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment before any of them are read.
load_dotenv()
# CSV file that chat() scans; it reads the 'embedding' and 'text' columns.
embeddings_filename = "embeddings.csv"
# Company name shown in the welcome banner and inserted into the system prompt.
company_name = "Dreamboats.ai"
def calculate_similarity(vec1, vec2):
    """Return the cosine similarity between two equal-length numeric vectors.

    Args:
        vec1: Sequence of numbers.
        vec2: Sequence of numbers with the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. When either vector has zero magnitude (including
        two empty vectors) the cosine is undefined and 0.0 is returned.

    Raises:
        ValueError: If the two vectors differ in length.
    """
    if len(vec1) != len(vec2):
        raise ValueError(
            f"vectors must have the same length, got {len(vec1)} and {len(vec2)}"
        )

    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    magnitude1 = sum(a ** 2 for a in vec1) ** 0.5
    magnitude2 = sum(b ** 2 for b in vec2) ** 0.5

    denominator = magnitude1 * magnitude2
    if denominator == 0:
        # A zero vector has no direction; report "no similarity" rather than
        # dividing by zero.
        return 0.0
    return dot_product / denominator
def _find_best_article(question_embedding, filename):
    """Return the ``text`` of the CSV row whose embedding best matches the question.

    Each row's ``embedding`` column is parsed as JSON and scored against
    ``question_embedding`` with ``calculate_similarity``. The file is read in
    a single pass; when several rows tie for the highest score the earliest
    one wins. An empty string is returned when the file has no data rows.
    """
    best_score = None
    best_text = ""
    # newline="" lets the csv module handle line endings itself, so line
    # breaks inside quoted fields are preserved.
    with open(filename, newline="") as f:
        for row in csv.DictReader(f):
            # The embedding column holds a JSON-encoded list of numbers.
            text_embedding = json.loads(row["embedding"])
            score = calculate_similarity(question_embedding, text_embedding)
            if best_score is None or score > best_score:
                best_score = score
                best_text = row["text"]
    return best_text


def _build_system_prompt(company):
    """Return the system message instructing the model how to answer for ``company``."""
    return f"""
You are an AI assistant. You work for {company}. You will be asked questions from a
customer and will answer in a helpful and friendly manner.
You will be provided company information from {company} under the
[Article] section. The customer question will be provided under the
[Question] section. You will answer the customers questions based on the
article. Only provide the answer to the query don't respond with completed part of question.
Answer in points and not in long paragraphs
If the users question is not answered by the article you will respond with
'I'm sorry I don't know.
'
"""


def _build_question_prompt(article, question):
    """Return the user message pairing the retrieved article with the question."""
    return f"""
[Article]
{article}
[Question]
{question}
"""


def chat():
    """Run the interactive question-answering loop on stdin/stdout.

    For every question the user types, the question is embedded through the
    OpenAI embeddings endpoint, the closest article is looked up in
    ``embeddings_filename``, and the chat model is asked to answer from that
    article. The loop ends when the user types ``quit`` or submits an empty
    line. An error raised by either API call is printed and the user is
    prompted again.
    """
    # The key does not change while the program runs, so set it once.
    openai.api_key = os.environ.get('OPENAI_KEY')

    print("Welcome to the", company_name, "Knowledge Base. How can I help you?")
    print("Type 'quit' to exit.")

    first_turn = True
    while True:
        if not first_turn:
            print("Any Other Questions?")
        first_turn = False

        question = input("> ")
        # Leave on an explicit 'quit' or when the user just presses enter.
        if not question or question == "quit":
            break

        # The API call itself is what can fail, so it sits inside the try.
        # Catching broadly is deliberate here: this is the interactive
        # boundary, and any failure should be reported without ending the
        # session.
        try:
            response = openai.Embedding.create(
                model="text-embedding-ada-002",
                input=[question],
            )
            question_embedding = response['data'][0]["embedding"]
        except Exception as e:
            print(e)
            continue

        original_text = _find_best_article(question_embedding, embeddings_filename)

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "system",
                        "content": _build_system_prompt(company_name),
                    },
                    {
                        "role": "user",
                        "content": _build_question_prompt(original_text, question),
                    },
                ],
                temperature=0.2,
                max_tokens=2000,
            )
            answer = response['choices'][0]['message']['content']
        except Exception as e:
            print(e)
            continue

        # ANSI escape codes render the reply in green.
        print("\n\033[32mSupport:\033[0m")
        print("\033[32m{}\033[0m".format(answer.lstrip()))

    print("Goodbye! Come back if you have any more questions. :)")
# Start the interactive session only when this file is run as a script, so
# that importing the module does not block waiting for user input.
if __name__ == "__main__":
    chat()