-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassistant.py
234 lines (188 loc) · 8.68 KB
/
assistant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import openai
import os
import streamlit as st
import time
import logging as log
import re
import json
import PyPDF2
import openpyxl
import pandas as pd
import io
import clipboard
# Assistant id, read once at import time from the QE_ASSISTANT_ID environment
# variable (None when the variable is unset).
QE_ASSISTANT_ID = os.getenv("QE_ASSISTANT_ID")
# The openai module itself is used as the client object, so every call below
# goes through its module-level API.
client = openai
# API key, read from the QE_AI_OPENAI_API_KEY environment variable.
openai.api_key = os.getenv("QE_AI_OPENAI_API_KEY")
# Reference link left by the original author (content not verified here):
# https://www.youtube.com/watch?v=2lGrFbpHpz4&t=1385s
def ask_openai(user_message, file_ids=None, thread_id=None, chat=None):
    """Send a user message to the QE assistant and return its reply.

    Args:
        user_message: Text posted to the thread as the user's message.
        file_ids: Optional iterable of OpenAI file ids. Each one is attached
            to the message with the ``file_search`` tool enabled.
        thread_id: Id of an existing thread to continue. A new thread is
            created when this is falsy.
        chat: Unused; kept so existing callers keep working.

    Returns:
        A 4-tuple ``(assistant_response, thread_id, None, total_tokens)``.
        ``assistant_response`` is the reply text with citation markers such
        as "【12:0†source】" removed; the third element is always ``None``.

    Raises:
        Exception: If the run does not finish with status ``completed`` or
            the reply contains no text content.
    """
    if not thread_id:
        thread_id = client.beta.threads.create().id

    attachments = [
        {"file_id": file_id, "tools": [{"type": "file_search"}]}
        for file_id in (file_ids or [])
    ]

    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=user_message,
        attachments=attachments,
    )

    # The assistant id is looked up at call time, as in the original code.
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread_id,
        assistant_id=os.getenv("QE_ASSISTANT_ID"),
    )
    if run.status != 'completed':
        raise Exception(f"Failed to get response {run.status} from OpenAI: {run}")

    messages = client.beta.threads.messages.list(thread_id=thread_id)
    log.info(f"OpenAI usage: {run.usage}")
    total_tokens = run.usage.total_tokens

    if not messages.data:
        raise Exception("OpenAI returned no messages for the completed run")

    # The first entry is treated as the assistant's newest reply (the list
    # endpoint is documented to return newest-first by default).
    latest = messages.data[0]
    log.info(f"OpenAI asst response: {latest}")

    # A message may mix text with non-text parts (e.g. image files); only
    # text parts carry a ``.text.value``.
    text_parts = [
        part.text.value
        for part in latest.content
        if getattr(part, "type", None) == "text"
    ]
    if not text_parts:
        raise Exception("OpenAI response contained no text content")

    # remove text like "【12:0†source】" from content value
    assistant_response = re.sub(r"【\d+:\d+†source】", "", "\n".join(text_parts))
    return assistant_response, thread_id, None, total_tokens
def extract_text_from_file(uploaded_file):
    """Return the text content of an uploaded file, chosen by its MIME type.

    Args:
        uploaded_file: Binary file-like object exposing a ``type`` attribute
            (MIME type string) and ``read()`` — presumably a Streamlit
            upload; confirm against the caller.

    Returns:
        str: For ``application/pdf`` the concatenated text of every page,
        for ``application/json`` the document re-serialized with a 2-space
        indent, for ``text/plain`` the UTF-8 decoded content. Any other
        type (including CSV) yields an empty string.
    """
    file_type = uploaded_file.type

    if file_type == "application/pdf":
        reader = PyPDF2.PdfReader(uploaded_file)
        # Defensive: treat a page that yields no text (None/empty) as "" so
        # the concatenation cannot fail on it.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if file_type == "application/json":
        return json.dumps(json.load(uploaded_file), indent=2)

    if file_type == "text/plain":
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
        # would otherwise survive as a stray "\ufeff" character.
        return uploaded_file.read().decode("utf-8-sig")

    # NOTE: CSV and every other MIME type are not extracted.
    return ""
def reset_kiwi():
    """Reset Kiwi: drop every Streamlit session-state entry, then rerun the app."""
    state = st.session_state
    # Iterate over a snapshot of the keys so entries can be deleted safely.
    for name in tuple(state.keys()):
        del state[name]
    st.rerun()
def clean_markdown(text):
    """Remove Markdown bold/italic markers while keeping the wrapped text.

    Handles ``**bold**``, ``__bold__``, ``*italic*`` and ``_italic_``.
    Underscore markers are only recognized at word boundaries, so
    identifiers such as ``user_id`` keep their underscores. A ``*`` that is
    followed by whitespace (a list bullet, a multiplication sign) is not
    treated as an opening marker.

    Args:
        text: Markdown-formatted text.

    Returns:
        str: The text with emphasis markers stripped.
    """
    # Bold goes first; otherwise the single-character patterns would consume
    # each "**" / "__" pair as an empty italic span.
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)
    # Italics: the opening marker must touch the emphasized text and the
    # closing marker must directly follow it.
    text = re.sub(r'\*(?=\S)(.+?)(?<=\S)\*', r'\1', text)
    text = re.sub(r'(?<!\w)_(?=\S)(.+?)(?<=\S)_(?!\w)', r'\1', text)
    return text
def _parse_test_case(case_text: str) -> dict:
    """Split the stripped text of one test case into its labelled sections."""
    # Same header shapes the block patterns accept: any letter case, optional
    # spaces, and either ":" or "-" after the number.
    title_match = re.search(
        r"(?:Test\s*Case|Scenario)\s*\d+\s*[:-]\s*(.*?)(?=\n|$)",
        case_text, re.IGNORECASE)
    # Each section runs until the next known label or the end of the text.
    # "Expected Results?:" accepts both the singular and plural label.
    objective_match = re.search(
        r"Objective:\s*(.*?)(?=\n\s*(?:Preconditions?:|Steps?:|Expected Results?:)|$)",
        case_text, re.DOTALL)
    preconditions_match = re.search(
        r"Preconditions?:\s*(.*?)(?=\n\s*(?:Steps?:|Expected Results?:)|$)",
        case_text, re.DOTALL)
    # Captures the full Steps section, including multi-line content (like
    # payloads); also matches when no expected-result section follows.
    steps_match = re.search(
        r"Steps?:\s*(.*?)(?=\n\s*Expected Results?:|$)",
        case_text, re.DOTALL)
    # Stops at the next case header, a "--" rule, a blank line, or the end of
    # the text (the end matters because case_text has no trailing newline).
    expected_match = re.search(
        r"Expected Results?:\s*(.*?)(?=\n\s*(?:Test Case|Scenario)|\n-{2,}|\n{2,}|$)",
        case_text, re.DOTALL)

    steps = []
    if steps_match:
        numbered_steps = re.finditer(
            r"(\d+\.\s*.*?)(?=\n\s*\d+\.|\n\n|$)", steps_match.group(1), re.DOTALL)
        # Drop the leading "1. " / "2. " so consumers can renumber freely.
        steps = [
            re.sub(r"^\d+\.\s*", "", step.group(1).strip())
            for step in numbered_steps
        ]

    return {
        "title": title_match.group(1).strip() if title_match else "No Title Found",
        "objective": objective_match.group(1).strip() if objective_match else "No Objective Found",
        "preconditions": preconditions_match.group(1).strip() if preconditions_match else "No Preconditions Found",
        "steps": steps if steps else "No Steps Found",
        "expected_results": expected_match.group(1).strip() if expected_match else "No Expected Results Found"
    }


def extract_test_cases(text: str) -> list:
    """Extract structured test cases from an assistant response.

    Markdown emphasis is stripped first, then the text is scanned for blocks
    that start with "Test Case N:", "Scenario N:" or "N. Test Case"
    (case-insensitive). Each block is parsed into its labelled sections.

    Args:
        text: Raw response text, possibly Markdown-formatted.

    Returns:
        list: One dict per test case with the keys ``title``, ``objective``,
        ``preconditions``, ``steps`` and ``expected_results``. ``steps`` is a
        list of step strings, or the string "No Steps Found" when no numbered
        steps were parsed; every other missing section holds a
        "No ... Found" placeholder string. Exact duplicates (the same case
        matched by more than one header pattern) are returned once.
    """
    # Clean up any Markdown formatting (e.g., italics, bold)
    text = clean_markdown(text)

    test_case_patterns = [
        r'(Test\s*Case\s*\d+\s*[:-].*?)(?=Test\s*Case|$)',
        r'(Scenario\s*\d+\s*[:-].*?)(?=Scenario|$)',
        r'(\d+\.\s*Test\s*Case.*?)(?=\d+\.\s*Test|$)'
    ]

    test_cases = []
    for pattern in test_case_patterns:
        for match in re.finditer(pattern, text, re.DOTALL | re.IGNORECASE):
            case = _parse_test_case(match.group(1).strip())
            # "1. Test Case 1: ..." is matched by two patterns; keep one copy.
            if case not in test_cases:
                test_cases.append(case)
    return test_cases
def _step_lines(steps, numbering):
    """Return one display line per step, numbered from 1 with *numbering*.

    A plain string (the "No Steps Found" placeholder) is returned as a single
    unnumbered line instead of being enumerated character by character.
    """
    if isinstance(steps, str):
        return [steps]
    return [numbering.format(i, step) for i, step in enumerate(steps, 1)]


def download_test_cases(messages):
    """Offer the test cases from the latest assistant message as a download.

    Only the most recent message whose role is "assistant" is analyzed. When
    it contains test cases, the sidebar shows a format selector (CSV, XLSX,
    TXT), a download button and a "Close" button.

    Args:
        messages: Sequence of dicts with "role" and "content" keys.

    Returns:
        bool: True when download controls were rendered, False when there is
        no assistant message or it contains no test cases (an error is shown
        in the sidebar in that case).
    """
    # Initialize or reset the download_format state variable
    if "download_format" not in st.session_state:
        st.session_state.download_format = False  # False when no format is selected

    latest = next(
        (message for message in reversed(messages) if message["role"] == "assistant"),
        None,
    )
    structured_data = extract_test_cases(latest["content"]) if latest is not None else []

    # If no test cases were found, display an error and return
    if not structured_data:
        st.sidebar.error("No test cases found in most recent Kiwi response.")
        return False

    format_type = st.sidebar.selectbox("Select format", ("CSV", "XLSX", "TXT"), help="Only the most recent Kiwi response is analyzed when finding test cases.")
    st.sidebar.markdown(f"Found **{len(structured_data)} test cases** generated from most recent Kiwi response")

    # Ensure all required columns exist in every test case, not just the first
    columns = ['title', 'objective', 'preconditions', 'steps', 'expected_results']
    for case in structured_data:
        for col in columns:
            case.setdefault(col, "No {} data found.".format(col))

    if format_type in ["CSV", "XLSX"]:
        df = pd.DataFrame(structured_data)
        # Flatten each steps list into one multi-line cell
        df['steps'] = df['steps'].apply(
            lambda steps: '\n'.join(_step_lines(steps, "{}) {}")))

        if format_type == "CSV":
            data = df.to_csv(index=False).encode('utf-8')
            mime = "text/csv"
        else:
            buffer = io.BytesIO()
            df.to_excel(buffer, index=False, engine='openpyxl')
            data = buffer.getvalue()
            mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    else:
        lines = []
        for case in structured_data:
            lines.append(f"Title: {case['title']}")
            lines.append(f"Objective: {case['objective']}")
            lines.append(f"Preconditions: {case['preconditions']}")
            lines.append("Steps:")
            lines.extend(f"  {line}" for line in _step_lines(case['steps'], "{}. {}"))
            lines.append(f"Expected Results: {case['expected_results']}")
            # Blank line, separator rule, blank line between cases
            lines.extend(["", "=" * 50, ""])
        data = "".join(f"{line}\n" for line in lines).encode('utf-8')
        mime = "text/plain"

    col1, col2 = st.sidebar.columns(2)
    with col1:
        st.download_button(
            label=f"Download as {format_type}",
            data=data,
            file_name=f"test_cases.{format_type.lower()}",
            mime=mime,
            key="download_button"
        )
    with col2:
        if st.button("Close"):
            st.session_state.download_format = False
            st.rerun()
    return True