-
Notifications
You must be signed in to change notification settings - Fork 8
/
app.py
285 lines (241 loc) · 9.52 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
from __future__ import annotations
import streamlit as st
import requests
import os
from pathlib import Path
from langchain.agents import initialize_agent, tool
from langchain.agents import AgentType
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.schema.output_parser import OutputParserException
from langchain.utilities import GraphQLAPIWrapper
from callbacks.capturing_callback_handler import CapturingCallbackHandler, playback_callbacks
from callbacks.streamlit_callback_handler import StreamlitCallbackHandler
from utils.clear_results import with_clear_container
# Configure the Streamlit page; the sidebar (which holds the API settings)
# starts collapsed.
st.set_page_config(
    page_title="Guac-AI-Mole",
    page_icon="🥑",
    initial_sidebar_state="collapsed",
)

# Directory holding recorded runs, one ``*.pickle`` file per question.
runs_dir = Path(__file__).parent / "runs"
runs_dir.mkdir(exist_ok=True)

# Index saved sessions as {file stem: path}. The files are only listed here,
# not read, so there is no need to open them.
SAVED_SESSIONS = {path.stem: path for path in runs_dir.glob("*.pickle")}

# NOTE: the bare string statements below are intentional; they are page
# content (see Streamlit "magic"), not stray docstrings.
"# 🥑 Guac-AI-Mole"
"Charting the Course for Secure Software Supply Chain"
"Ask questions about your software supply chain and get answers from the Guac-AI-Mole!"

# Sidebar settings. Each input is pre-filled from its environment variable
# (``os.getenv`` yields None when the variable is unset).
openai_api_key = os.getenv("OPENAI_API_KEY")
user_openai_api_key = st.sidebar.text_input(
    "OpenAI API Key",
    type="password",
    help="Set this to your own OpenAI API key.",
    value=openai_api_key,
)

openai_api_endpoint = os.getenv("OPENAI_API_ENDPOINT")
user_openai_api_endpoint = st.sidebar.text_input(
    "OpenAI API Endpoint",
    type="default",
    help="Set this to your own OpenAI endpoint.",
    value=openai_api_endpoint,
)

openai_api_model = os.getenv("OPENAI_API_MODEL")
user_openai_model = st.sidebar.text_input(
    "OpenAI Model",
    type="default",
    help="Set this to your own OpenAI model or deployment name.",
    value=openai_api_model,
)

graphql_endpoint = os.getenv("GUAC_GRAPHQL_ENDPOINT")
user_graphql_endpoint = st.sidebar.text_input(
    "GUAC GraphQL Endpoint",
    type="default",
    help="Set this to your own GUAC GraphQL endpoint.",
    value=graphql_endpoint,
)
def get_schema():
    """Fetch the GraphQL schema by introspection and return a condensed view.

    Posts an introspection query to the endpoint stored in the module-level
    ``user_graphql_endpoint`` and reduces the response to the type names,
    their kinds and their field names.

    Returns:
        dict: Maps every type name that does not start with ``__`` to a dict
        holding its ``"kind"`` and, when the type has at least one field, a
        ``"fields"`` entry with the comma-separated field names (fields whose
        name starts with ``__`` are left out).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the response JSON lacks ``data.__schema.types``.
    """
    query = """
query IntrospectionQuery {
__schema {
types {
name
kind
fields {
name
type {
name
kind
ofType {
name
kind
}
}
}
}
}
}"""
    # A timeout keeps an unreachable endpoint from hanging the app forever.
    response = requests.post(
        user_graphql_endpoint, json={"query": query}, timeout=30
    )
    response.raise_for_status()
    json_output = response.json()

    # Simplify the schema
    simplified_schema = {}
    for type_info in json_output["data"]["__schema"]["types"]:
        type_name = type_info["name"]
        if type_name.startswith("__"):
            # Skip the introspection system's own types.
            continue

        entry = {"kind": type_info["kind"]}
        fields = type_info.get("fields")
        # Truthiness covers both None and an empty list; the previous
        # ``fields is not []`` identity check was always true.
        if fields:
            entry["fields"] = ", ".join(
                field["name"]
                for field in fields
                if not field["name"].startswith("__")
            )
        simplified_schema[type_name] = entry
    return simplified_schema
@tool
def answer_question(query: str):
    """Answer a question using graphql API"""
    # The docstring above is left untouched on purpose: it is read by the
    # ``@tool`` decorator and is therefore part of runtime behaviour.
    #
    # Flow: fetch the simplified schema, build one prompt containing the
    # kubectl example, the sample GraphQL queries, the user's question and
    # the schema, then hand it to the module-level ``agent``. If the first
    # run raises, the error text is appended to the prompt and the agent is
    # run one more time; a failure of that second run propagates.
    schema_summary = get_schema()

    kubectl_example = """
## List running images using terminal tool
kubectl get pods --all-namespaces -o go-template --template='{{range .items}}{{range .spec.containers}}{{.image}} {{end}}{{end}}'
"""

    query_examples = """
## Use this query when user asks what are dependencies of an image. When querying for the dependencies of a given package, you must specify the package field. When the query is about images, the oci package type should be used.
query IsDependencyQ1 {
IsDependency(isDependencySpec: { package: { type: "oci" name: "alpine" }}) {
dependencyPackage {
type
namespaces {
namespace
names {
name
}
}
}
}
}
## Use this query when user asks what images depend on a package (like logrus). When querying for packages that depends on another package, you must specify the dependencyPackage field. When the query is about images, the oci package type should be used.
query IsDependencyQ2 {
IsDependency(isDependencySpec: {
package: { type: "oci" }
dependencyPackage: { name: "logrus" }
}) {
package {
namespaces {
namespace
names {
name
}
}
}
}
}
## Use this query when user asks about a vulnerability id, this will return a package that has the vulnerability. You must query further with IsDependencyQ2 to see what images includes this package.
query CertifyVulnQ1 {
CertifyVuln(certifyVulnSpec: {vulnerability: {vulnerabilityID: "dsa-5122-1"}}) {
package {
namespaces {
names {
name
}
}
}
}
}
"""

    first_prompt = f"""
Do NOT, under any circumstances, use ``` anywhere.
To check if an image is running, use the terminal tool to list all running images with kubectl. Example:
{kubectl_example} Only execute this based on the graphql answer, determine if the image is running.
Consider the syntax as image name followed by a dash and tag. For example, if 'bar-latest' is returned as part of graphql query, and terminal output contains 'foo/bar:latest' then consider it as running.
Here are some example queries for the graphql endpoint described below:
{query_examples}
Answer the following question: {query} by using either terminal or the graphql database that has this schema {schema_summary}. action_input should not contain a seperate query key. action_input should only have the query itself."""

    try:
        return agent.run(first_prompt)
    except Exception as err:
        # Single retry: tell the agent what went wrong and ask it to
        # reformat its query.
        retry_prompt = first_prompt + (
            f"\n\nThere was an error with the request.\nError: {err}\n\nPlease reformat GraphQL query (avoid issues with backticks if possible)."
        )
        return agent.run(retry_prompt)
# --- LLM / agent setup -------------------------------------------------------
# Custom questions are only enabled when an API key was supplied; without one
# only the recorded sample sessions can be played back.
tools = []
llm = None
if user_openai_api_key:
    enable_custom = True
    # The endpoint may be None/empty (nothing configured) and Azure endpoints
    # are commonly written with a trailing slash, so normalise before testing.
    normalized_endpoint = (user_openai_api_endpoint or "").rstrip("/")
    if normalized_endpoint.endswith("azure.com"):
        print("Using Azure LLM")
        llm = AzureChatOpenAI(
            openai_api_key=user_openai_api_key,
            openai_api_base=user_openai_api_endpoint,
            openai_api_version="2023-08-01-preview",
            openai_api_type="azure",
            deployment_name=user_openai_model,
            temperature=0,
            streaming=True,
        )
    else:
        print("Using OpenAI or LocalAI LLM")
        llm = ChatOpenAI(
            openai_api_key=user_openai_api_key,
            openai_api_base=user_openai_api_endpoint,
            model_name=user_openai_model,
            temperature=0,
            streaming=True,
        )
    tools = load_tools(
        ["graphql", "terminal"],
        graphql_endpoint=user_graphql_endpoint,
        llm=llm,
    )
    # Initialize agent
    agent = initialize_agent(
        tools,
        llm,
        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )
else:
    enable_custom = False

# --- Question form -----------------------------------------------------------
with st.form(key="form"):
    if not enable_custom:
        "Ask one of the sample questions, or enter your API Key in the sidebar to ask your own custom questions."
    # Saved sessions are keyed by the question with "_" in place of spaces;
    # show them with spaces restored.
    prefilled = (
        st.selectbox(
            "Sample questions",
            sorted(key.replace("_", " ") for key in SAVED_SESSIONS),
        )
        or ""
    )
    user_input = ""
    if enable_custom:
        user_input = st.text_input("Or, ask your own question")
    # Fall back to the selected sample when no custom question was typed.
    if not user_input:
        user_input = prefilled
    submit_clicked = st.form_submit_button("Submit Question")

# --- Answering ---------------------------------------------------------------
output_container = st.empty()
if with_clear_container(submit_clicked):
    output_container = output_container.container()
    output_container.chat_message("user").write(user_input)
    answer_container = output_container.chat_message("assistant", avatar="🥑")
    st_callback = StreamlitCallbackHandler(answer_container)

    # If we've saved this question, play it back instead of actually running LangChain
    # (so that we don't exhaust our API calls unnecessarily)
    path_user_input = "_".join(user_input.split(" "))
    if path_user_input in SAVED_SESSIONS:
        print(f"Playing saved session: {user_input}")
        # SAVED_SESSIONS already stores the path found under runs_dir, so it
        # is used directly rather than being joined onto the runs directory
        # a second time.
        session_path = SAVED_SESSIONS[path_user_input]
        print(f"Playing saved session: {session_path}")
        answer = playback_callbacks(
            [st_callback], str(session_path), max_pause_time=1
        )
    else:
        print(f"Running LangChain: {user_input} because not in SAVED_SESSIONS")
        capturing_callback = CapturingCallbackHandler()
        try:
            answer = answer_question(
                user_input, callbacks=[st_callback, capturing_callback]
            )
        except OutputParserException as e:
            # Recover the raw LLM text that follows the "LLM output: " marker;
            # if the marker is missing, show the whole message instead of
            # failing with an IndexError.
            message = str(e.args[0]) if e.args else str(e)
            parts = message.split("LLM output: ")
            answer = parts[1] if len(parts) > 1 else message

        # Record the run so the same question can be replayed later.
        pickle_filename = path_user_input + ".pickle"
        try:
            capturing_callback.dump_records_to_file(runs_dir / pickle_filename)
        except OSError as err:
            # Saving is only a cache; a question that does not form a valid
            # file name (e.g. one containing "/") presumably fails here and
            # must not prevent the answer from being displayed.
            print(f"Could not save session {pickle_filename}: {err}")

    answer_container.write(answer)