Skip to content

Commit c9ee7d9

Browse files
szymondudycz, tryptofanik, and bjornengdahl
authored and committed
UI for presenting not indexed files (#8483)
Co-authored-by: Albert Roethel <[email protected]> Co-authored-by: bjornengdahl <[email protected]> GitOrigin-RevId: 7eb146176a18d96de482d74e159f13393c3df09a
1 parent 426d383 commit c9ee7d9

File tree

2 files changed

+38
-19
lines changed

2 files changed

+38
-19
lines changed
Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,2 @@
11
streamlit==1.37.0
22
load_dotenv==0.1.0
3-
nest_asyncio==1.6.0
4-
aiohttp==3.9.5
5-
beautifulsoup4==4.12.3
6-
openai==1.35.10

examples/pipelines/demo-question-answering/ui/ui.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import logging
44
import os
55

6-
import requests
76
import streamlit as st
87
from dotenv import load_dotenv
8+
from pathway.xpacks.llm.document_store import IndexingStatus
99
from pathway.xpacks.llm.question_answering import RAGClient
1010

1111
load_dotenv()
@@ -71,9 +71,25 @@
7171
question = st.text_input(label="", placeholder="Ask your question?")
7272

7373

74-
def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
74+
def get_indexed_files(metadata_list: list[dict], opt_key: str) -> list:
    """Return the distinct ``opt_key`` values of files that are fully indexed.

    Args:
        metadata_list: Per-document metadata dicts. An entry is counted only
            when its ``"_indexing_status"`` field equals
            ``IndexingStatus.INDEXED``.
        opt_key: Metadata key whose values are collected (e.g. ``"path"``).

    Returns:
        The unique ``opt_key`` values of the indexed entries, in the order
        they first appear in ``metadata_list``.

    Raises:
        KeyError: If an indexed entry does not contain ``opt_key``.
    """
    # dict.fromkeys de-duplicates like a set but keeps first-seen order, so
    # the rendered list does not reshuffle arbitrarily between runs.
    unique_values = dict.fromkeys(
        file[opt_key]
        for file in metadata_list
        # .get(): an entry that carries no status is skipped instead of
        # raising KeyError and breaking the whole page.
        if file.get("_indexing_status") == IndexingStatus.INDEXED
    )
    return list(unique_values)
83+
84+
85+
def get_ingested_files(metadata_list: list[dict], opt_key: str) -> list:
    """Return the distinct ``opt_key`` values of files ingested but not indexed.

    Args:
        metadata_list: Per-document metadata dicts. An entry is counted only
            when its ``"_indexing_status"`` field equals
            ``IndexingStatus.INGESTED``.
        opt_key: Metadata key whose values are collected (e.g. ``"path"``).

    Returns:
        The unique ``opt_key`` values of the ingested entries, in the order
        they first appear in ``metadata_list``.

    Raises:
        KeyError: If an ingested entry does not contain ``opt_key``.
    """
    # dict.fromkeys de-duplicates like a set but keeps first-seen order, so
    # the rendered list does not reshuffle arbitrarily between runs.
    unique_values = dict.fromkeys(
        file[opt_key]
        for file in metadata_list
        # .get(): an entry that carries no status is skipped instead of
        # raising KeyError and breaking the whole page.
        if file.get("_indexing_status") == IndexingStatus.INGESTED
    )
    return list(unique_values)
7894

7995

@@ -83,7 +99,8 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
8399

84100
st.session_state["document_meta_list"] = document_meta_list
85101

86-
available_files = get_options_list(st.session_state["document_meta_list"], "path")
102+
indexed_files = get_indexed_files(st.session_state["document_meta_list"], "path")
103+
ingested_files = get_ingested_files(st.session_state["document_meta_list"], "path")
87104

88105

89106
with st.sidebar:
@@ -92,11 +109,18 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
92109
icon=":material/code:",
93110
)
94111

95-
file_names = [i.split("/")[-1] for i in available_files]
112+
indexed_file_names = [i.split("/")[-1] for i in indexed_files]
113+
ingested_file_names = [i.split("/")[-1] for i in ingested_files]
96114

97115
markdown_table = "| Indexed files |\n| --- |\n"
98-
for file_name in file_names:
116+
for file_name in indexed_file_names:
99117
markdown_table += f"| {file_name} |\n"
118+
119+
if len(ingested_file_names) > 0:
120+
markdown_table += "| Files being processed |\n| --- |\n"
121+
for file_name in ingested_file_names:
122+
markdown_table += f"| {file_name} |\n"
123+
100124
st.markdown(markdown_table, unsafe_allow_html=True)
101125

102126
st.button("⟳ Refresh", use_container_width=True)
@@ -140,14 +164,6 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
140164
st.markdown(css, unsafe_allow_html=True)
141165

142166

143-
def send_post_request(
144-
url: str, data: dict, headers: dict = {}, timeout: int | None = None
145-
):
146-
response = requests.post(url, json=data, headers=headers, timeout=timeout)
147-
response.raise_for_status()
148-
return response.json()
149-
150-
151167
if question:
152168
logger.info(
153169
{
@@ -157,8 +173,9 @@ def send_post_request(
157173
)
158174

159175
with st.spinner("Retrieving response..."):
160-
api_response = conn.answer(question)
176+
api_response = conn.answer(question, return_context_docs=True)
161177
response = api_response["response"]
178+
context_docs = api_response["context_docs"]
162179

163180
logger.info(
164181
{
@@ -172,3 +189,9 @@ def send_post_request(
172189

173190
st.markdown(f"**Answering question:** {question}")
174191
st.markdown(f"""{response}""")
192+
with st.expander(label="Context documents"):
193+
st.markdown("Documents sent to LLM as context:\n")
194+
for i, doc in enumerate(context_docs):
195+
st.markdown(
196+
f"{i+1}. Path: {doc['metadata']['path']}\n ```\n{doc['text']}\n```"
197+
)

0 commit comments

Comments
 (0)