3
3
import logging
4
4
import os
5
5
6
- import requests
7
6
import streamlit as st
8
7
from dotenv import load_dotenv
8
+ from pathway .xpacks .llm .document_store import IndexingStatus
9
9
from pathway .xpacks .llm .question_answering import RAGClient
10
10
11
11
load_dotenv ()
71
71
question = st .text_input (label = "" , placeholder = "Ask your question?" )
72
72
73
73
74
- def get_options_list (metadata_list : list [dict ], opt_key : str ) -> list :
74
def get_indexed_files(metadata_list: list[dict], opt_key: str) -> list:
    """Return the distinct ``opt_key`` values of fully indexed files.

    Only entries whose ``"_indexing_status"`` equals
    ``IndexingStatus.INDEXED`` contribute to the result.

    Args:
        metadata_list: Metadata dicts, one per document. Every entry must
            contain ``"_indexing_status"``; matching entries must also
            contain ``opt_key``.
        opt_key: Metadata key whose values are collected (the caller in this
            file passes ``"path"``).

    Returns:
        The unique values, ordered by their string form so that the list
        is displayed in a stable order.

    Raises:
        KeyError: If an entry lacks ``"_indexing_status"``, or a matching
            entry lacks ``opt_key``.
    """
    indexed_values = {
        file[opt_key]
        for file in metadata_list
        if file["_indexing_status"] == IndexingStatus.INDEXED
    }
    # A set has no defined iteration order; sort so the rendered list does
    # not depend on hashing.
    return sorted(indexed_values, key=str)
83
+
84
+
85
def get_ingested_files(metadata_list: list[dict], opt_key: str) -> list:
    """Return the distinct ``opt_key`` values of files still being processed.

    Only entries whose ``"_indexing_status"`` equals
    ``IndexingStatus.INGESTED`` contribute to the result.

    Args:
        metadata_list: Metadata dicts, one per document. Every entry must
            contain ``"_indexing_status"``; matching entries must also
            contain ``opt_key``.
        opt_key: Metadata key whose values are collected (the caller in this
            file passes ``"path"``).

    Returns:
        The unique values, ordered by their string form so that the list
        is displayed in a stable order.

    Raises:
        KeyError: If an entry lacks ``"_indexing_status"``, or a matching
            entry lacks ``opt_key``.
    """
    ingested_values = {
        file[opt_key]
        for file in metadata_list
        if file["_indexing_status"] == IndexingStatus.INGESTED
    }
    # A set has no defined iteration order; sort so the rendered list does
    # not depend on hashing.
    return sorted(ingested_values, key=str)
78
94
79
95
@@ -83,7 +99,8 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
83
99
84
100
st .session_state ["document_meta_list" ] = document_meta_list
85
101
86
- available_files = get_options_list (st .session_state ["document_meta_list" ], "path" )
102
+ indexed_files = get_indexed_files (st .session_state ["document_meta_list" ], "path" )
103
+ ingested_files = get_ingested_files (st .session_state ["document_meta_list" ], "path" )
87
104
88
105
89
106
with st .sidebar :
@@ -92,11 +109,18 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
92
109
icon = ":material/code:" ,
93
110
)
94
111
95
- file_names = [i .split ("/" )[- 1 ] for i in available_files ]
112
+ indexed_file_names = [i .split ("/" )[- 1 ] for i in indexed_files ]
113
+ ingested_file_names = [i .split ("/" )[- 1 ] for i in ingested_files ]
96
114
97
115
markdown_table = "| Indexed files |\n | --- |\n "
98
- for file_name in file_names :
116
+ for file_name in indexed_file_names :
99
117
markdown_table += f"| { file_name } |\n "
118
+
119
+ if len (ingested_file_names ) > 0 :
120
+ markdown_table += "| Files being processed |\n | --- |\n "
121
+ for file_name in ingested_file_names :
122
+ markdown_table += f"| { file_name } |\n "
123
+
100
124
st .markdown (markdown_table , unsafe_allow_html = True )
101
125
102
126
st .button ("⟳ Refresh" , use_container_width = True )
@@ -140,14 +164,6 @@ def get_options_list(metadata_list: list[dict], opt_key: str) -> list:
140
164
st .markdown (css , unsafe_allow_html = True )
141
165
142
166
143
- def send_post_request (
144
- url : str , data : dict , headers : dict = {}, timeout : int | None = None
145
- ):
146
- response = requests .post (url , json = data , headers = headers , timeout = timeout )
147
- response .raise_for_status ()
148
- return response .json ()
149
-
150
-
151
167
if question :
152
168
logger .info (
153
169
{
@@ -157,8 +173,9 @@ def send_post_request(
157
173
)
158
174
159
175
with st .spinner ("Retrieving response..." ):
160
- api_response = conn .answer (question )
176
+ api_response = conn .answer (question , return_context_docs = True )
161
177
response = api_response ["response" ]
178
+ context_docs = api_response ["context_docs" ]
162
179
163
180
logger .info (
164
181
{
@@ -172,3 +189,9 @@ def send_post_request(
172
189
173
190
st .markdown (f"**Answering question:** { question } " )
174
191
st .markdown (f"""{ response } """ )
192
+ with st .expander (label = "Context documents" ):
193
+ st .markdown ("Documents sent to LLM as context:\n " )
194
+ for i , doc in enumerate (context_docs ):
195
+ st .markdown (
196
+ f"{ i + 1 } . Path: { doc ['metadata' ]['path' ]} \n ```\n { doc ['text' ]} \n ```"
197
+ )
0 commit comments