+ from langchain.embeddings import SentenceTransformerEmbeddings  # HuggingFaceInstructEmbeddings
+ from langchain.vectorstores import FAISS
+ import os
+ import copy
+ import pprint
+ # import google.generativeai as palm
+ from langchain.llms import GooglePalm
+ from langchain import PromptTemplate
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains import RetrievalQA
+ from langchain.chains.summarize import load_summarize_chain
+ from tempfile import NamedTemporaryFile
+ import streamlit as st
+ from ingest import pageextract
+ import warnings
+ warnings.filterwarnings("ignore")
+
+
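+ # Sidebar choices: the two summarisation modes and the PDF files available under ./data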
+ MODES = ["Page-By-Page", "Complete"]
+ FILES = os.listdir("data")
+
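+ # One-time initialisation of the Streamlit session state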
+ if 'count' not in st.session_state:
+     st.session_state.count = 0
+
+ if 'mode' not in st.session_state:
+     st.session_state.view = False
+     st.session_state.mode = MODES[0]
+     st.session_state.page = 0
+
+
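+ # Cached helper: read the Google PaLM API key from API.txt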
+ @st.cache_resource
+ def getapi():
+     return str(open("API.txt", "r", encoding='utf-8').read())
+
+
+ PALM_API = getapi()
+ # palm.configure(api_key=PALM_API)
+
+
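+ # Cached helper: build the GooglePalm LLM (temperature 0, up to 4000 output tokens)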
+ @st.cache_resource
+ def getmodel():
+     llm = GooglePalm(google_api_key=PALM_API, temperature=0, max_output_tokens=4000)
+     return llm
+
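+ # Cached helper: prompt template that answers the query only from the supplied context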
+ @st.cache_resource
+ def getprompt():
+     template = """Use the information to elaborate, in points, on the user's query.
+ If the user mentions something not in the 'Context', just answer that you don't know.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+ Context: {context}
+
+ Query: {question}
+
+ Only return the helpful answer below and nothing else.
+
+ Helpful answer:
+ """
+     prompt = PromptTemplate(
+         template=template,
+         input_variables=['context', 'question'])
+     return prompt
+
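+ # Deep-copy a RetrievalQA result and collect the page numbers of its source documents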
+ def parseresult(result):
+     PARSED = copy.deepcopy(result)
+     docs = PARSED['source_documents']
+     sourcepage = []
+     for d in docs:
+         sourcepage.append(d.metadata['page'])
+     PARSED['source_pages'] = copy.deepcopy(sourcepage)
+     del sourcepage, result
+     return PARSED
+
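+ # Format the metadata of each source document as a string for display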
+ def getsources(result):
+     sources = []
+     for s in result['source_documents']:
+         sources.append(f"{s.metadata}")
+     return sources
+
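+ # Button callback: reset the page counter, record the chosen mode and load the selected PDF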
+ def startview():
+     # st.runtime.legacy_caching.clear_cache()
+     st.session_state.page = 0
+     st.session_state.mode = mode
+     try:
+         st.session_state.data = getData()
+     except:
+         st.write("ERROR IN LOADING DATA.")
+
+
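+ # Mode-radio callback: leave view mode and reset the page counter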
+ def resetview():
+     st.session_state.view = False
+     st.session_state.mode = mode
+     st.session_state.page = 0
+
+
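+ # Helper for the (currently commented-out) file uploader: write the upload to a temporary PDF and split it into chunks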
+ def getdata(fi):
+     # print(fi.path)
+     text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ".", " "], chunk_size=2000, chunk_overlap=100)
+     texts = []
+     with NamedTemporaryFile(dir='.', suffix='.pdf', delete=False) as f:
+         f.write(fi.getbuffer())
+         # print("DAADD>>>", f.name)
+         texts = texts + copy.deepcopy(text_splitter.split_documents(PyPDFLoader(f.name).load()))
+         # your_function_which_takes_a_path()
+     # print(f"{fi}>>>>><<<<<{texts[0:20]}")
+     os.remove(f.name)
+     return fi.read()
+
+
+ mode = st.sidebar.radio("Pick one", MODES, on_change=resetview)
+ file = st.sidebar.selectbox("Pick one", FILES)
+
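+ # Load the selected PDF from ./data, split it into chunks and hand them to ingest.pageextract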
+ def getData():
+     texts = []
+     text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", ".", " "], chunk_size=1000, chunk_overlap=100)
+     texts = texts + copy.deepcopy(text_splitter.split_documents(PyPDFLoader("data/" + file).load()))
+     print(">>Data received.")
+     # print(texts[0])
+     st.session_state.dummy = copy.deepcopy(texts[0])
+     return pageextract(texts)  # print(f"*******{texts[0]}")
+
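+ # Slider callback: jump straight to the chosen page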
+ def sliderch():
+     # the page slider is registered with key="BP", so its value is read from session state
+     st.session_state.page = st.session_state.BP
+
+ st.session_state.file = file
+ st.title(f'{mode} Summary')
+
+ # file = st.sidebar.file_uploader("Upload a CSV")  # ,on_change=getdata)
+ prompt = False  # st.sidebar.text_input("Enter query")
+ but = st.sidebar.button("Click me", on_click=startview)
+ # st.write(but)
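+
+ # Page-By-Page mode: preview one page at a time and summarise it with a "stuff" summarize chain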
+ if (but or st.session_state.view) and (st.session_state.mode == MODES[0]) and file:
+     col1, col2, col3 = st.columns([1, 3, 3])
+     BB = col1.button("Next page")
+     PP = col1.button("Prev page")
+     BP = col1.slider("Pick a page", 0, len(st.session_state.data) - 1, key="BP", on_change=sliderch)
+     # print(BP)
+     try:
+         chain = load_summarize_chain(getmodel(), chain_type="stuff")
+     except:
+         st.write("ERROR IN LOADING MODEL.")
+     col3.header("Summary : ")
+     # tt=getData(file)
+     # no=st.number_input("Pick a page", 0, tt[-1].metadata['page'])
+     # print(f"%%%%%%%{file}")
+     st.session_state.view = True
+     try:
+         if BB:
+             st.session_state.page += 1
+             if st.session_state.page >= len(st.session_state.data):
+                 st.session_state.page = len(st.session_state.data) - 1
+         if PP:
+             st.session_state.page -= 1
+             if st.session_state.page < 0:
+                 st.session_state.page = 0
+         col2.header(f"Page {st.session_state.page} Preview: ")
+         col2.write(st.session_state.data[st.session_state.page].page_content[0:600] + " .......")
+         st.session_state.dummy.page_content = st.session_state.data[st.session_state.page].page_content
+         col3.write(chain.run([st.session_state.dummy]))
+     except:
+         col3.write("ISSUES IN GENERATING SUMMARY")
+     # st.write(file.read())
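+ # "Complete" mode: whole-document summary is not implemented yet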
+ elif (but or st.session_state.view) and (st.session_state.mode == MODES[1]) and file:
+     col1, col2 = st.columns([2, 3])
+     col1.write("WORK IN")
+     col2.write("PROGRESS ")
+     st.session_state.view = True
+     st.session_state.file = file