1111import requests
1212import uvicorn
1313from fastapi import FastAPI
14- from langchain_community .document_loaders import Docx2txtLoader , PyPDFLoader
14+ from langchain_community .document_loaders import Docx2txtLoader , PyPDFLoader , UnstructuredURLLoader
15+ from urllib .parse import urlparse
1516
1617# Configure logging
1718logging .basicConfig (level = logging .INFO )
@@ -90,6 +91,38 @@ def read_video_file(self, file):
9091 logger .info (">>> Reading video file: %s" , file .name )
9192 base64_str = self .encode_file_to_base64 (file )
9293 return self .generate_summary (base64_str , document_type = "video" )
94+
def is_valid_url(self, url):
    """Check whether ``url`` is an absolute URL with a scheme and a host.

    Args:
        url: The candidate URL to validate.

    Returns:
        bool: True when ``url`` parses with both a non-empty scheme and a
        non-empty network location; False otherwise, including for input
        that ``urlparse`` cannot handle at all.
    """
    try:
        result = urlparse(url)
    except (ValueError, AttributeError, TypeError):
        # ValueError: malformed URL text (e.g. an unterminated IPv6 host).
        # AttributeError/TypeError: input that is not str/bytes, such as a
        # number or a file-like object; treat it as "not a URL" instead of
        # letting the exception escape to the caller.
        return False
    return bool(result.scheme and result.netloc)
101+
def read_url(self, url):
    """Read and process the content of a url.

    Args:
        url: The url to be read as a document.

    Returns:
        str: The text content of the page, or an error message if the url
        is invalid or no content could be loaded from it.
    """
    # Reset any content left over from a previous call.
    self.page_content = ""

    logger.info(">>> Reading url: %s", url)
    if not self.is_valid_url(url=url):
        msg = f"Invalid URL '{url}'. Make sure the link provided is a valid URL."
        logger.error(msg)
        return msg

    loader = UnstructuredURLLoader([url])
    pages = loader.load()

    # The loader may hand back an empty list (presumably when the fetch or
    # parse fails and it is configured to continue on failure -- confirm
    # against the loader docs). Indexing [0] would then raise IndexError,
    # so report the failure the same way as an invalid URL.
    if not pages:
        msg = f"No content could be loaded from URL '{url}'."
        logger.error(msg)
        return msg

    # Only one URL is passed in, so the first document is used.
    self.page_content = pages[0].page_content
    return self.page_content
125+
93126
94127 def generate_summary (self , doc_content , document_type = "text" ):
95128 """Generate a summary for the given document content.
@@ -220,6 +253,11 @@ def render(self):
220253 label = "Please upload Video file (.mp4)" , file_types = [".mp4" ], process_function = self .read_video_file
221254 )
222255
256+ # URL Upload UI
257+ url_ui = self .create_upload_ui (
258+ label = "Please upload a url" , file_types = [], process_function = self .read_url
259+ )
260+
223261 # Render all the UI in separate tabs
224262 with gr .Blocks () as self .demo :
225263 gr .Markdown ("# Doc Summary" )
@@ -232,6 +270,8 @@ def render(self):
232270 audio_ui .render ()
233271 with gr .TabItem ("Upload Video" ):
234272 video_ui .render ()
273+ with gr .TabItem ("Upload URL" ):
274+ url_ui .render ()
235275
236276 return self .demo
237277
0 commit comments