Skip to content

Commit fb8d7c2

Browse files
committed
added read_url function
Signed-off-by: okhleif-IL <[email protected]>
1 parent 031cf6e commit fb8d7c2

File tree

1 file changed

+41
-1
lines changed

1 file changed

+41
-1
lines changed

DocSum/ui/gradio/docsum_ui_gradio.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
import requests
1212
import uvicorn
1313
from fastapi import FastAPI
14-
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
14+
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, UnstructuredURLLoader
15+
from urllib.parse import urlparse
1516

1617
# Configure logging
1718
logging.basicConfig(level=logging.INFO)
@@ -90,6 +91,38 @@ def read_video_file(self, file):
9091
logger.info(">>> Reading video file: %s", file.name)
9192
base64_str = self.encode_file_to_base64(file)
9293
return self.generate_summary(base64_str, document_type="video")
94+
95+
def is_valid_url(self, url):
    """Check whether ``url`` is an absolute HTTP(S) URL.

    Args:
        url: The candidate URL. Anything that is not a string is rejected.

    Returns:
        bool: True when ``url`` parses with an ``http`` or ``https`` scheme
        and a non-empty network location, False otherwise.
    """
    # urlparse() raises AttributeError (not ValueError) on truthy non-string
    # input such as an int or a file object, so reject those up front.
    if not isinstance(url, str):
        return False

    try:
        result = urlparse(url)
    except ValueError:
        # Raised for malformed input, e.g. an unbalanced IPv6 bracket.
        return False

    # The URL is handed to a loader that fetches it, so only web schemes are
    # accepted; this keeps things like ftp:// or file://host/... from being
    # reported as valid. urlparse() already lower-cases the scheme.
    return result.scheme in ("http", "https") and bool(result.netloc)
101+
102+
def read_url(self, url):
    """Read and process the content of a url.

    Args:
        url: The url to be read as a document.

    Returns:
        str: The text content of the page, or an error message if the url
        is invalid or no content could be loaded from it.
    """
    # Reset first so a failed read never leaves a previous page's text behind.
    self.page_content = ""

    logger.info(">>> Reading url: %s", url)
    if not self.is_valid_url(url=url):
        msg = f"Invalid URL '{url}'. Make sure the link provided is a valid URL."
        logger.error(msg)
        return msg

    documents = UnstructuredURLLoader([url]).load()
    # NOTE(review): the loader appears to log fetch/parse failures and carry
    # on by default, which yields an empty list here; indexing [0] on that
    # would raise IndexError. Confirm against the installed
    # langchain_community version.
    if not documents:
        msg = f"No content could be read from '{url}'."
        logger.error(msg)
        return msg

    # A single url was passed in, so only the first document is used.
    self.page_content = documents[0].page_content
    return self.page_content
125+
93126

94127
def generate_summary(self, doc_content, document_type="text"):
95128
"""Generate a summary for the given document content.
@@ -220,6 +253,11 @@ def render(self):
220253
label="Please upload Video file (.mp4)", file_types=[".mp4"], process_function=self.read_video_file
221254
)
222255

256+
# URL Upload UI
257+
url_ui = self.create_upload_ui(
258+
label="Please upload a url", file_types=[], process_function=self.read_url
259+
)
260+
223261
# Render all the UI in separate tabs
224262
with gr.Blocks() as self.demo:
225263
gr.Markdown("# Doc Summary")
@@ -232,6 +270,8 @@ def render(self):
232270
audio_ui.render()
233271
with gr.TabItem("Upload Video"):
234272
video_ui.render()
273+
with gr.TabItem("Upload URL"):
274+
url_ui.render()
235275

236276
return self.demo
237277

0 commit comments

Comments
 (0)