Skip to content

Commit 14ab055

Browse files
committed
Merge branch 'webscrape_refactor_v2' of https://github.com/UIUC-Chatbot/ai-ta-backend into webscrape_refactor_v2
2 parents 60dd668 + faf3516 commit 14ab055

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

ai_ta_backend/main.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
1414
from ai_ta_backend.vector_database import Ingest
15-
from ai_ta_backend.web_scrape import mit_course_download, WebScrape
15+
from ai_ta_backend.web_scrape import WebScrape, mit_course_download
1616

1717
app = Flask(__name__)
1818
CORS(app)
@@ -324,7 +324,8 @@ def scrape() -> Response:
324324
max_urls: int = request.args.get('max_urls', default=100, type=int)
325325
max_depth: int = request.args.get('max_depth', default=2, type=int)
326326
timeout: int = request.args.get('timeout', default=3, type=int)
327-
stay_on_baseurl: bool | None = request.args.get('stay_on_baseurl', default=True, type=bool)
327+
# stay_on_baseurl = request.args.get('stay_on_baseurl', default='', type=str)
328+
stay_on_baseurl: bool = request.args.get('stay_on_baseurl', default=True, type=lambda x: x.lower() == 'true')
328329

329330
if url == '' or max_urls == -1 or max_depth == -1 or timeout == -1 or course_name == '' or stay_on_baseurl is None:
330331
# proper web error "400 Bad request"
@@ -335,10 +336,10 @@ def scrape() -> Response:
335336
)
336337

337338
# print all input params
338-
print(f"Web scrape!")
339-
print(f"Url: {url}")
339+
print(f"Web scrape: {url}")
340340
print(f"Max Urls: {max_urls}")
341341
print(f"Max Depth: {max_depth}")
342+
print(f"Stay on BaseURL: {stay_on_baseurl}")
342343
print(f"Timeout in Seconds ⏰: {timeout}")
343344

344345
scraper = WebScrape()

0 commit comments

Comments (0)