12
12
13
13
from ai_ta_backend .nomic_logging import get_nomic_map , log_convo_to_nomic
14
14
from ai_ta_backend .vector_database import Ingest
15
- from ai_ta_backend .web_scrape import mit_course_download , WebScrape
15
+ from ai_ta_backend .web_scrape import WebScrape , mit_course_download
16
16
17
17
app = Flask (__name__ )
18
18
CORS (app )
@@ -324,7 +324,8 @@ def scrape() -> Response:
324
324
max_urls : int = request .args .get ('max_urls' , default = 100 , type = int )
325
325
max_depth : int = request .args .get ('max_depth' , default = 2 , type = int )
326
326
timeout : int = request .args .get ('timeout' , default = 3 , type = int )
327
- stay_on_baseurl : bool | None = request .args .get ('stay_on_baseurl' , default = True , type = bool )
327
+ # stay_on_baseurl = request.args.get('stay_on_baseurl', default='', type=str)
328
+ stay_on_baseurl : bool = request .args .get ('stay_on_baseurl' , default = True , type = lambda x : x .lower () == 'true' )
328
329
329
330
if url == '' or max_urls == - 1 or max_depth == - 1 or timeout == - 1 or course_name == '' or stay_on_baseurl is None :
330
331
# proper web error "400 Bad request"
@@ -335,10 +336,10 @@ def scrape() -> Response:
335
336
)
336
337
337
338
# print all input params
338
- print (f"Web scrape!" )
339
- print (f"Url: { url } " )
339
+ print (f"Web scrape: { url } " )
340
340
print (f"Max Urls: { max_urls } " )
341
341
print (f"Max Depth: { max_depth } " )
342
+ print (f"Stay on BaseURL: { stay_on_baseurl } " )
342
343
print (f"Timeout in Seconds ⏰: { timeout } " )
343
344
344
345
scraper = WebScrape ()
0 commit comments