Skip to content

Commit

Permalink
Merge branch 'main' into canvas
Browse files (browse the repository at this point in the history)
  • Loading branch information
Maxwell-Lindsey committed Oct 23, 2023
2 parents 65b2ba1 + 817eb2e commit acd61e7
Show file tree
Hide file tree
Showing 5 changed files with 752 additions and 496 deletions.
16 changes: 9 additions & 7 deletions ai_ta_backend/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@

from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
from ai_ta_backend.vector_database import Ingest
from ai_ta_backend.web_scrape import main_crawler, mit_course_download
from ai_ta_backend.web_scrape import WebScrape, mit_course_download
from ai_ta_backend.canvas import CanvasAPI

app = Flask(__name__)
Expand Down Expand Up @@ -327,7 +327,9 @@ def scrape() -> Response:
max_urls: int = request.args.get('max_urls', default=100, type=int)
max_depth: int = request.args.get('max_depth', default=2, type=int)
timeout: int = request.args.get('timeout', default=3, type=int)
stay_on_baseurl: bool | None = request.args.get('stay_on_baseurl', default=True, type=bool)
# stay_on_baseurl = request.args.get('stay_on_baseurl', default='', type=str)
stay_on_baseurl: bool = request.args.get('stay_on_baseurl', default=True, type=lambda x: x.lower() == 'true')
depth_or_breadth:str = request.args.get('depth_or_breadth', default='breadth', type=str)

if url == '' or max_urls == -1 or max_depth == -1 or timeout == -1 or course_name == '' or stay_on_baseurl is None:
# proper web error "400 Bad request"
Expand All @@ -338,14 +340,14 @@ def scrape() -> Response:
)

# print all input params
print(f"Web scrape!")
print(f"Url: {url}")
print(f"Web scrape: {url}")
print(f"Max Urls: {max_urls}")
print(f"Max Depth: {max_depth}")
print(f"Stay on BaseURL: {stay_on_baseurl}")
print(f"Timeout in Seconds ⏰: {timeout}")
print(f"Stay on baseurl: {stay_on_baseurl}")

success_fail_dict = main_crawler(url, course_name, max_urls, max_depth, timeout, stay_on_baseurl)

scraper = WebScrape()
success_fail_dict = scraper.main_crawler(url, course_name, max_urls, max_depth, timeout, stay_on_baseurl, depth_or_breadth)

response = jsonify(success_fail_dict)
response.headers.add('Access-Control-Allow-Origin', '*')
Expand Down
5 changes: 3 additions & 2 deletions ai_ta_backend/nomic_logging.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,8 +120,9 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:

# add embeddings to the project
project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True)
project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata))
project.rebuild_maps()
with project.wait_for_project_lock():
project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata))
project.rebuild_maps()

except Exception as e:
# if project doesn't exist, create it
Expand Down
Loading

0 comments on commit acd61e7

Please sign in to comment.