From 29172b35b393a48069e47392856a2deaa3a4f3b5 Mon Sep 17 00:00:00 2001 From: Robert Shelton Date: Thu, 25 Jul 2024 11:55:32 -0400 Subject: [PATCH] more cleanup --- backend/arxivsearch/api/routes/papers.py | 2 +- backend/arxivsearch/db/load.py | 2 +- backend/arxivsearch/db/redis_helpers.py | 2 +- backend/arxivsearch/main.py | 1 - .../schema/{similarity.py => models.py} | 15 ++++++++++++--- backend/arxivsearch/schema/provider.py | 9 --------- .../arxivsearch/tests/api/routes/test_papers.py | 3 +-- backend/arxivsearch/tests/utils/seed.py | 2 +- backend/arxivsearch/utils/embeddings.py | 7 ++----- backend/entrypoint.sh | 5 ----- backend/scripts.py | 4 ---- docker-local-redis.yml | 11 ----------- frontend/package.json | 2 +- 13 files changed, 20 insertions(+), 45 deletions(-) rename backend/arxivsearch/schema/{similarity.py => models.py} (78%) delete mode 100644 backend/arxivsearch/schema/provider.py delete mode 100644 backend/entrypoint.sh diff --git a/backend/arxivsearch/api/routes/papers.py b/backend/arxivsearch/api/routes/papers.py index c805c6c..2b81b72 100644 --- a/backend/arxivsearch/api/routes/papers.py +++ b/backend/arxivsearch/api/routes/papers.py @@ -8,7 +8,7 @@ from arxivsearch import config from arxivsearch.db import redis_helpers -from arxivsearch.schema.similarity import ( +from arxivsearch.schema.models import ( PaperSimilarityRequest, SearchResponse, UserTextSimilarityRequest, diff --git a/backend/arxivsearch/db/load.py b/backend/arxivsearch/db/load.py index 70f69b0..d47c1bd 100644 --- a/backend/arxivsearch/db/load.py +++ b/backend/arxivsearch/db/load.py @@ -10,7 +10,7 @@ from arxivsearch import config from arxivsearch.db import redis_helpers -from arxivsearch.schema.provider import Provider +from arxivsearch.schema.models import Provider logger = logging.getLogger(__name__) diff --git a/backend/arxivsearch/db/redis_helpers.py b/backend/arxivsearch/db/redis_helpers.py index 015a2d3..c1ae308 100644 --- a/backend/arxivsearch/db/redis_helpers.py +++ b/backend/arxivsearch/db/redis_helpers.py @@ -23,7 +23,7 @@ def get_schema(): return IndexSchema.from_yaml(os.path.join(dir_path, "index.yaml")) -def get_index(): +def get_test_index(): dir_path = os.path.dirname(os.path.realpath(__file__)) index = SearchIndex.from_yaml(os.path.join(dir_path, "index.yaml")) index.connect(redis_url=config.REDIS_URL) diff --git a/backend/arxivsearch/main.py b/backend/arxivsearch/main.py index e3e4686..9a1ed80 100644 --- a/backend/arxivsearch/main.py +++ b/backend/arxivsearch/main.py @@ -56,7 +56,6 @@ def main(): } ) - # uvicorn.run(app, **server_attr) uvicorn.run("arxivsearch.main:app", **server_attr) diff --git a/backend/arxivsearch/schema/similarity.py b/backend/arxivsearch/schema/models.py similarity index 78% rename from backend/arxivsearch/schema/similarity.py rename to backend/arxivsearch/schema/models.py index 8f9414c..b782b15 100644 --- a/backend/arxivsearch/schema/similarity.py +++ b/backend/arxivsearch/schema/models.py @@ -1,6 +1,15 @@ -from pydantic import BaseModel, Field +from enum import Enum +from pydantic import BaseModel -from arxivsearch.schema.provider import Provider +from arxivsearch.schema.models import Provider + + +class Provider(str, Enum): + """Embedding model provider""" + + huggingface = "huggingface" + openai = "openai" + cohere = "cohere" class BaseRequest(BaseModel): @@ -20,7 +29,7 @@ class UserTextSimilarityRequest(BaseRequest): class Paper(BaseModel): - paper_id: str # = Field(alias="id") + paper_id: str authors: str categories: str year: str diff --git a/backend/arxivsearch/schema/provider.py b/backend/arxivsearch/schema/provider.py deleted file mode 100644 index 649b37d..0000000 --- a/backend/arxivsearch/schema/provider.py +++ /dev/null @@ -1,9 +0,0 @@ -from enum import Enum - - -class Provider(str, Enum): - """Embedding model provider""" - - huggingface = "huggingface" - openai = "openai" - cohere = "cohere" diff --git a/backend/arxivsearch/tests/api/routes/test_papers.py b/backend/arxivsearch/tests/api/routes/test_papers.py index 18f9da2..2e3a738 100644 --- a/backend/arxivsearch/tests/api/routes/test_papers.py +++ b/backend/arxivsearch/tests/api/routes/test_papers.py @@ -1,9 +1,8 @@ import pytest -import pytest_asyncio from httpx import AsyncClient from arxivsearch.main import app -from arxivsearch.schema.similarity import ( +from arxivsearch.schema.models import ( PaperSimilarityRequest, UserTextSimilarityRequest, ) diff --git a/backend/arxivsearch/tests/utils/seed.py b/backend/arxivsearch/tests/utils/seed.py index d14a55e..f5b7e98 100644 --- a/backend/arxivsearch/tests/utils/seed.py +++ b/backend/arxivsearch/tests/utils/seed.py @@ -20,6 +20,6 @@ def seed_test_db(): paper["openai"] = np.array(paper["openai"], dtype=np.float32).tobytes() paper["cohere"] = np.array(paper["cohere"], dtype=np.float32).tobytes() - index = redis_helpers.get_index() + index = redis_helpers.get_test_index() index.load(data=papers, id_field="paper_id") return papers diff --git a/backend/arxivsearch/utils/embeddings.py b/backend/arxivsearch/utils/embeddings.py index f374ac8..3c66502 100644 --- a/backend/arxivsearch/utils/embeddings.py +++ b/backend/arxivsearch/utils/embeddings.py @@ -8,7 +8,7 @@ ) from arxivsearch import config -from arxivsearch.schema.provider import Provider +from arxivsearch.schema.models import Provider def preprocess_text(text: str) -> str: @@ -43,10 +43,7 @@ class Embeddings: def __init__(self): self.oai_vectorizer = OpenAITextVectorizer(model=config.OPENAI_EMBEDDING_MODEL) self.co_vectorizer = CohereTextVectorizer(model=config.COHERE_EMBEDDING_MODEL) - self.hf_vectorizer = HFTextVectorizer( - model=config.SENTENCE_TRANSFORMER_MODEL - ) # resume_download=True - # self.hf_vectorizer = None + self.hf_vectorizer = HFTextVectorizer(model=config.SENTENCE_TRANSFORMER_MODEL) async def get(self, provider: str, text: str): """ diff --git a/backend/entrypoint.sh b/backend/entrypoint.sh deleted file mode 100644 index d8e902f..0000000 --- a/backend/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#! /usr/bin/env bash - -# python -m arxivsearch.db.load - -python -m arxivsearch.main \ No newline at end of file diff --git a/backend/scripts.py b/backend/scripts.py index c8cb816..9ee5ff1 100644 --- a/backend/scripts.py +++ b/backend/scripts.py @@ -9,10 +9,6 @@ def start_app(): # load data subprocess.run(["python", "-m", "arxivsearch.db.load"], check=True) # start app - # subprocess.run( - # ["uvicorn", "arxivsearch.main:app", "--port", "8888", "--host", "0.0.0.0"], - # check=True, - # ) subprocess.run(["python", "-m", "arxivsearch.main"], check=True) diff --git a/docker-local-redis.yml b/docker-local-redis.yml index 3a179a9..33edc66 100644 --- a/docker-local-redis.yml +++ b/docker-local-redis.yml @@ -21,20 +21,9 @@ services: ports: - "6379:6379" - "8001:8001" - # volumes: - # - redis-vector-db:/data healthcheck: test: ["CMD", "redis-cli", "-h", "localhost", "-p", "6379", "ping"] interval: 2s timeout: 1m30s retries: 5 start_period: 5s - - -# volumes: -# redis-vector-db: -# driver: local -# driver_opts: -# type: none -# device: data -# o: bind diff --git a/frontend/package.json b/frontend/package.json index 73d0d0c..604c307 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,5 +1,5 @@ { - "name": "my-app", + "name": "redis-arXiv-search", "version": "0.1.0", "private": true, "dependencies": {