Commit

Saving embeddings as array for easier loading (#97) -deploy
krugergui authored Oct 29, 2023
1 parent b812780 commit 90f7be4
Showing 2 changed files with 9 additions and 2 deletions.
8 changes: 7 additions & 1 deletion src/backend/api/matching_algorithm.py
@@ -1,3 +1,4 @@
import json
from typing import List
import torch
from sklearn.metrics.pairwise import cosine_similarity
@@ -22,11 +23,16 @@ def get_free_text_match(
        float: a number from -1 to 1 indicating cosine similarity
    """

    if candidate_embeddings is None or job_embeddings is None:
        return 0

    if isinstance(candidate_embeddings, str) or isinstance(job_embeddings, str):
        # Embeddings may arrive serialized as JSON strings; empty strings mean no data
        if candidate_embeddings == "" or job_embeddings == "":
            return 0

        candidate_embeddings = json.loads(candidate_embeddings)
        job_embeddings = json.loads(job_embeddings)

    return cosine_similarity(candidate_embeddings, job_embeddings)[0][0]

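In short, get_free_text_match now accepts embeddings that were serialized to JSON. A minimal round-trip sketch of that contract (the sample vectors below are illustrative, not taken from the repository):

import json
from sklearn.metrics.pairwise import cosine_similarity

# Embeddings arrive as JSON strings, e.g. as produced by generate_embeddings below
candidate_embeddings = json.dumps([[0.1, 0.2, 0.3]])
job_embeddings = json.dumps([[0.1, 0.2, 0.25]])

# The function parses the strings back into nested lists before scoring
score = cosine_similarity(json.loads(candidate_embeddings), json.loads(job_embeddings))[0][0]
print(score)  # a float between -1 and 1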
3 changes: 2 additions & 1 deletion src/backend/api/tokenization_n_embedding.py
@@ -1,3 +1,4 @@
import json
import torch
from typing import List
from transformers import AutoTokenizer, AutoModel
@@ -43,7 +44,7 @@ def generate_embeddings(text: str, model_name: str=MODEL_NAME) -> List[List]:
    with torch.no_grad():
        text_outputs = model(**text_tokens)
        text_embeddings = text_outputs.last_hidden_state.mean(dim=1)
-    return text_embeddings
+    return json.dumps(text_embeddings.tolist())

if __name__ == "__main__":
s = ""
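The net effect: generate_embeddings now returns a JSON string instead of a torch tensor, so embeddings can be stored as plain text and parsed back with json.loads, which is the "array for easier loading" from the commit title. A hedged usage sketch, assuming the module is importable under the path below:

import json
from backend.api.tokenization_n_embedding import generate_embeddings  # import path is an assumption

embeddings_json = generate_embeddings("Python developer with NLP experience")  # JSON string
embeddings = json.loads(embeddings_json)  # back to List[List[float]], ready for cosine_similarity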
