-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovie_recs.py
41 lines (32 loc) · 1.27 KB
/
movie_recs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pymongo
import requests
import os
from dotenv import load_dotenv
# Read a local .env file (if present) into the process environment so the
# os.getenv() lookups below can find CONNECTION_STRING and HF_TOKEN.
# By default load_dotenv() does not override variables that are already set.
load_dotenv()

# Handle on the `movies` collection of the `sample_mflix` database.
client = pymongo.MongoClient(os.getenv('CONNECTION_STRING'))
db = client.sample_mflix
collection = db.movies

# Hugging Face access token and the feature-extraction endpoint that
# generate_embedding() posts to.
hf_token = os.getenv('HF_TOKEN')
embedding_url = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"
def generate_embedding(text: str, *, timeout: float = 30.0) -> list[float]:
    """Return the embedding the Hugging Face inference endpoint yields for *text*.

    Posts ``{"inputs": text}`` to ``embedding_url``, authenticating with
    ``hf_token`` as a bearer token, and returns the decoded JSON body.

    Args:
        text: The string to embed.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON response body (annotated as a list of floats).

    Raises:
        ValueError: If the endpoint answers with a status code other than 200.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.post(
        embedding_url,
        headers={"Authorization": f"Bearer {hf_token}"},
        json={"inputs": text},
        timeout=timeout,
    )

    if response.status_code != 200:
        raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")

    return response.json()
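# One-time backfill (uncomment to run): for up to 50 movies that have a plot,
# embed the plot and store the result under `plot_embedding_hf`.
# for doc in collection.find({'plot':{"$exists": True}}).limit(50):
#     doc['plot_embedding_hf'] = generate_embedding(doc['plot'])
#     collection.replace_one({'_id': doc['_id']}, doc)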
# Free-text description of the movie we are looking for.
query = "imaginary characters from outer space at war"

# Single $vectorSearch stage: embed the query text and compare it with the
# vectors stored under `plot_embedding_hf`, keeping the 4 best of 100
# candidates.
vector_search_stage = {
    "$vectorSearch": {
        "queryVector": generate_embedding(query),
        "path": "plot_embedding_hf",
        "numCandidates": 100,
        "limit": 4,
        "index": "PlotSemanticSearch",
    }
}
results = collection.aggregate([vector_search_stage])

# Print the title and plot of every match.
for document in results:
    print(f'Movie Name: {document["title"]},\nMovie Plot: {document["plot"]}\n')