Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Book recommender #127

Merged
merged 18 commits into from
Mar 1, 2024
18 changes: 18 additions & 0 deletions examples/panel/book-recommender/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Book Recommender

To run this example you need to set the `OPENAI_API_KEY` environment variable.

```bash
export OPENAI_API_KEY=<your_api_key>
```

Create a zip from all the files and follow the instructions for deploying a [Panel](https://docs.cloud.ploomber.io/en/latest/apps/panel.html) application.
You also need to set `OPENAI_API_KEY` as an [environment variable](https://docs.cloud.ploomber.io/en/latest/user-guide/env-vars.html) while deploying the application.

To regenerate the embeddings, run the following command:

```bash
python rag_book_recommender.py
```

The script writes its output to `embeddings_another.json` in the current working directory. Copy that file to the `assets/` folder as `embeddings.json`.
132 changes: 132 additions & 0 deletions examples/panel/book-recommender/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
Book recommender chat application built with Panel's `ChatInterface`.

A `callback` function answers each user message with book recommendations
generated from the nearest stored book embeddings and an OpenAI chat model.

Based on the basic chat example:
https://github.com/holoviz-topics/panel-chat-examples/blob/main/docs/examples/basics/basic_chat.py
"""
edublancas marked this conversation as resolved.
Show resolved Hide resolved

import panel as pn
from openai import OpenAI
from scipy.spatial import KDTree
import numpy as np

from rag_book_recommender import get_book_description_by_title, EmbeddingsStore, get_authors, get_embeddings


# Shared OpenAI client used for every chat-completion request in this module.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment, as
# the README instructs -- confirm against the openai package documentation.
client = OpenAI()

# Initialise Panel before any component is created.
pn.extension()

# Embedding store used to embed each incoming user query.
store = EmbeddingsStore()
# Author lookup loaded once at import time; indexed by author to get titles.
all_authors = get_authors()
edublancas marked this conversation as resolved.
Show resolved Hide resolved


def detect_author(user_query):
    """Return the known author mentioned in ``user_query``, or ``""``.

    The chat model is asked to extract (and spell-correct) the author name
    from the query. The reply is stripped and upper-cased, then returned only
    if it is a key of ``all_authors``; otherwise an empty string is returned
    so callers can treat "no author" as falsy.

    Args:
        user_query: Free-text message typed by the user.

    Returns:
        The upper-cased author name when it is present in ``all_authors``,
        else ``""``.
    """
    system_prompt = """
    You're a system that determines the author in user query.

    You need to return only the author name. Please fix any typo if possible
    """
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            # One-shot example: the expected answer must be an *assistant*
            # turn so the model sees it as its own prior reply.
            {"role": "user", "content": "What are some books by Sandra Boynton"},
            {"role": "assistant", "content": "Sandra Boynton"},
            {"role": "user", "content": user_query},
        ],
        seed=42,
        n=1,
    )
    # The message content may be None; surrounding whitespace would also
    # defeat the exact-match lookup below.
    reply = response.choices[0].message.content or ""
    author = reply.strip().upper()
    return author if author in all_authors else ""


def book_recommender_agent(user_query, verbose=False):
    """Answer ``user_query`` with book recommendations.

    Steps:
      1. Load the stored title -> embedding mapping.
      2. If an author is detected in the query, narrow the candidates to that
         author's titles (when at least one of them has an embedding).
      3. Embed the query and pick up to three nearest titles with a KD-tree.
      4. Ask the chat model to write the answer from those titles and their
         descriptions only.

    Args:
        user_query: Free-text message typed by the user.
        verbose: When true, print intermediate results for debugging.

    Returns:
        The text of the model's reply, or a short apology when no candidate
        embeddings are available or the query could not be embedded.
    """
    embeddings_json = get_embeddings()
    author = detect_author(user_query)

    if author:
        author_titles = all_authors[author]
        if verbose:
            print(f"Found these titles: {author_titles} by author: {author}")

        by_author = {
            title: embeddings_json[title]
            for title in author_titles
            if embeddings_json.get(title)
        }
        # Only narrow the search when the author actually has embedded titles.
        if by_author:
            embeddings_json = by_author

    titles = []
    embeddings = []
    for title, embedding in embeddings_json.items():
        # Skip failed (empty) embeddings and the stray "null" key up front so
        # it cannot occupy one of the nearest-neighbour slots.
        if embedding and title != "null":
            titles.append(title)
            embeddings.append(embedding)

    if not embeddings:
        return "Sorry, I don't have any books to recommend right now."

    query_embedding = store.get_one(user_query)
    if query_embedding is None:
        # The store returns None when the embedding request failed.
        return "Sorry, I could not process your request. Please try again."

    kdtree = KDTree(np.array(embeddings))
    _, indexes = kdtree.query(query_embedding, k=min(len(titles), 3))
    # With k=1 the query returns a scalar index; normalise to a 1-D array.
    indexes = np.atleast_1d(indexes)

    titles_relevant = [titles[i] for i in indexes]
    if verbose:
        print(f"Relevant titles: {titles_relevant}")

    recommendation_text = "".join(
        f"{title}: {get_book_description_by_title(title)}\n\n##"
        for title in titles_relevant
    )

    system_prompt = f"""
    You are a helpful book recommendation system that can recommend users books based on their inputs.

    Here are the top relevant titles and descriptions (separated by ##) in the format titles: descriptions,
    use these to generate your answer,
    and disregard books that are not relevant to user's input. You can display 5 or less recommendations.:

    {recommendation_text}

    You should also create a summary of the description and format the answer properly.
    You can display a maximum of 5 recommendations.
    Please do not suggest any books outside this list.
    """

    if verbose:
        print(f"System prompt: {system_prompt}")

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query},
        ],
        seed=42,
        n=1,
    )

    return response.choices[0].message.content


def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Chat callback: answer the user's message with book recommendations.

    Only ``contents`` is used; ``user`` and ``instance`` are accepted because
    they are part of the callback signature but are ignored here.
    """
    reply = book_recommender_agent(contents)
    return reply


# Greeting posted once when the app starts.
_welcome_text = (
    "I am a book recommendation engine! "
    "You may ask questions like: \n* Recommend books by Dan Brown.\n"
    "* Suggest some books based in the Victorian era.\n\n"
    "You can deploy your own by signing up at https://ploomber.io"
)

chat_interface = pn.chat.ChatInterface(callback=callback)
# respond=False: post the greeting without invoking the callback for it.
chat_interface.send(_welcome_text, user="System", respond=False)

# Wrap the chat widget in a page template and mark it as servable.
pn.template.MaterialTemplate(
    main=[chat_interface],
    title="Book Recommender",
).servable()
edublancas marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
edublancas marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
89 changes: 89 additions & 0 deletions examples/panel/book-recommender/rag_book_recommender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
edublancas marked this conversation as resolved.
Show resolved Hide resolved
import json
import pickle
import pandas as pd

from pathlib import Path
from openai import OpenAI

# Shared OpenAI client used by EmbeddingsStore for embedding requests.
client = OpenAI()

# Book table read from assets/goodreads.csv; compute_embeddings() iterates
# over its rows.
df = pd.read_csv(os.path.join('assets', 'goodreads.csv'))

# Description lookup used by get_book_description_by_title().
file_path = os.path.join('assets', 'title_to_description.pkl')
with open(file_path, 'rb') as file:
    DESCRIPTIONS = pickle.load(file)


def get_authors():
    """Load and return the object pickled in ``assets/author_to_title.pkl``.

    The path is relative to the current working directory.
    """
    pickle_path = os.path.join('assets', 'author_to_title.pkl')
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


def get_embeddings():
    """Parse ``assets/embeddings.json`` and return its content.

    The path is relative to the current working directory and the file is
    read as UTF-8.
    """
    json_path = os.path.join('assets', 'embeddings.json')
    with open(json_path, "r", encoding="utf-8") as handle:
        return json.load(handle)


def get_book_description_by_title(title):
    """Return the description stored for ``title``.

    The title is upper-cased before the lookup in ``DESCRIPTIONS``; a missing
    title raises ``KeyError``.
    """
    lookup_key = title.upper()
    return DESCRIPTIONS[lookup_key]


class EmbeddingsStore:
    """Embedding cache persisted as JSON in ``embeddings_another.json``.

    Entries map a key to an embedding vector (a list of floats). The key is
    the ``title`` when one is given, otherwise the embedded ``text`` itself.
    A failed embedding request is recorded as an empty list. The whole
    mapping is rewritten to disk after every request.
    """

    def __init__(self):
        # Relative path: resolved against the current working directory.
        self._path = Path("embeddings_another.json")

        if self._path.exists():
            self._data = json.loads(self._path.read_text())
        else:
            self._data = {}

    def _save(self):
        """Write the full mapping to the backing JSON file."""
        self._path.write_text(json.dumps(self._data))

    def get_one(self, text, title=None):
        """Return the embedding of ``text``, or ``None`` if it cannot be computed.

        Args:
            text: The text to embed.
            title: Optional key to store the embedding under. When given, the
                embedding is always recomputed and stored under ``title``.
                When omitted, ``text`` is the key and a previously stored
                non-empty embedding for it is returned without a request.

        Returns:
            The embedding as a list of floats, or ``None`` when the request
            failed (an empty list is stored for the key in that case).
        """
        if title is None:
            # Key by the text itself. Storing under ``None`` would serialise
            # as a bogus "null" entry and the cache could never hit.
            key = text
            cached = self._data.get(key)
            # An empty list marks an earlier failure; retry instead of
            # handing it back as if it were an embedding.
            if cached:
                return cached
        else:
            key = title

        try:
            response = client.embeddings.create(input=text, model="text-embedding-3-small")
            embedding = response.data[0].embedding
        except Exception:
            # Deliberate best effort: record the failure and let the caller
            # decide what to do with ``None``.
            self._data[key] = []
            self._save()
            return None

        self._data[key] = embedding
        self._save()
        return embedding

    def get_many(self, content, title):
        """Embed every text in ``content`` under ``title``; return the results as a list."""
        return [self.get_one(text, title) for text in content]

    def __len__(self):
        return len(self._data)

    def clear(self):
        """Delete the backing file (if present) and drop all in-memory entries."""
        if self._path.exists():
            self._path.unlink()
        self._data = {}


def compute_embeddings():
    """Rebuild the embedding store from scratch, one entry per row of ``df``.

    The store is cleared first, then each row's ``description`` is embedded
    and stored under the row's ``title``. The row index is printed as a
    progress indicator.
    """
    fresh_store = EmbeddingsStore()
    fresh_store.clear()

    for position, record in df.iterrows():
        print(f"Index: {position}")
        description, book_title = record["description"], record["title"]
        fresh_store.get_one(description, book_title)


if __name__ == "__main__":

    # Script entry point: clear the embedding store and recompute an
    # embedding for every row of the book table.
    compute_embeddings()
4 changes: 4 additions & 0 deletions examples/panel/book-recommender/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
panel
openai
scipy
numpy
pandas