Skip to content

Commit

Permalink
Add tracking files
Browse files Browse the repository at this point in the history
  • Loading branch information
Eva-Claire committed Jul 31, 2024
1 parent a72da58 commit c3584f2
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 76 deletions.
Binary file modified .DS_Store
Binary file not shown.
270 changes: 194 additions & 76 deletions .ipynb_checkpoints/app-checkpoint.py
Original file line number Diff line number Diff line change
@@ -1,129 +1,247 @@
import os
import streamlit as st
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
import requests
import pickle
import requests
from googleapiclient.discovery import build
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split
from dotenv import load_dotenv

# Load environment variables from .env file
# (TMDB_API_KEY and YOUTUBE_API_KEY are read later through os.getenv).
load_dotenv()

# Set page config: browser-tab title and icon, and the wide page layout.
st.set_page_config(page_title='STREAMFLIX', page_icon="🎬", layout='wide')

# Load your data
# Load data and model
@st.cache_data
def load_data():
    """Read the movie and rating tables from the ``movies_data`` folder.

    Returns:
        A ``(movies, ratings)`` pair of DataFrames, read from
        ``movies_data/movies.csv`` and ``movies_data/ratings.csv``.
    """
    movies_table = pd.read_csv('movies_data/movies.csv')
    ratings_table = pd.read_csv('movies_data/ratings.csv')
    return movies_table, ratings_table

# Train your model
@st.cache_resource
def train_model(ratings):
    """Fit an SVD model on every row of *ratings* and return it.

    Args:
        ratings: DataFrame providing the ``userId_x``, ``movieId`` and
            ``rating`` columns.

    Returns:
        The fitted ``SVD`` instance.
    """
    rating_columns = ['userId_x', 'movieId', 'rating']
    scale_reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(ratings[rating_columns], scale_reader)
    svd = SVD()
    # No hold-out split: the model is trained on the full rating table.
    full_trainset = dataset.build_full_trainset()
    svd.fit(full_trainset)
    return svd

# Get recommendations
def get_recommendations(model, df, user_ratings, n=5, genre=None):
# Rank the movies a new user has not rated by the model's predicted rating and
# return the top n as (movie_id, predicted_rating) pairs, optionally limited to
# movies whose 'genres' text contains `genre`.
# NOTE(review): indentation is missing in this listing and the body of the
# trailing `else:` is not visible here, so the no-genre return path cannot be
# confirmed from these lines.
# The synthetic user gets an id one past the largest existing 'userId_x'.
new_user_id = df['userId_x'].max() + 1
# user_ratings is read only here, to exclude the movie ids (first element of
# each entry) the user already rated; nothing visible feeds the ratings
# themselves to the model.
movies_to_predict = df[~df['movieId'].isin([x[0] for x in user_ratings])]['movieId'].unique()

predictions = []
for movie_id in movies_to_predict:
predicted_rating = model.predict(new_user_id, movie_id).est
predictions.append((movie_id, predicted_rating))

# Highest predicted rating first.
recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)

if genre:
# Case-insensitive substring match against the 'genres' value of the first
# row found for each movie id.
genre_recommendations = [
(movie_id, rating) for movie_id, rating in recommendations
if genre.lower() in df[df['movieId'] == movie_id]['genres'].iloc[0].lower()
]
return genre_recommendations[:n]
else:
def load_data_and_model():
    """Load the merged movie table and the pickled collaborative model.

    Reads ``modelling_data/collab_movies.csv`` and
    ``modelling_data/content_movies.csv``, joins them on ``movieId``, keeps
    one row per ``movieId``, and unpickles
    ``pickle_files/collaborative_model1.pkl``.

    Returns:
        ``(merged_df, collab_model)`` on success. If anything raises, the
        error is shown with ``st.error`` and ``(None, None)`` is returned.
    """
    try:
        ratings_frame = pd.read_csv('modelling_data/collab_movies.csv')
        metadata_frame = pd.read_csv('modelling_data/content_movies.csv')
        combined = pd.merge(ratings_frame, metadata_frame, on='movieId')
        # NOTE(review): after this call every movieId has a single row, yet
        # main() ranks movies with merged_df['movieId'].value_counts() and
        # CollabBasedModel refits on this frame - confirm that one row per
        # movie is really what those callers should receive.
        combined = combined.drop_duplicates(subset=['movieId'])
        # pickle.load can execute code embedded in the file; only ship model
        # files that come from a trusted source.
        with open('pickle_files/collaborative_model1.pkl', 'rb') as model_file:
            fitted_model = pickle.load(model_file)
    except Exception as exc:
        st.error(f"Error loading data and model: {exc}")
        return None, None
    return combined, fitted_model

# Load the merged movie table and the pickled model at import time. Both names
# are None when loading failed (load_data_and_model returns (None, None) from
# its except branch), so later code that indexes merged_df relies on success.
merged_df, collab_model = load_data_and_model()

class CollabBasedModel:
    """Recommender that refits a Surprise-style model with a new user's ratings.

    Each call to :meth:`get_recommendations` adds a synthetic user carrying
    the supplied ratings, refits ``self.model`` on the combined ratings, and
    ranks the remaining movies by predicted rating.
    """

    def __init__(self, collab_df, model):
        # Ratings table; get_recommendations reads its 'user_id', 'movieId'
        # and 'rating' columns.
        self.df = collab_df
        # Estimator used through fit(trainset) and predict(uid, iid).est.
        self.model = model

    def get_recommendations(self, user_ratings, n=5):
        """Return the top *n* ``(movie_id, predicted_rating)`` pairs.

        Args:
            user_ratings: Sequence of ``(movieId, rating)`` pairs supplied by
                the new user.
            n: Maximum number of recommendations; values below 1 yield an
                empty list.

        Returns:
            A list of ``(movie_id, estimate)`` tuples sorted by estimate,
            highest first, excluding every movie present in *user_ratings*.

        Note:
            ``self.model`` is refitted in place on every call. ``self.df`` is
            left untouched, so repeated calls do not accumulate synthetic
            users in the stored ratings table.
        """
        rating_columns = ['user_id', 'movieId', 'rating']
        new_user_id = self.df['user_id'].max() + 1

        # Create a DataFrame for the new user's ratings.
        new_ratings_df = pd.DataFrame(user_ratings, columns=['movieId', 'rating'])
        new_ratings_df['user_id'] = new_user_id

        # Build the augmented table locally instead of reassigning self.df, so
        # one request's ratings never leak into the next call.
        training_df = pd.concat(
            [self.df[rating_columns], new_ratings_df[rating_columns]],
            ignore_index=True,
        )

        # Train the model with the augmented dataset.
        # NOTE(review): the declared scale is (1, 5) while the UI in this file
        # lets users enter ratings down to 0.5 - confirm the intended scale.
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(training_df, reader)
        self.model.fit(data.build_full_trainset())

        # Score every movie the user has not rated yet.
        rated_ids = [entry[0] for entry in user_ratings]
        unrated_mask = ~self.df['movieId'].isin(rated_ids)
        movies_to_predict = self.df.loc[unrated_mask, 'movieId'].unique()
        predictions = [
            (movie_id, self.model.predict(new_user_id, movie_id).est)
            for movie_id in movies_to_predict
        ]
        ranked = sorted(predictions, key=lambda pair: pair[1], reverse=True)
        # Clamp n so a negative value cannot slice from the wrong end.
        return ranked[:max(n, 0)]

# Fetch movie poster
@st.cache_data
def fetch_poster(movie_id):
    """Return the TMDB poster image URL for the movie with id *movie_id*.

    Args:
        movie_id: Identifier placed in the TMDB ``/3/movie/{id}`` URL.

    Returns:
        ``https://image.tmdb.org/t/p/w500/`` followed by the movie's
        ``poster_path``; just the base URL when no poster path is available.
    """
    # NOTE(review): the api_key in this URL is the literal text
    # 'your_api_key', not a real credential.
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key=your_api_key"
    response = requests.get(url)
    data = response.json()
    # A JSON null poster_path decodes to None, which would make the string
    # concatenation below raise TypeError; treat it like a missing key.
    poster_path = data.get('poster_path') or ''
    return "https://image.tmdb.org/t/p/w500/" + poster_path
def fetch_poster(movie_title):
    """Return a TMDB poster URL for *movie_title*, or a placeholder image URL.

    Searches TMDB's ``/3/search/movie`` endpoint with the key taken from the
    ``TMDB_API_KEY`` environment variable and uses the ``poster_path`` of the
    first result.

    Args:
        movie_title: Title text to search for.

    Returns:
        The poster URL of the first search hit. The placeholder URL is
        returned when there is no hit, the hit has no poster, or the lookup
        raises (in which case a Streamlit warning is shown as well).
    """
    placeholder = "https://via.placeholder.com/500x750.png?text=No+Poster+Available"
    try:
        tmdb_api_key = os.getenv('TMDB_API_KEY')
        # Send the query through ``params`` so requests percent-encodes it;
        # titles containing '&', '#' or '+' would otherwise corrupt the query
        # string. The timeout keeps a stalled connection from hanging the page.
        response = requests.get(
            "https://api.themoviedb.org/3/search/movie",
            params={'api_key': tmdb_api_key, 'query': movie_title},
            timeout=10,
        )
        data = response.json()
        if data['results']:
            poster_path = data['results'][0].get('poster_path', '')
            if poster_path:
                return "https://image.tmdb.org/t/p/w500/" + poster_path
        return placeholder
    except Exception as e:
        st.warning(f"Error fetching poster for movie {movie_title}: {e}")
        return placeholder

# Get trailer URL
@st.cache_data
def get_trailer_url(movie_title):
    """Find a YouTube watch URL for the trailer of *movie_title*.

    Runs a single-result YouTube Data API video search for
    ``"<movie_title> official trailer"`` using the key from the
    ``YOUTUBE_API_KEY`` environment variable.

    Args:
        movie_title: Title used to build the search query.

    Returns:
        The watch URL of the first hit, or ``None`` when the search returns
        no items or any step raises (a Streamlit warning is shown then).
    """
    try:
        api_key = os.getenv('YOUTUBE_API_KEY')
        client = build('youtube', 'v3', developerKey=api_key)

        # Ask for only the top video match for the trailer query.
        trailer_query = client.search().list(
            q=f"{movie_title} official trailer",
            type='video',
            part='id,snippet',
            maxResults=1
        )
        hits = trailer_query.execute()['items']

        if not hits:
            return None
        first_video_id = hits[0]['id']['videoId']
        return f"https://www.youtube.com/watch?v={first_video_id}"
    except Exception as err:
        st.warning(f"Error fetching trailer for {movie_title}: {err}")
        return None

# Main app
def main():
# Render the Streamlit app: a title, a sidebar page selector, and one branch
# per page ('Home', 'Get Recommendations', 'Search Movies', 'About').
# NOTE(review): this listing has no indentation and contains several pairs of
# near-duplicate statements (two titles, two `page` assignments, two `for`
# headers, ...), which look like the before/after lines of a diff shown
# together - confirm which line of each pair belongs to the current file.
st.title("🎬 Streamflix: Hybrid Movie Recommendation System")

# Load data
df, ratings = load_data()
model = train_model(ratings)
st.title("🎬 Streamflix Movie Recommendation System")

# Sidebar
st.sidebar.title('Navigation')
# The second assignment overwrites the first; only it offers 'Search Movies'
# and 'About', the labels the branches below compare against.
page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movie'])
page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movies', 'About'])

if page == 'Home':
st.header('🔥 Top Trending Movies')
top_movies = df.sort_values('popularity', ascending=False).head(10)

for _, movie in top_movies.iterrows():
# The 20 most frequent movieId values in merged_df, most frequent first.
top_movies = merged_df['movieId'].value_counts().head(20).index
for movie_id in top_movies:
movie = merged_df[merged_df['movieId'] == movie_id].iloc[0]
col1, col2 = st.columns([1, 3])
with col1:
poster_url = fetch_poster(movie['id'])
poster_url = fetch_poster(movie['title'])
st.image(poster_url, width=150)
with col2:
st.subheader(movie['title'])
st.write(f"Genres: {movie['genres']}")
st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"):
rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}")
st.write(f"Release Year: {movie['release_year']}")
# NOTE(review): the rating widget is created only while the button reports a
# click, and the value is just echoed back - nothing visible here stores it.
if st.button(f"Rate {movie['title']}", key=f"rate_{movie_id}"):
rating = st.number_input('Your rating', min_value=0.5, max_value=5.0, value=3.0, step=0.5, key=f"slider_{movie_id}")
st.write(f"You rated {movie['title']} {rating} stars!")
# NOTE(review): ''---'' is not the string '---'; it parses as two empty
# strings joined by minus operators. The following line has the intended form.
st.write(''---'')
st.write('---')

elif page == 'Get Recommendations':
st.header('🎯 Get Personalized Recommendations')
user_id = st.number_input('Please enter your user ID', min_value=1, step=1)
genres = st.multiselect('Select genres', df['genres'].explode().unique())

# Number of movies the user is asked to rate, and number of results shown.
num_ratings = 6
num_recommendations = 5

# Initialize session state for user ratings and sampled movies
if 'user_ratings' not in st.session_state:
st.session_state.user_ratings = [3.0] * num_ratings # Use float here

# The random sample is kept in session state so the same movies stay on
# screen across reruns of the script.
if 'sampled_movies' not in st.session_state:
st.session_state.sampled_movies = merged_df.sample(num_ratings).reset_index(drop=True)

# Show all six movies for rating
for i in range(num_ratings):
movie = st.session_state.sampled_movies.iloc[i]
st.write(f"\nMovie: {movie['title']} ({movie['release_year']})")
st.write(f"Genre: {movie['genres']}")
st.session_state.user_ratings[i] = st.number_input(
f"Rate {movie['title']}",
min_value=0.5, # Float
max_value=5.0, # Float
value=float(st.session_state.user_ratings[i]), # Convert to float
step=0.5, # Float
key=f"rating_{movie['movieId']}"
)

if st.button('Get Recommendations'):
# NOTE(review): this call passes (user_id, model, df, ratings), which does not
# match the get_recommendations(model, df, user_ratings, n=5, genre=None)
# signature defined earlier in this listing.
recommendations = get_recommendations(user_id, model, df, ratings)
if genres:
recommendations = recommendations[recommendations['genres'].apply(lambda x: any(genre in x for genre in genres))]
# Rebinds `model` to a CollabBasedModel wrapping the module-level merged_df and
# collab_model, then pairs each sampled movieId with the rating entered above.
model = CollabBasedModel(merged_df, collab_model)
user_ratings = [(st.session_state.sampled_movies.iloc[i]['movieId'], st.session_state.user_ratings[i]) for i in range(num_ratings)]
recommendations = model.get_recommendations(user_ratings, n=num_recommendations)

st.subheader('Your Recommended Movies:')
for _, movie in recommendations.iterrows():
for movie_id, score in recommendations:
movie = merged_df[merged_df['movieId'] == movie_id].iloc[0]
col1, col2 = st.columns([1, 3])
with col1:
poster_url = fetch_poster(movie['id'])
poster_url = fetch_poster(movie['title'])
st.image(poster_url, width=150)
with col2:
# NOTE(review): the single-quoted f-strings below reuse the same quote inside
# the braces, which only parses on Python 3.12+; the double-quoted versions
# further down avoid that.
st.write(f'**{movie['title']}**')
st.write(f'Genres: {movie['genres']}')
st.write(f'Average Rating: {movie['vote_average']:.1f}/10')
if st.button(f'Watch Trailer for {movie['title']}', key=f'trailer_{movie['id']}'):
# You would need to implement a function to fetch and display the trailer
st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ') # Placeholder
st.write(f"**{movie['title']}**")
st.write(f"Genres: {movie['genres']}")
st.write(f"Predicted Rating: {score:.2f}")
trailer_url = get_trailer_url(movie['title'])
if trailer_url:
st.write(f"[Watch Trailer]({trailer_url})")
else:
st.write("Sorry, couldn't find a trailer for this movie.")
st.write('---')

elif page == 'Search Movies':
st.header('🔍 Search Movies')
search_term = st.text_input('Enter a movie title')
if search_term:
results = df[df['title'].str.contains(search_term, case=False)]
for _, movie in results.iterrows():
# Case-insensitive title match against merged_df.
results = merged_df[merged_df['title'].str.contains(search_term, case=False)]
if results.empty:
st.write("No movies found matching your search term.")
else:
for _, movie in results.iterrows():
col1, col2 = st.columns([1, 3])
with col1:
poster_url = fetch_poster(movie['title'])
st.image(poster_url, width=150)
with col2:
st.subheader(movie['title'])
st.write(f"Genres: {movie['genres']}")
st.write(f"Release Year: {movie['release_year']}")
trailer_url = get_trailer_url(movie['title'])
if trailer_url:
st.write(f"[Watch Trailer]({trailer_url})")
else:
st.write("Sorry, couldn't find a trailer for this movie.")
st.write('---')

st.subheader('Browse by Genre')
# Distinct genre labels, obtained by splitting the 'genres' text on commas.
genres = merged_df['genres'].str.get_dummies(sep=',').columns.tolist()
genres.insert(0, 'All') # Add 'All' option to the list
selected_genre = st.selectbox('Select a Genre', genres)

if selected_genre:
st.subheader(f'Top 10 {selected_genre.capitalize()} Movies')
# Ten rows with the largest 'rating' value, over all movies or over those
# whose 'genres' text contains the selected label.
if selected_genre == 'All':
genre_results = merged_df.nlargest(10, 'rating')
else:
genre_results = merged_df[merged_df['genres'].str.contains(selected_genre)].nlargest(10, 'rating')
for _, movie in genre_results.iterrows():
col1, col2 = st.columns([1, 3])
with col1:
poster_url = fetch_poster(movie['id'])
poster_url = fetch_poster(movie['title'])
st.image(poster_url, width=150)
with col2:
st.subheader(movie['title'])
st.write(f'Genres: {movie['genres']}')
st.write(f'Average Rating: {movie['vote_average']:.1f}/10')
st.write(f'Overview: {movie['overview'][:200]}...')
st.write(f"Genres: {movie['genres']}")
st.write(f"Release Year: {movie['release_year']}")
trailer_url = get_trailer_url(movie['title'])
if trailer_url:
st.write(f"[Watch Trailer]({trailer_url})")
else:
st.write("Sorry, couldn't find a trailer for this movie.")
st.write('---')

elif page == 'About':
st.header('📚 About Streamflix')
st.write(
"Streamflix is a movie recommendation system that utilizes collaborative filtering methods "
"to provide personalized movie suggestions. Our system uses your ratings and movie genres to recommend "
"movies you might enjoy. Explore top trending movies, get personalized recommendations and search for "
"your favorite films all in one place.")

st.subheader('Developers')
st.write(
"- **Evaclaire Wamitu**\n"
" - [GitHub](https://github.com/Eva-Claire)\n"
" - Email: [[email protected]](mailto:[email protected])\n\n"
"- **Simon Makumi**\n"
" - [GitHub](https://github.com/simonMakumi)\n"
" - Email: [[email protected]](mailto:[email protected])"
)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
main()

0 comments on commit c3584f2

Please sign in to comment.