From 6df32301c77df519a1fa0fca044a184d5e8ba343 Mon Sep 17 00:00:00 2001 From: Simon Makumi Date: Wed, 31 Jul 2024 09:58:47 +0300 Subject: [PATCH] Modifying app.py --- app.py | 270 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 194 insertions(+), 76 deletions(-) diff --git a/app.py b/app.py index d523071..c83d023 100644 --- a/app.py +++ b/app.py @@ -1,129 +1,247 @@ +import os import streamlit as st import pandas as pd import numpy as np -from surprise import Dataset, Reader, SVD -import requests import pickle +import requests +from googleapiclient.discovery import build +from surprise import Reader, Dataset, SVD +from surprise.model_selection import train_test_split +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() # Set page config st.set_page_config(page_title='STREAMFLIX', page_icon="🎬", layout='wide') -# Load your data +# Load data and model @st.cache_data -def load_data(): - df = pd.read_csv('movies_data/movies.csv') - ratings = pd.read_csv('movies_data/ratings.csv') - return df, ratings - -# Train your model -@st.cache_resource -def train_model(ratings): - reader = Reader(rating_scale=(1, 5)) - data = Dataset.load_from_df(ratings[['userId_x', 'movieId', 'rating']], reader) - model = SVD() - model.fit(data.build_full_trainset()) - return model - -# Get recommendations -def get_recommendations(model, df, user_ratings, n=5, genre=None): - new_user_id = df['userId_x'].max() + 1 - movies_to_predict = df[~df['movieId'].isin([x[0] for x in user_ratings])]['movieId'].unique() - - predictions = [] - for movie_id in movies_to_predict: - predicted_rating = model.predict(new_user_id, movie_id).est - predictions.append((movie_id, predicted_rating)) - - recommendations = sorted(predictions, key=lambda x: x[1], reverse=True) - - if genre: - genre_recommendations = [ - (movie_id, rating) for movie_id, rating in recommendations - if genre.lower() in df[df['movieId'] == movie_id]['genres'].iloc[0].lower() - ] - return genre_recommendations[:n] - else: +def load_data_and_model(): + try: + collab_df = pd.read_csv('modelling_data/collab_movies.csv') + content_df = pd.read_csv('modelling_data/content_movies.csv') + merged_df = pd.merge(collab_df, content_df, on='movieId').drop_duplicates(subset=['movieId']) + with open('pickle_files/collaborative_model1.pkl', 'rb') as f: + collab_model = pickle.load(f) + return merged_df, collab_model + except Exception as e: + st.error(f"Error loading data and model: {e}") + return None, None + +merged_df, collab_model = load_data_and_model() + +class CollabBasedModel: + def __init__(self, collab_df, model): + self.df = collab_df + self.model = model + + def get_recommendations(self, user_ratings, n=5): + new_user_id = self.df['user_id'].max() + 1 + # Create a DataFrame for new user ratings + new_ratings_df = pd.DataFrame(user_ratings, columns=['movieId', 'rating']) + new_ratings_df['user_id'] = new_user_id + + # Append new user ratings to the dataset + self.df = pd.concat([self.df, new_ratings_df[['user_id', 'movieId', 'rating']]], ignore_index=True) + + # Train the model with the updated dataset + reader = Reader(rating_scale=(1, 5)) + data = Dataset.load_from_df(self.df[['user_id', 'movieId', 'rating']], reader) + trainset = data.build_full_trainset() + self.model.fit(trainset) + + # Get recommendations + movies_to_predict = self.df[~self.df['movieId'].isin([x[0] for x in user_ratings])]['movieId'].unique() + predictions = [(movie_id, self.model.predict(new_user_id, movie_id).est) for movie_id in movies_to_predict] + recommendations = sorted(predictions, key=lambda x: x[1], reverse=True) return recommendations[:n] # Fetch movie poster @st.cache_data -def fetch_poster(movie_id): - url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key=your_api_key" - response = requests.get(url) - data = response.json() - return "https://image.tmdb.org/t/p/w500/" + data.get('poster_path', '') +def fetch_poster(movie_title): + try: + tmdb_api_key = os.getenv('TMDB_API_KEY') + url = f"https://api.themoviedb.org/3/search/movie?api_key={tmdb_api_key}&query={movie_title}" + response = requests.get(url) + data = response.json() + if data['results']: + poster_path = data['results'][0].get('poster_path', '') + return "https://image.tmdb.org/t/p/w500/" + poster_path if poster_path else "https://via.placeholder.com/500x750.png?text=No+Poster+Available" + else: + return "https://via.placeholder.com/500x750.png?text=No+Poster+Available" + except Exception as e: + st.warning(f"Error fetching poster for movie {movie_title}: {e}") + return "https://via.placeholder.com/500x750.png?text=No+Poster+Available" + +# Get trailer URL +@st.cache_data +def get_trailer_url(movie_title): + try: + youtube_api_key = os.getenv('YOUTUBE_API_KEY') + youtube = build('youtube', 'v3', developerKey=youtube_api_key) + + # Search for the movie trailer + search_response = youtube.search().list( + q=f"{movie_title} official trailer", + type='video', + part='id,snippet', + maxResults=1 + ).execute() + + # Get the first search result + if search_response['items']: + video_id = search_response['items'][0]['id']['videoId'] + return f"https://www.youtube.com/watch?v={video_id}" + else: + return None + except Exception as e: + st.warning(f"Error fetching trailer for {movie_title}: {e}") + return None # Main app def main(): - st.title("🎬 Streamflix: Hybrid Movie Recommendation System") - - # Load data - df, ratings = load_data() - model = train_model(ratings) + st.title("🎬 Streamflix Movie Recommendation System") # Sidebar st.sidebar.title('Navigation') - page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movie']) + page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movies', 'About']) if page == 'Home': st.header('🔥 Top Trending Movies') - top_movies = df.sort_values('popularity', ascending=False).head(10) - - for _, movie in top_movies.iterrows(): + top_movies = merged_df['movieId'].value_counts().head(20).index + for movie_id in top_movies: + movie = merged_df[merged_df['movieId'] == movie_id].iloc[0] col1, col2 = st.columns([1, 3]) with col1: - poster_url = fetch_poster(movie['id']) + poster_url = fetch_poster(movie['title']) st.image(poster_url, width=150) with col2: st.subheader(movie['title']) st.write(f"Genres: {movie['genres']}") - st.write(f"Average Rating: {movie['vote_average']:.1f}/10") - if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"): - rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}") + st.write(f"Release Year: {movie['release_year']}") + if st.button(f"Rate {movie['title']}", key=f"rate_{movie_id}"): + rating = st.number_input('Your rating', min_value=0.5, max_value=5.0, value=3.0, step=0.5, key=f"slider_{movie_id}") st.write(f"You rated {movie['title']} {rating} stars!") - st.write(''---'') + st.write('---') elif page == 'Get Recommendations': st.header('🎯 Get Personalized Recommendations') - user_id = st.number_input('Please enter your user ID', min_value=1, step=1) - genres = st.multiselect('Select genres', df['genres'].explode().unique()) - + num_ratings = 6 + num_recommendations = 5 + + # Initialize session state for user ratings and sampled movies + if 'user_ratings' not in st.session_state: + st.session_state.user_ratings = [3.0] * num_ratings # Use float here + + if 'sampled_movies' not in st.session_state: + st.session_state.sampled_movies = merged_df.sample(num_ratings).reset_index(drop=True) + + # Show all six movies for rating + for i in range(num_ratings): + movie = st.session_state.sampled_movies.iloc[i] + st.write(f"\nMovie: {movie['title']} ({movie['release_year']})") + st.write(f"Genre: {movie['genres']}") + st.session_state.user_ratings[i] = st.number_input( + f"Rate {movie['title']}", + min_value=0.5, # Float + max_value=5.0, # Float + value=float(st.session_state.user_ratings[i]), # Convert to float + step=0.5, # Float + key=f"rating_{movie['movieId']}" + ) + if st.button('Get Recommendations'): - recommendations = get_recommendations(user_id, model, df, ratings) - if genres: - recommendations = recommendations[recommendations['genres'].apply(lambda x: any(genre in x for genre in genres))] - + model = CollabBasedModel(merged_df, collab_model) + user_ratings = [(st.session_state.sampled_movies.iloc[i]['movieId'], st.session_state.user_ratings[i]) for i in range(num_ratings)] + recommendations = model.get_recommendations(user_ratings, n=num_recommendations) + st.subheader('Your Recommended Movies:') - for _, movie in recommendations.iterrows(): + for movie_id, score in recommendations: + movie = merged_df[merged_df['movieId'] == movie_id].iloc[0] col1, col2 = st.columns([1, 3]) with col1: - poster_url = fetch_poster(movie['id']) + poster_url = fetch_poster(movie['title']) st.image(poster_url, width=150) with col2: - st.write(f'**{movie['title']}**') - st.write(f'Genres: {movie['genres']}') - st.write(f'Average Rating: {movie['vote_average']:.1f}/10') - if st.button(f'Watch Trailer for {movie['title']}', key=f'trailer_{movie['id']}'): - # You would need to implement a function to fetch and display the trailer - st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ') # Placeholder + st.write(f"**{movie['title']}**") + st.write(f"Genres: {movie['genres']}") + st.write(f"Predicted Rating: {score:.2f}") + trailer_url = get_trailer_url(movie['title']) + if trailer_url: + st.write(f"[Watch Trailer]({trailer_url})") + else: + st.write("Sorry, couldn't find a trailer for this movie.") st.write('---') elif page == 'Search Movies': st.header('🔍 Search Movies') search_term = st.text_input('Enter a movie title') if search_term: - results = df[df['title'].str.contains(search_term, case=False)] - for _, movie in results.iterrows(): + results = merged_df[merged_df['title'].str.contains(search_term, case=False)] + if results.empty: + st.write("No movies found matching your search term.") + else: + for _, movie in results.iterrows(): + col1, col2 = st.columns([1, 3]) + with col1: + poster_url = fetch_poster(movie['title']) + st.image(poster_url, width=150) + with col2: + st.subheader(movie['title']) + st.write(f"Genres: {movie['genres']}") + st.write(f"Release Year: {movie['release_year']}") + trailer_url = get_trailer_url(movie['title']) + if trailer_url: + st.write(f"[Watch Trailer]({trailer_url})") + else: + st.write("Sorry, couldn't find a trailer for this movie.") + st.write('---') + + st.subheader('Browse by Genre') + genres = merged_df['genres'].str.get_dummies(sep=',').columns.tolist() + genres.insert(0, 'All') # Add 'All' option to the list + selected_genre = st.selectbox('Select a Genre', genres) + + if selected_genre: + st.subheader(f'Top 10 {selected_genre.capitalize()} Movies') + if selected_genre == 'All': + genre_results = merged_df.nlargest(10, 'rating') + else: + genre_results = merged_df[merged_df['genres'].str.contains(selected_genre)].nlargest(10, 'rating') + for _, movie in genre_results.iterrows(): col1, col2 = st.columns([1, 3]) with col1: - poster_url = fetch_poster(movie['id']) + poster_url = fetch_poster(movie['title']) st.image(poster_url, width=150) with col2: st.subheader(movie['title']) - st.write(f'Genres: {movie['genres']}') - st.write(f'Average Rating: {movie['vote_average']:.1f}/10') - st.write(f'Overview: {movie['overview'][:200]}...') + st.write(f"Genres: {movie['genres']}") + st.write(f"Release Year: {movie['release_year']}") + trailer_url = get_trailer_url(movie['title']) + if trailer_url: + st.write(f"[Watch Trailer]({trailer_url})") + else: + st.write("Sorry, couldn't find a trailer for this movie.") st.write('---') + elif page == 'About': + st.header('📚 About Streamflix') + st.write( + "Streamflix is a movie recommendation system that combines collaborative filtering and content-based methods " + "to provide personalized movie suggestions. Our system uses your ratings and movie genres to recommend " + "movies you might enjoy. Explore top trending movies, get personalized recommendations, and search for " + "your favorite films all in one place.") + + st.subheader('Developers') + st.write( + "- **Evaclaire Wamitu**\n" + " - [GitHub](https://github.com/Eva-Claire)\n" + " - Email: [evamunyika@gmail.com](mailto:evamunyika@gmail.com)\n\n" + "- **Simon Makumi**\n" + " - [GitHub](https://github.com/simonMakumi)\n" + " - Email: [simonmakumi5@gmail.com](mailto:simonmakumi5@gmail.com)" + ) + if __name__ == '__main__': main() \ No newline at end of file