diff --git a/.DS_Store b/.DS_Store
index 3f2329e..2b1e2ca 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/.ipynb_checkpoints/app-checkpoint.py b/.ipynb_checkpoints/app-checkpoint.py
new file mode 100644
index 0000000..d523071
--- /dev/null
+++ b/.ipynb_checkpoints/app-checkpoint.py
@@ -0,0 +1,129 @@
+import os
+import pickle
+
+import numpy as np
+import pandas as pd
+import requests
+import streamlit as st
+from surprise import Dataset, Reader, SVD
+
+# Set page config
+st.set_page_config(page_title='STREAMFLIX', page_icon="🎬", layout='wide')
+
+# Load your data
+@st.cache_data
+def load_data():
+    """Read the movie and rating tables from the ``movies_data`` directory.
+
+    Returns:
+        A ``(df, ratings)`` tuple of DataFrames loaded from ``movies.csv``
+        and ``ratings.csv`` respectively.
+    """
+    df = pd.read_csv('movies_data/movies.csv')
+    ratings = pd.read_csv('movies_data/ratings.csv')
+    return df, ratings
+
+# Train your model
+@st.cache_resource
+def train_model(ratings):
+    """Fit a Surprise SVD model on every rating in *ratings*.
+
+    Args:
+        ratings: DataFrame with ``userId_x``, ``movieId`` and ``rating``
+            columns.
+
+    Returns:
+        The fitted ``SVD`` model.
+    """
+    # NOTE(review): the scale is declared as 1-5, but the rating slider in
+    # the UI starts at 0.5 -- confirm the real range of ratings.csv.
+    reader = Reader(rating_scale=(1, 5))
+    data = Dataset.load_from_df(ratings[['userId_x', 'movieId', 'rating']], reader)
+    model = SVD()
+    model.fit(data.build_full_trainset())
+    return model
+
+# Get recommendations
+def get_recommendations(model, df, user_ratings, n=5, genre=None):
+    """Rank the movies the user has not rated yet by predicted rating.
+
+    Args:
+        model: Fitted model exposing ``predict(user_id, movie_id).est``.
+        df: DataFrame with ``userId_x``, ``movieId`` and ``genres`` columns.
+        user_ratings: Iterable of sequences whose first item is the id of a
+            movie the user already rated; those movies are excluded.
+        n: Maximum number of recommendations to return.
+        genre: Optional case-insensitive substring that must occur in a
+            movie's ``genres`` value for it to be kept.
+
+    Returns:
+        A list of ``(movie_id, predicted_rating)`` tuples, best first.
+    """
+    # Predictions are made for an id one past the largest known user.
+    # NOTE(review): user_ratings is only used to exclude movies; the model is
+    # never fitted on it, so the ranking does not depend on the rating values.
+    new_user_id = df['userId_x'].max() + 1
+    rated_ids = [rating[0] for rating in user_ratings]
+    movies_to_predict = df.loc[~df['movieId'].isin(rated_ids), 'movieId'].unique()
+
+    predictions = [
+        (movie_id, model.predict(new_user_id, movie_id).est)
+        for movie_id in movies_to_predict
+    ]
+    recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)
+
+    if genre:
+        wanted = genre.lower()
+        # Build the movieId -> genres lookup once instead of filtering the
+        # whole frame for every candidate; keep the first row per movie, as
+        # the previous ``.iloc[0]`` lookup did.
+        genres_by_id = (
+            df.drop_duplicates('movieId').set_index('movieId')['genres'].to_dict()
+        )
+        recommendations = [
+            (movie_id, rating) for movie_id, rating in recommendations
+            # Missing genres are read as NaN (a float); skip them.
+            if isinstance(genres_by_id.get(movie_id), str)
+            and wanted in genres_by_id[movie_id].lower()
+        ]
+
+    return recommendations[:n]
+
+# Fetch movie poster
+@st.cache_data
+def fetch_poster(movie_id):
+    """Return the TMDB poster URL for *movie_id*.
+
+    The API key is read from the ``TMDB_API_KEY`` environment variable and
+    falls back to the ``your_api_key`` placeholder when it is not set.
+
+    Args:
+        movie_id: TMDB movie id.
+
+    Returns:
+        The ``w500`` poster URL, or the bare image base URL when the
+        response carries no poster path.
+    """
+    api_key = os.environ.get('TMDB_API_KEY', 'your_api_key')
+    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}"
+    # Without a timeout a stalled connection would block the page forever.
+    response = requests.get(url, timeout=10)
+    data = response.json()
+    # TMDB sends "poster_path": null for movies without a poster, so the key
+    # can be present with a None value; fall back to '' in that case too.
+    poster_path = data.get('poster_path') or ''
+    # Poster paths start with '/', which the base URL already ends with.
+    return "https://image.tmdb.org/t/p/w500/" + poster_path.lstrip('/')
+
+# Main app
+def main():
+    """Render the Streamflix UI.
+
+    Loads the movie and rating tables, trains the model, and draws the page
+    picked in the sidebar: trending movies, personalized recommendations, or
+    a title search.
+    """
+    st.title("🎬 Streamflix: Hybrid Movie Recommendation System")
+
+    # Load data
+    df, ratings = load_data()
+    model = train_model(ratings)
+
+    # Sidebar
+    st.sidebar.title('Navigation')
+    # The option labels must match the page checks below exactly; the search
+    # page used to be unreachable because the labels differed by one letter.
+    page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movies'])
+
+    if page == 'Home':
+        st.header('🔥 Top Trending Movies')
+        top_movies = df.sort_values('popularity', ascending=False).head(10)
+
+        for _, movie in top_movies.iterrows():
+            col1, col2 = st.columns([1, 3])
+            with col1:
+                poster_url = fetch_poster(movie['id'])
+                st.image(poster_url, width=150)
+            with col2:
+                st.subheader(movie['title'])
+                st.write(f"Genres: {movie['genres']}")
+                st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
+                # NOTE(review): a Streamlit button is presumably only True on
+                # the rerun right after the click, so this slider would vanish
+                # as soon as it is moved -- confirm the intended UX.
+                if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"):
+                    rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}")
+                    st.write(f"You rated {movie['title']} {rating} stars!")
+            st.write('---')
+
+    elif page == 'Get Recommendations':
+        st.header('🎯 Get Personalized Recommendations')
+        user_id = st.number_input('Please enter your user ID', min_value=1, step=1)
+        genres = st.multiselect('Select genres', df['genres'].explode().unique())
+
+        if st.button('Get Recommendations'):
+            # get_recommendations takes (model, df, user_ratings) and returns
+            # (movieId, predicted_rating) tuples, so hand it this user's
+            # existing ratings and map the ids back to movie rows afterwards.
+            rated = ratings.loc[ratings['userId_x'] == user_id, ['movieId', 'rating']]
+            user_ratings = list(rated.itertuples(index=False, name=None))
+            # Ask for the full ranking so the genre filter still has candidates.
+            ranked = get_recommendations(model, df, user_ratings, n=len(df))
+
+            movies_by_id = df.drop_duplicates('movieId').set_index('movieId')
+            recommendations = movies_by_id.loc[[movie_id for movie_id, _ in ranked]]
+            if genres:
+                recommendations = recommendations[
+                    recommendations['genres'].apply(
+                        # Missing genres are NaN (a float), not a string.
+                        lambda x: isinstance(x, str) and any(genre in x for genre in genres)
+                    )
+                ]
+            # Same list length as get_recommendations' default ``n``.
+            recommendations = recommendations.head(5)
+
+            st.subheader('Your Recommended Movies:')
+            for _, movie in recommendations.iterrows():
+                col1, col2 = st.columns([1, 3])
+                with col1:
+                    poster_url = fetch_poster(movie['id'])
+                    st.image(poster_url, width=150)
+                with col2:
+                    # Outer double quotes: reusing the outer quote inside an
+                    # f-string field is a syntax error before Python 3.12.
+                    st.write(f"**{movie['title']}**")
+                    st.write(f"Genres: {movie['genres']}")
+                    st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
+                    if st.button(f"Watch Trailer for {movie['title']}", key=f"trailer_{movie['id']}"):
+                        # You would need to implement a function to fetch and display the trailer
+                        st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ')  # Placeholder
+                st.write('---')
+
+    elif page == 'Search Movies':
+        st.header('🔍 Search Movies')
+        search_term = st.text_input('Enter a movie title')
+        if search_term:
+            # regex=False: the user's text is a literal, not a pattern (an
+            # unbalanced "(" would otherwise raise); na=False: rows without a
+            # title simply do not match.
+            matches = df['title'].str.contains(search_term, case=False, regex=False, na=False)
+            results = df[matches]
+            for _, movie in results.iterrows():
+                col1, col2 = st.columns([1, 3])
+                with col1:
+                    poster_url = fetch_poster(movie['id'])
+                    st.image(poster_url, width=150)
+                with col2:
+                    st.subheader(movie['title'])
+                    st.write(f"Genres: {movie['genres']}")
+                    st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
+                    # A missing overview is NaN (a float) and cannot be sliced.
+                    overview = movie['overview'] if isinstance(movie['overview'], str) else ''
+                    st.write(f"Overview: {overview[:200]}...")
+                st.write('---')
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/README.md b/README.md
index c00bb9f..1cb25b8 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,9 @@ TF-IDF vectorization and cosine similarity are then encapsulated within a `Conte
 The `HybridModel` class integrates collaborative filtering and content-based filtering using a weighted average to provide hybrid recommendations with user ratings guiding the process. After training both models, the system generates and prints a list of recommended movies based on user input. The hybrid model with a collab weight of 0.5 produces an RMSE of `1.25`. Different collaborative filtering weights are tested to determine their impact on the hybrid model's performance. starting from 0.2 up to 0.8, the RMSE values decrease indicating improved prediction accuracy. With a weight of `0.2` the RMSE is `1.2559` and with a weight of `0.4` it slightly improves to `1.2523`. The RMSE significantly drops to `1.1263` with a weight of `0.6` and further decreases to `1.1221` with a weight of `0.8`. This suggests that higher collaborative filtering weight tends to enhance the model's accuracy leading to lower prediction errors and helps in identifying the most effective balance between collaborative and content-based filtering.
+### Deployment +Streamlit is a Python library for building web applications for data science projects. Here, Streamlit is used to build an interactive movie recommendation system. The app uses a collaborative filtering model based on the `SVD` (singular value decomposition) algorithm from the Surprise library, trained on user-movie ratings. Key features include displaying trending movies, personalized recommendations, movie search functionality and genre-based browsing. The app fetches movie posters from the TMDB API and attempts to show trailers using the YouTube API. The app demonstrates the integration of machine learning models with a user-friendly interface, including features like rating movies, receiving personalized recommendations and exploring movies by genre. + ### Conclusion The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid model, which shows higher RMSE, suggesting that emphasizing collaborative filtering in a hybrid approach yields better accuracy and recommendation quality. @@ -59,4 +62,52 @@ The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid mo 4. **Explore Advanced Techniques**: Integrate deep learning-based models and other advanced methods to further enhance the system's capabilities and address remaining limitations. -5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content. \ No newline at end of file +5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness, ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content. 
+ +### Installation and Setup + +**Clone the repository** + +**https:** +``` +git clone https://github.com/Eva-Claire/streamflix_recommender_system.git +``` +**ssh:** +``` +git clone git@github.com:Eva-Claire/streamflix_recommender_system.git +``` +**Navigate to the project directory** + +``` +cd streamflix_recommender_system +``` +**Install dependencies** +``` +pip install -r requirements.txt +``` +**Run the app with Streamlit** +``` +streamlit run app.py +``` + +### Repository Setup + +``` +streamflix_movie_recommendation_system/ +│ +├── Cover Page/ +│ ├── Evaclaire M. +│ └── Simon M. +│ +└── Project Files/ + ├── .ipynb_checkpoints/ + ├── modelling_data/ + ├── movies_data/ + ├── project_images/ + ├── .DS_Store + ├── README.md + ├── Streamflix_data_report.docx + ├── app.py + ├── logo.png + └── movie_recommendor.ipynb +``` diff --git a/Recommender_system.pdf b/Recommender_system.pdf new file mode 100644 index 0000000..7500194 Binary files /dev/null and b/Recommender_system.pdf differ diff --git a/Recommender_system.pptx b/Recommender_system.pptx new file mode 100644 index 0000000..a819ef0 Binary files /dev/null and b/Recommender_system.pptx differ diff --git a/pickle_files/collaborative_model1.pkl b/pickle_files/collaborative_model1.pkl new file mode 100644 index 0000000..af9df51 Binary files /dev/null and b/pickle_files/collaborative_model1.pkl differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4f16b1e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +streamlit==1.37.0 +pandas==2.2.2 +numpy==1.26.4 +requests==2.32.3 +google-api-python-client==2.138.0 +scikit-learn==1.5.1 +scipy==1.13.1 +scikit-surprise==1.1.1 +google-auth==2.32.0 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==0.7.2 +httplib2==0.22.0 +python-dotenv==1.0.0 + diff --git a/~$reamflix_data_report.docx b/~$reamflix_data_report.docx new file mode 100644 index 0000000..1215360 Binary files /dev/null and b/~$reamflix_data_report.docx differ