forked from Eva-Claire/streamflix_recommender_system
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request Eva-Claire#3 from Eva-Claire/claire_branch
Claire branch
- Loading branch information
Showing
8 changed files
with
195 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import streamlit as st | ||
import pandas as pd | ||
import numpy as np | ||
from surprise import Dataset, Reader, SVD | ||
import requests | ||
import pickle | ||
|
||
# Page-level settings: browser-tab title and icon, full-width layout.
st.set_page_config(
    page_title='STREAMFLIX',
    page_icon="🎬",
    layout='wide',
)
|
||
# Load your data | ||
@st.cache_data
def load_data():
    """Read the movie and rating tables from the ``movies_data`` directory.

    Returns:
        A ``(df, ratings)`` tuple of DataFrames read from
        ``movies_data/movies.csv`` and ``movies_data/ratings.csv``,
        in that order.
    """
    return (
        pd.read_csv('movies_data/movies.csv'),
        pd.read_csv('movies_data/ratings.csv'),
    )
|
||
# Train your model | ||
@st.cache_resource
def train_model(ratings):
    """Fit a Surprise ``SVD`` model on every row of ``ratings``.

    Args:
        ratings: DataFrame providing the ``userId_x``, ``movieId`` and
            ``rating`` columns.

    Returns:
        The fitted ``SVD`` instance.
    """
    rating_columns = ['userId_x', 'movieId', 'rating']
    scale = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(ratings[rating_columns], scale)
    svd = SVD()
    # Train on the complete data set; no hold-out split is made here.
    svd.fit(dataset.build_full_trainset())
    return svd
|
||
# Get recommendations | ||
def get_recommendations(model, df, user_ratings, n=5, genre=None):
    """Rank the movies a user has not rated by predicted rating.

    Args:
        model: Fitted estimator exposing ``predict(user_id, movie_id)`` whose
            result carries an ``est`` attribute.
        df: DataFrame with ``userId_x``, ``movieId`` and ``genres`` columns.
        user_ratings: Iterable of tuples whose first item is the id of a
            movie the user has already rated. It is only used to exclude
            those movies; it does not influence the predicted scores.
        n: Maximum number of recommendations to return.
        genre: Optional case-insensitive substring that a movie's ``genres``
            value must contain. Movies without a string ``genres`` value
            never match.

    Returns:
        A list of at most ``n`` ``(movie_id, predicted_rating)`` tuples,
        highest predicted rating first.
    """
    # Predictions are made for an id one above the largest in the data,
    # i.e. a user id that does not occur in ``df``.
    new_user_id = df['userId_x'].max() + 1

    already_rated = {rated[0] for rated in user_ratings}
    candidates = df.loc[~df['movieId'].isin(already_rated), 'movieId'].unique()

    predictions = [
        (movie_id, model.predict(new_user_id, movie_id).est)
        for movie_id in candidates
    ]
    recommendations = sorted(predictions, key=lambda pair: pair[1], reverse=True)

    if not genre:
        return recommendations[:n]

    # Build the movieId -> genres lookup once instead of rescanning the whole
    # frame for every scored movie. As before, the first row seen for a movie
    # supplies its genres.
    first_rows = df.drop_duplicates(subset='movieId')
    genres_by_movie = dict(zip(first_rows['movieId'], first_rows['genres']))
    wanted = genre.lower()

    matching = []
    for movie_id, rating in recommendations:
        movie_genres = genres_by_movie.get(movie_id)
        # Missing genres are NaN (a float), which has no .lower(); skip them.
        if isinstance(movie_genres, str) and wanted in movie_genres.lower():
            matching.append((movie_id, rating))
    return matching[:n]
|
||
# Fetch movie poster | ||
@st.cache_data
def fetch_poster(movie_id):
    """Return the poster image URL for a TMDB movie id.

    Args:
        movie_id: Movie id interpolated into the TMDB movie-details URL.

    Returns:
        The ``image.tmdb.org`` URL built from the response's ``poster_path``.
        When the response has no poster path (missing or null), only the
        image base URL is returned.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # NOTE: "your_api_key" is a placeholder; a real TMDB key must be supplied.
    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key=your_api_key"
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the app.
    response = requests.get(url, timeout=10)
    data = response.json()
    # dict.get's default only covers a missing key. A present-but-null
    # poster_path would be None and break the string concatenation below.
    poster_path = data.get('poster_path') or ''
    return "https://image.tmdb.org/t/p/w500/" + poster_path
|
||
# Main app | ||
def main():
    """Render the Streamlit app.

    Loads the data, trains the model and draws one of three pages chosen in
    the sidebar: trending movies, personalised recommendations, or title
    search.
    """
    st.title("🎬 Streamflix: Hybrid Movie Recommendation System")

    # Load data
    df, ratings = load_data()
    model = train_model(ratings)

    # Page labels are defined once so the radio options and the branch
    # checks below cannot drift apart.
    home_page = 'Home'
    recommend_page = 'Get Recommendations'
    search_page = 'Search Movie'

    # Sidebar
    st.sidebar.title('Navigation')
    page = st.sidebar.radio('Go to', [home_page, recommend_page, search_page])

    if page == home_page:
        st.header('🔥 Top Trending Movies')
        # NOTE(review): df appears to hold several rows per movie (it has a
        # userId_x column); de-duplicate so widget keys stay unique.
        top_movies = (
            df.sort_values('popularity', ascending=False)
            .drop_duplicates(subset='id')
            .head(10)
        )

        for _, movie in top_movies.iterrows():
            col1, col2 = st.columns([1, 3])
            with col1:
                poster_url = fetch_poster(movie['id'])
                st.image(poster_url, width=150)
            with col2:
                st.subheader(movie['title'])
                st.write(f"Genres: {movie['genres']}")
                st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
                # st.button is True only on the rerun right after the click,
                # so remember the click in session state; otherwise the
                # slider vanishes as soon as it is moved.
                rate_flag = f"rating_open_{movie['id']}"
                if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"):
                    st.session_state[rate_flag] = True
                if st.session_state.get(rate_flag):
                    rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}")
                    st.write(f"You rated {movie['title']} {rating} stars!")
            st.write('---')

    elif page == recommend_page:
        st.header('🎯 Get Personalized Recommendations')
        user_id = st.number_input('Please enter your user ID', min_value=1, step=1)
        genres = st.multiselect('Select genres', df['genres'].dropna().explode().unique())

        # Keep the list visible across reruns so the per-movie trailer
        # buttons below can actually be clicked.
        if st.button('Get Recommendations'):
            st.session_state['show_recommendations'] = True

        if st.session_state.get('show_recommendations'):
            top_n = 5
            # get_recommendations expects (movie_id, rating) tuples for the
            # movies this user has already rated.
            user_rows = ratings[ratings['userId_x'] == user_id]
            user_ratings = list(zip(user_rows['movieId'], user_rows['rating']))

            catalogue = df.drop_duplicates(subset='movieId').set_index('movieId')
            # When a genre filter is active, score every movie first so the
            # filter is applied before the list is cut down to top_n.
            limit = len(catalogue) if genres else top_n
            scored = get_recommendations(model, df, user_ratings, n=limit)
            recommendations = catalogue.loc[[movie_id for movie_id, _ in scored]]

            if genres:
                keep = recommendations['genres'].apply(
                    lambda x: isinstance(x, str) and any(genre in x for genre in genres)
                ).astype(bool)
                recommendations = recommendations[keep].head(top_n)

            st.subheader('Your Recommended Movies:')
            for _, movie in recommendations.iterrows():
                col1, col2 = st.columns([1, 3])
                with col1:
                    poster_url = fetch_poster(movie['id'])
                    st.image(poster_url, width=150)
                with col2:
                    st.write(f"**{movie['title']}**")
                    st.write(f"Genres: {movie['genres']}")
                    st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
                    if st.button(f"Watch Trailer for {movie['title']}", key=f"trailer_{movie['id']}"):
                        # You would need to implement a function to fetch and display the trailer
                        st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ')  # Placeholder
                st.write('---')

    elif page == search_page:
        st.header('🔍 Search Movies')
        search_term = st.text_input('Enter a movie title')
        if search_term:
            # regex=False treats the input literally (a stray "(" would
            # otherwise raise); na=False makes missing titles non-matches.
            matches = df['title'].str.contains(search_term, case=False, regex=False, na=False)
            results = df[matches].drop_duplicates(subset='id')
            for _, movie in results.iterrows():
                col1, col2 = st.columns([1, 3])
                with col1:
                    poster_url = fetch_poster(movie['id'])
                    st.image(poster_url, width=150)
                with col2:
                    st.subheader(movie['title'])
                    st.write(f"Genres: {movie['genres']}")
                    st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
                    # A missing overview is NaN (a float) and cannot be sliced.
                    overview = movie['overview'] if isinstance(movie['overview'], str) else ''
                    st.write(f"Overview: {overview[:200]}...")
                st.write('---')


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,9 @@ TF-IDF vectorization and cosine similarity are then encapsulated within a `Conte | |
The `HybridModel` class integrates collaborative filtering and content-based filtering using a weighted average to provide hybrid recommendations with user ratings guiding the process. After training both models, the system generates and prints a list of recommended movies based on user input. The hybrid model with a collab weight of 0.5 produces an RMSE of `1.25`. | ||
Different collaborative filtering weights are tested to determine their impact on the hybrid model's performance. As the weight increases from 0.2 to 0.8, the RMSE decreases, indicating improved prediction accuracy. With a weight of `0.2` the RMSE is `1.2559`, and with a weight of `0.4` it improves slightly to `1.2523`. The RMSE drops significantly to `1.1263` with a weight of `0.6` and decreases further to `1.1221` with a weight of `0.8`. This suggests that a higher collaborative filtering weight tends to improve the model's accuracy, leading to lower prediction errors, and the comparison helps identify the most effective balance between collaborative and content-based filtering. | ||
|
||
### Deployment | ||
Streamlit is a Python library used to create web applications for data science projects. In this case, Streamlit is used to build an interactive movie recommendation system. The app uses a collaborative filtering model based on the `SVD` (singular value decomposition) algorithm from the Surprise library, trained on user-movie ratings. Key features include displaying trending movies, personalized recommendations, movie search functionality and genre-based browsing. The app fetches movie posters from the TMDB API and attempts to show trailers using a YouTube API. The app demonstrates the integration of machine learning models with a user-friendly interface, including features like rating movies, receiving personalized recommendations and exploring movies by genre. | ||
|
||
### Conclusion | ||
The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid model, which shows higher RMSE, suggesting that emphasizing collaborative filtering in a hybrid approach yields better accuracy and recommendation quality. | ||
|
||
|
@@ -59,4 +62,52 @@ The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid mo | |
|
||
4. **Explore Advanced Techniques**: Integrate deep learning-based models and other advanced methods to further enhance the system's capabilities and address remaining limitations. | ||
|
||
5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content. | ||
5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content. | ||
|
||
### Installation and Setup | ||
|
||
**Clone the repository** | ||
|
||
**https:** | ||
``` | ||
git clone https://github.com/Eva-Claire/streamflix_recommender_system.git | ||
``` | ||
**ssh:** | ||
``` | ||
git clone git@github.com:Eva-Claire/streamflix_recommender_system.git | ||
``` | ||
**Navigate to the project directory** | ||
|
||
``` | ||
cd streamflix_recommender_system | ||
``` | ||
**Install dependencies** | ||
``` | ||
pip install -r requirements.txt | ||
``` | ||
**Execute the app on Streamlit** | ||
``` | ||
streamlit run app.py | ||
``` | ||
|
||
### Repository Setup | ||
|
||
``` | ||
streamflix_movie_recommendation_system/ | ||
│ | ||
├── Cover Page/ | ||
│ ├── Evaclaire M. | ||
│ └── Simon M. | ||
│ | ||
└── Project Files/ | ||
├── .ipynb_checkpoints/ | ||
├── modelling_data/ | ||
├── movies_data/ | ||
├── project_images/ | ||
├── .DS_Store | ||
├── README.md | ||
├── Streamflix_data_report.docx | ||
├── app.py | ||
├── logo.png | ||
└── movie_recommendor.ipynb | ||
``` |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
streamlit==1.37.0 | ||
pandas==2.2.2 | ||
numpy==1.26.4 | ||
requests==2.32.3 | ||
google-api-python-client==2.138.0 | ||
scikit-learn==1.5.1 | ||
scipy==1.13.1 | ||
scikit-surprise==1.1.1 | ||
google-auth==2.32.0 | ||
google-auth-httplib2==0.2.0 | ||
google-auth-oauthlib==0.7.2 | ||
httplib2==0.22.0 | ||
python-dotenv==1.0.0 | ||
|
Binary file not shown.