Skip to content

Commit

Permalink
Merge pull request Eva-Claire#3 from Eva-Claire/claire_branch
Browse files Browse the repository at this point in the history
Claire branch
  • Loading branch information
Eva-Claire authored Jul 30, 2024
2 parents ac5bda4 + 3f8c211 commit 2261c37
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 1 deletion.
Binary file modified .DS_Store
Binary file not shown.
129 changes: 129 additions & 0 deletions .ipynb_checkpoints/app-checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import streamlit as st
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
import requests
import pickle

# Configure the browser tab (title and icon) and use the wide page layout.
# NOTE: Streamlit expects this to be the first Streamlit command the script
# runs, so keep it above any other st.* call.
st.set_page_config(page_title='STREAMFLIX', page_icon="🎬", layout='wide')

# Load your data
@st.cache_data
def load_data():
    """Read the two CSV tables the app works from.

    Returns:
        A ``(movies, ratings)`` tuple of DataFrames loaded from
        ``movies_data/movies.csv`` and ``movies_data/ratings.csv``
        (paths are relative to the working directory).
    """
    movies_frame = pd.read_csv('movies_data/movies.csv')
    ratings_frame = pd.read_csv('movies_data/ratings.csv')
    return movies_frame, ratings_frame

# Train your model
@st.cache_resource
def train_model(ratings):
    """Fit a Surprise ``SVD`` model on every rating in ``ratings``.

    Args:
        ratings: DataFrame providing the ``userId_x``, ``movieId`` and
            ``rating`` columns.

    Returns:
        The fitted ``SVD`` instance.
    """
    rating_columns = ['userId_x', 'movieId', 'rating']
    # NOTE(review): the scale is declared as 1-5, while the Home page slider
    # offers 0.5-step ratings starting at 0.5 - confirm against the data.
    dataset = Dataset.load_from_df(
        ratings[rating_columns],
        Reader(rating_scale=(1, 5)),
    )
    svd = SVD()
    # Train on the full data set; no hold-out split is made here.
    svd.fit(dataset.build_full_trainset())
    return svd

# Get recommendations
def get_recommendations(model, df, user_ratings, n=5, genre=None):
    """Rank the movies a user has not rated by predicted rating.

    Predictions are requested for a fresh user id (one past the largest
    value in ``df['userId_x']``), for every distinct ``movieId`` in ``df``
    that does not appear in ``user_ratings``.

    Args:
        model: Object exposing ``predict(user_id, movie_id)`` whose result
            has an ``est`` attribute (e.g. a fitted Surprise model).
        df: DataFrame with ``userId_x``, ``movieId`` and ``genres`` columns.
        user_ratings: Iterable of sequences whose first item is the id of a
            movie the user already rated; those movies are excluded.
        n: Maximum number of recommendations to return.
        genre: Optional case-insensitive substring that a movie's genres
            must contain. Movies whose ``genres`` value is not a string
            (for example a missing value) never match.

    Returns:
        A list of at most ``n`` ``(movie_id, predicted_rating)`` tuples,
        highest predicted rating first.
    """
    new_user_id = df['userId_x'].max() + 1
    rated_ids = [rated[0] for rated in user_ratings]
    candidates = df.loc[~df['movieId'].isin(rated_ids), 'movieId'].unique()

    predictions = [
        (movie_id, model.predict(new_user_id, movie_id).est)
        for movie_id in candidates
    ]
    recommendations = sorted(predictions, key=lambda pair: pair[1], reverse=True)

    if not genre:
        return recommendations[:n]

    # Build the movieId -> genres lookup once instead of re-scanning the
    # whole frame for every candidate. The first row per movie wins.
    genres_by_movie = (
        df.drop_duplicates(subset='movieId')
        .set_index('movieId')['genres']
        .to_dict()
    )
    wanted = genre.lower()

    genre_recommendations = []
    for movie_id, rating in recommendations:
        movie_genres = genres_by_movie[movie_id]
        # Missing genres are read by pandas as float NaN, which has no .lower().
        if isinstance(movie_genres, str) and wanted in movie_genres.lower():
            genre_recommendations.append((movie_id, rating))
    return genre_recommendations[:n]

# Fetch movie poster
@st.cache_data
def fetch_poster(movie_id):
    """Return the TMDB poster image URL for ``movie_id``.

    The API key is read from the ``TMDB_API_KEY`` environment variable and
    falls back to the previous placeholder value when it is not set.

    Args:
        movie_id: TMDB movie identifier, interpolated into the request URL.

    Returns:
        ``https://image.tmdb.org/t/p/w500/<poster_path>``. When the response
        has no poster (key missing or null), only the base URL is returned.

    Raises:
        requests.RequestException: On network failure or when the request
            exceeds the 10-second timeout.
    """
    import os  # local import so the block does not depend on file-level imports

    api_key = os.environ.get('TMDB_API_KEY', 'your_api_key')
    url = f"https://api.themoviedb.org/3/movie/{movie_id}"
    # A timeout keeps one stalled request from hanging the whole page render.
    response = requests.get(url, params={'api_key': api_key}, timeout=10)
    data = response.json()
    # `poster_path` may be present but null; `or ''` covers both that and a
    # missing key, avoiding `str + None`.
    poster_path = data.get('poster_path') or ''
    # Strip any leading slash so the URL does not contain "w500//".
    return "https://image.tmdb.org/t/p/w500/" + poster_path.lstrip('/')

# Main app
def main():
    """Render the Streamflix app: title, sidebar navigation and the chosen page."""
    st.title("🎬 Streamflix: Hybrid Movie Recommendation System")

    # Load data
    df, ratings = load_data()
    model = train_model(ratings)

    # The radio options are the keys of this table, so a label can no longer
    # drift away from the branch that handles it.
    pages = {
        'Home': lambda: _render_home(df),
        'Get Recommendations': lambda: _render_recommendations(df, ratings, model),
        'Search Movie': lambda: _render_search(df),
    }

    # Sidebar
    st.sidebar.title('Navigation')
    page = st.sidebar.radio('Go to', list(pages))
    pages[page]()


def _unique_movies(frame):
    """Keep the first row per ``id`` so per-movie widget keys stay unique."""
    return frame.drop_duplicates(subset='id')


def _matches_any_genre(movie_genres, selected):
    """Return True when any selected genre occurs in ``movie_genres``."""
    # Missing genres arrive as float NaN, which does not support `in`.
    if not isinstance(movie_genres, (str, list, tuple, set)):
        return False
    return any(genre in movie_genres for genre in selected)


def _render_home(df):
    """Show the ten most popular movies with a rating widget for each."""
    st.header('🔥 Top Trending Movies')
    ranked = df.sort_values('popularity', ascending=False)
    top_movies = _unique_movies(ranked).head(10)

    for _, movie in top_movies.iterrows():
        col1, col2 = st.columns([1, 3])
        with col1:
            poster_url = fetch_poster(movie['id'])
            st.image(poster_url, width=150)
        with col2:
            st.subheader(movie['title'])
            st.write(f"Genres: {movie['genres']}")
            st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
            # NOTE(review): the slider only exists on the rerun triggered by
            # the button click, so moving it hides it again. Consider an
            # always-visible widget or st.session_state.
            if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"):
                rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}")
                st.write(f"You rated {movie['title']} {rating} stars!")
        st.write('---')


def _render_recommendations(df, ratings, model, limit=5):
    """Show up to ``limit`` recommended movies, optionally filtered by genre."""
    st.header('🎯 Get Personalized Recommendations')
    user_id = st.number_input('Please enter your user ID', min_value=1, step=1)
    genre_options = df['genres'].explode().dropna().unique()
    genres = st.multiselect('Select genres', genre_options)

    if st.button('Get Recommendations'):
        # get_recommendations expects (movie_id, rating) pairs for the movies
        # the user already rated and returns (movie_id, estimate) tuples.
        rated = ratings.loc[ratings['userId_x'] == user_id, ['movieId', 'rating']]
        user_ratings = list(rated.itertuples(index=False, name=None))
        ranked = get_recommendations(model, df, user_ratings, n=len(df))
        # Keep the ranking across reruns; otherwise clicking a trailer button
        # reruns the script with this button unpressed and the list vanishes.
        st.session_state['recommended_ids'] = [movie_id for movie_id, _ in ranked]

    if 'recommended_ids' not in st.session_state:
        return

    catalogue = df.drop_duplicates(subset='movieId').set_index('movieId')
    recommendations = catalogue.loc[st.session_state['recommended_ids']]
    if genres:
        keep = recommendations['genres'].apply(
            lambda movie_genres: _matches_any_genre(movie_genres, genres)
        )
        recommendations = recommendations[keep]

    st.subheader('Your Recommended Movies:')
    for _, movie in recommendations.head(limit).iterrows():
        col1, col2 = st.columns([1, 3])
        with col1:
            poster_url = fetch_poster(movie['id'])
            st.image(poster_url, width=150)
        with col2:
            st.write(f"**{movie['title']}**")
            st.write(f"Genres: {movie['genres']}")
            st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
            if st.button(f"Watch Trailer for {movie['title']}", key=f"trailer_{movie['id']}"):
                # You would need to implement a function to fetch and display the trailer
                st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ')  # Placeholder
        st.write('---')


def _render_search(df):
    """Show movies whose title contains the entered text, ignoring case."""
    st.header('🔍 Search Movies')
    search_term = st.text_input('Enter a movie title')
    if not search_term:
        return

    # regex=False: treat the input literally, so "(" or "*" cannot raise.
    # na=False: rows without a title simply do not match.
    title_matches = df['title'].str.contains(
        search_term, case=False, na=False, regex=False
    )
    for _, movie in _unique_movies(df[title_matches]).iterrows():
        col1, col2 = st.columns([1, 3])
        with col1:
            poster_url = fetch_poster(movie['id'])
            st.image(poster_url, width=150)
        with col2:
            st.subheader(movie['title'])
            st.write(f"Genres: {movie['genres']}")
            st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
            overview = movie['overview']
            # A missing overview is a float NaN and cannot be sliced.
            if isinstance(overview, str):
                st.write(f"Overview: {overview[:200]}...")
        st.write('---')

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
53 changes: 52 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ TF-IDF vectorization and cosine similarity are then encapsulated within a `Conte
The `HybridModel` class integrates collaborative filtering and content-based filtering using a weighted average to provide hybrid recommendations with user ratings guiding the process. After training both models, the system generates and prints a list of recommended movies based on user input. The hybrid model with a collab weight of 0.5 produces an RMSE of `1.25`.
Different collaborative filtering weights are tested to determine their impact on the hybrid model's performance. Starting from 0.2 up to 0.8, the RMSE values decrease, indicating improved prediction accuracy. With a weight of `0.2` the RMSE is `1.2559`, and with a weight of `0.4` it slightly improves to `1.2523`. The RMSE drops significantly to `1.1263` with a weight of `0.6` and decreases further to `1.1221` with a weight of `0.8`. This suggests that a higher collaborative filtering weight tends to enhance the model's accuracy, leading to lower prediction errors, and helps in identifying the most effective balance between collaborative and content-based filtering.

### Deployment
Streamlit is a Python library used to create web applications for data science projects. In this case, Streamlit is used to build an interactive movie recommendation system. The app uses a collaborative filtering model based on the `SVD` (singular value decomposition) algorithm from the Surprise library, trained on user-movie ratings. Key features include displaying trending movies, personalized recommendations, movie search functionality and genre-based browsing. The app fetches movie posters from the TMDB API and attempts to show trailers using the YouTube API. The app demonstrates the integration of machine learning models with a user-friendly interface, including features like rating movies, receiving personalized recommendations and exploring movies by genre.

### Conclusion
The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid model, which shows higher RMSE, suggesting that emphasizing collaborative filtering in a hybrid approach yields better accuracy and recommendation quality.

Expand All @@ -59,4 +62,52 @@ The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid mo

4. **Explore Advanced Techniques**: Integrate deep learning-based models and other advanced methods to further enhance the system's capabilities and address remaining limitations.

5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content.
5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content.

### Installation and Setup

**Clone the repository**

**https:**
```
git clone https://github.com/Eva-Claire/streamflix_recommender_system.git
```
**ssh:**
```
git clone git@github.com:Eva-Claire/streamflix_recommender_system.git
```
**Navigate to the project directory**

```
cd streamflix_recommender_system
```
**Install dependencies**
```
pip install -r requirements.txt
```
**Execute the app on Streamlit**
```
streamlit run app.py
```

### Repository Setup

```
streamflix_movie_recommendation_system/
├── Cover Page/
│ ├── Evaclaire M.
│ └── Simon M.
└── Project Files/
├── .ipynb_checkpoints/
├── modelling_data/
├── movies_data/
├── project_images/
├── .DS_Store
├── README.md
├── Streamflix_data_report.docx
├── app.py
├── logo.png
└── movie_recommendor.ipynb
```
Binary file added Recommender_system.pdf
Binary file not shown.
Binary file added Recommender_system.pptx
Binary file not shown.
Binary file added pickle_files/collaborative_model1.pkl
Binary file not shown.
14 changes: 14 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
streamlit==1.37.0
pandas==2.2.2
numpy==1.26.4
requests==2.32.3
google-api-python-client==2.138.0
scikit-learn==1.5.1
scipy==1.13.1
scikit-surprise==1.1.1
google-auth==2.32.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==0.7.2
httplib2==0.22.0
python-dotenv==1.0.0

Binary file added ~$reamflix_data_report.docx
Binary file not shown.

0 comments on commit 2261c37

Please sign in to comment.