Merge pull request Eva-Claire#3 from Eva-Claire/claire_branch

Claire branch
AtomHarris · Jul 30, 2024 · 2261c37 · 2261c37
2 parents ac5bda4 + 3f8c211
commit 2261c37
Show file tree

Hide file tree

Showing 8 changed files with 195 additions and 1 deletion.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.ipynb_checkpoints/app-checkpoint.py b/.ipynb_checkpoints/app-checkpoint.py
@@ -0,0 +1,129 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+from surprise import Dataset, Reader, SVD
+import requests
+import pickle
+
+# Configure the page before anything is rendered: browser-tab title, emoji
+# icon, and the wide layout.
+# NOTE(review): Streamlit presumably requires this to be the first Streamlit
+# command of the script — confirm for the pinned version before moving it.
+st.set_page_config(page_title='STREAMFLIX', page_icon="🎬", layout='wide')
+
+# Load your data
+@st.cache_data
+def load_data():
+    df = pd.read_csv('movies_data/movies.csv')
+    ratings = pd.read_csv('movies_data/ratings.csv')
+    return df, ratings
+
+# Train your model
+@st.cache_resource
+def train_model(ratings):
+    reader = Reader(rating_scale=(1, 5))
+    data = Dataset.load_from_df(ratings[['userId_x', 'movieId', 'rating']], reader)
+    model = SVD()
+    model.fit(data.build_full_trainset())
+    return model
+
+# Get recommendations
+def get_recommendations(model, df, user_ratings, n=5, genre=None):
+    new_user_id = df['userId_x'].max() + 1
+    movies_to_predict = df[~df['movieId'].isin([x[0] for x in user_ratings])]['movieId'].unique()
+
+    predictions = []
+    for movie_id in movies_to_predict:
+        predicted_rating = model.predict(new_user_id, movie_id).est
+        predictions.append((movie_id, predicted_rating))
+
+    recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)
+
+    if genre:
+        genre_recommendations = [
+            (movie_id, rating) for movie_id, rating in recommendations
+            if genre.lower() in df[df['movieId'] == movie_id]['genres'].iloc[0].lower()
+        ]
+        return genre_recommendations[:n]
+    else:
+        return recommendations[:n]
+
+# Fetch movie poster
+@st.cache_data
+def fetch_poster(movie_id):
+    url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key=your_api_key"
+    response = requests.get(url)
+    data = response.json()
+    return "https://image.tmdb.org/t/p/w500/" + data.get('poster_path', '')
+
+# Main app
+def main():
+    st.title("🎬 Streamflix: Hybrid Movie Recommendation System")
+
+    # Load data
+    df, ratings = load_data()
+    model = train_model(ratings)
+
+    # Sidebar
+    st.sidebar.title('Navigation')
+    page = st.sidebar.radio('Go to', ['Home', 'Get Recommendations', 'Search Movie'])
+
+    if page == 'Home':
+        st.header('🔥 Top Trending Movies')
+        top_movies = df.sort_values('popularity', ascending=False).head(10)
+
+        for _, movie in top_movies.iterrows():
+            col1, col2 = st.columns([1, 3])
+            with col1:
+                poster_url = fetch_poster(movie['id'])
+                st.image(poster_url, width=150)
+            with col2:
+                st.subheader(movie['title'])
+                st.write(f"Genres: {movie['genres']}")
+                st.write(f"Average Rating: {movie['vote_average']:.1f}/10")
+                if st.button(f"Rate {movie['title']}", key=f"rate_{movie['id']}"):
+                    rating = st.slider('Your rating', 0.5, 5.0, 3.0, 0.5, key=f"slider_{movie['id']}")
+                    st.write(f"You rated {movie['title']} {rating} stars!")
+            st.write(''---'')
+
+    elif page == 'Get Recommendations':
+        st.header('🎯 Get Personalized Recommendations')
+        user_id = st.number_input('Please enter your user ID', min_value=1, step=1)
+        genres = st.multiselect('Select genres', df['genres'].explode().unique())
+
+        if st.button('Get Recommendations'):
+            recommendations = get_recommendations(user_id, model, df, ratings)
+            if genres:
+                recommendations = recommendations[recommendations['genres'].apply(lambda x: any(genre in x for genre in genres))]
+
+            st.subheader('Your Recommended Movies:')
+            for _, movie in recommendations.iterrows():
+                col1, col2 = st.columns([1, 3])
+                with col1:
+                    poster_url = fetch_poster(movie['id'])
+                    st.image(poster_url, width=150)
+                with col2:
+                    st.write(f'**{movie['title']}**')
+                    st.write(f'Genres: {movie['genres']}')
+                    st.write(f'Average Rating: {movie['vote_average']:.1f}/10')
+                    if st.button(f'Watch Trailer for {movie['title']}', key=f'trailer_{movie['id']}'):
+                        # You would need to implement a function to fetch and display the trailer
+                        st.video('https://www.youtube.com/watch?v=dQw4w9WgXcQ')  # Placeholder
+                st.write('---')
+
+    elif page == 'Search Movies':
+        st.header('🔍 Search Movies')
+        search_term = st.text_input('Enter a movie title')
+        if search_term:
+            results = df[df['title'].str.contains(search_term, case=False)]
+            for _, movie in results.iterrows():
+                col1, col2 = st.columns([1, 3])
+                with col1:
+                    poster_url = fetch_poster(movie['id'])
+                    st.image(poster_url, width=150)
+                with col2:
+                    st.subheader(movie['title'])
+                    st.write(f'Genres: {movie['genres']}')
+                    st.write(f'Average Rating: {movie['vote_average']:.1f}/10')
+                    st.write(f'Overview: {movie['overview'][:200]}...')
+                st.write('---')
+
+# Start the app only when this file is executed as a script (the README runs
+# it with `streamlit run app.py`), not when it is imported.
+if __name__ == '__main__':
+    main()
diff --git a/README.md b/README.md
@@ -47,6 +47,9 @@ TF-IDF vectorization and cosine similarity are then encapsulated within a `Conte
 The `HybridModel` class integrates collaborative filtering and content-based filtering using a weighted average to provide hybrid recommendations with user ratings guiding the process. After training both models, the system generates and prints a list of recommended movies based on user input. The hybrid model with a collab weight of 0.5 produces an RMSE of `1.25`.
 Different collaborative filtering weights are tested to determine their impact on the hybrid model's performance. Starting from 0.2 up to 0.8, the RMSE values decrease, indicating improved prediction accuracy. With a weight of `0.2` the RMSE is `1.2559`, and with a weight of `0.4` it slightly improves to `1.2523`. The RMSE drops significantly to `1.1263` with a weight of `0.6` and further decreases to `1.1221` with a weight of `0.8`. This suggests that a higher collaborative filtering weight tends to enhance the model's accuracy, leading to lower prediction errors, and helps in identifying the most effective balance between collaborative and content-based filtering.
 
+### Deployment
+Streamlit is a Python library used to create web applications for data science projects. In this case, Streamlit is used to build an interactive movie recommendation system. The app uses a collaborative filtering model based on the `SVD` (singular value decomposition) algorithm from the Surprise library, trained on user-movie ratings. Key features include displaying trending movies, personalized recommendations, movie search functionality and genre-based browsing. The app fetches movie posters from the TMDB API and attempts to show trailers using the YouTube API. The app demonstrates the integration of machine learning models with a user-friendly interface, including features like rating movies, receiving personalized recommendations and exploring movies by genre.
+
 ### Conclusion
 The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid model, which shows higher RMSE, suggesting that emphasizing collaborative filtering in a hybrid approach yields better accuracy and recommendation quality.
 
@@ -59,4 +62,52 @@ The collaborative filtering model with an RMSE of 0.86 outperforms the hybrid mo
 
 4. **Explore Advanced Techniques**: Integrate deep learning-based models and other advanced methods to further enhance the system's capabilities and address remaining limitations.
 
-5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content.
+5. **Incorporate User Feedback and Regular Updates**: Integrate user feedback and conduct real-world testing to validate the model's effectiveness ensuring it meets user preferences and expectations. Regularly update the recommendation system with new data and metrics to adapt to evolving user preferences and content.
+
+### Installation and Setup
+
+**Clone the repository**
+
+**https:**
+```
+git clone https://github.com/Eva-Claire/streamflix_recommender_system.git
+```
+**ssh:**
+```
+git clone git@github.com:Eva-Claire/streamflix_recommender_system.git
+```
+**Navigate to the project directory**
+
+```
+cd streamflix_recommender_system
+```
+**Install dependencies**
+```
+pip install -r requirements.txt
+```
+**Execute the app on Streamlit**
+```
+streamlit run app.py
+```
+
+### Repository Setup
+
+```
+streamflix_movie_recommendation_system/
+│
+├── Cover Page/
+│   ├── Evaclaire M.
+│   └── Simon M.
+│
+└── Project Files/
+    ├── .ipynb_checkpoints/
+    ├── modelling_data/
+    ├── movies_data/
+    ├── project_images/
+    ├── .DS_Store
+    ├── README.md
+    ├── Streamflix_data_report.docx
+    ├── app.py
+    ├── logo.png
+    └── movie_recommendor.ipynb
+```
diff --git a/Recommender_system.pdf b/Recommender_system.pdf
diff --git a/Recommender_system.pptx b/Recommender_system.pptx
diff --git a/pickle_files/collaborative_model1.pkl b/pickle_files/collaborative_model1.pkl
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,14 @@
+streamlit==1.37.0
+pandas==2.2.2
+numpy==1.26.4
+requests==2.32.3
+google-api-python-client==2.138.0
+scikit-learn==1.5.1
+scipy==1.13.1
+scikit-surprise==1.1.1
+google-auth==2.32.0
+google-auth-httplib2==0.2.0
+google-auth-oauthlib==0.7.2
+httplib2==0.22.0
+python-dotenv==1.0.0
+
diff --git a/~$reamflix_data_report.docx b/~$reamflix_data_report.docx