diff --git a/README.md b/README.md index 7e4f2a1..592fc81 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # STREAMFLIX MOVIE RECOMMENDATION SYSTEM + + ### Authors - Group 10 Evaclaire Wamitu - [Email](evamunyika@gmail.com) Simon Makumi - [Email](simonmakumi5@gmail.com) diff --git a/movie_recommendor.ipynb b/movie_recommendor.ipynb index dcb134d..f33c429 100644 --- a/movie_recommendor.ipynb +++ b/movie_recommendor.ipynb @@ -2330,7 +2330,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 19, "id": "4aa1079f-b0ea-4f89-aa54-efb6254c7753", "metadata": {}, "outputs": [ @@ -2338,8 +2338,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "RMSE: 1.4195\n", - "Baseline Model RMSE: 1.4194885572066\n" + "RMSE: 1.4211\n", + "Baseline Model RMSE: 1.4210703762899288\n" ] } ], @@ -2399,7 +2399,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "aae5b17e-07a1-47e1-9c93-afe9fbff1fea", "metadata": {}, "outputs": [ @@ -2409,7 +2409,7 @@ "text": [ "Tuning SVD...\n", "Best SVD parameters: {'n_factors': 100, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}\n", - "Best SVD RMSE: 0.8631235009686375\n", + "Best SVD RMSE: 0.8628644889721498\n", "Tuning KNN...\n", "Computing the pearson similarity matrix...\n", "Done computing similarity matrix.\n", @@ -2658,7 +2658,88 @@ "Computing the msd similarity matrix...\n", "Done computing similarity matrix.\n", "Computing the msd similarity matrix...\n", - "Done computing similarity matrix.\n" + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the pearson similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the cosine similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Computing the msd similarity matrix...\n", + "Done computing similarity matrix.\n", + "Best KNN parameters: {'k': 30, 'min_k': 5, 'sim_options': {'name': 'msd', 'user_based': False}}\n", + "Best KNN RMSE: 0.918235381325116\n", + "\n", + "Best model: SVD\n", + "\n", + "Best RMSE: 0.8628644889721498\n" ] } ], @@ -2715,18 +2796,10 @@ " print(f\"\\nBest model: {best_model_name}\")\n", " print(f\"\\nBest RMSE: {best_score}\")\n", " \n", - " # Cross-validate the best model\n", - " print(f\"\\nPerforming cross-validation for the best model: {best_model_name}...\")\n", - " best_model = best_model_class(**best_model_params)\n", - " cv_results = cross_validate(best_model, data, measures=['rmse'], cv=3)\n", - " \n", - " print(f\"\\nCross-validation results for {best_model_name}:\")\n", - " print(f\"Mean RMSE: {cv_results['test_rmse'].mean()}\")\n", - " \n", - " return best_model, best_score\n", + " return best_model_name, best_score\n", "\n", "# Instantiate\n", - "best_model, best_score = grid_search_models(data)" + "best_model_name, best_score = grid_search_models(data)" ] }, { @@ -2754,7 +2827,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Step 8: Create functions for the recommendation system\n", + "# Functions for the recommendation system\n", "def get_user_ratings(df, num_movies=5):\n", " user_ratings = []\n", " for _ in range(num_movies):\n", @@ -2823,10 +2896,6 @@ "from sklearn.metrics import mean_squared_error, mean_absolute_error\n", "import re\n", "\n", - "# Load the data\n", - "df = pd.read_csv('ml-latest-small/movies.csv')\n", - "ratings = pd.read_csv('ml-latest-small/ratings.csv')\n", - "\n", "# Preprocess the data\n", "df['clean_title'] = df['title'].apply(lambda x: re.sub(\"[^a-zA-Z0-9 ]\", \"\", x))\n", "df['features'] = df['clean_title'] + ' ' + df['genres']\n", @@ -2849,8 +2918,6 @@ "\n", "# Collaborative Filtering\n", "def collaborative_filtering(ratings):\n", - " reader = Reader(rating_scale=(0.5, 5))\n", - " data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)\n", " trainset, testset = train_test_split(data, test_size=0.2, random_state=42)\n", " \n", " algo = SVD()\n", @@ -2877,8 +2944,7 @@ "# Evaluation metrics\n", "def evaluate_recommendations(predictions, actual):\n", " rmse = np.sqrt(mean_squared_error(actual, predictions))\n", - " mae = mean_absolute_error(actual, predictions)\n", - " return rmse, mae\n", + " return rmse\n", "\n", "# Main execution\n", "if __name__ == \"__main__\":\n", @@ -2895,9 +2961,7 @@ " print(\"-----------------------\")\n", " algo, cf_predictions = collaborative_filtering(ratings)\n", " cf_rmse = accuracy.rmse(cf_predictions)\n", - " cf_mae = accuracy.mae(cf_predictions)\n", " print(f\"RMSE: {cf_rmse:.4f}\")\n", - " print(f\"MAE: {cf_mae:.4f}\")\n", " print()\n", "\n", " print(\"Hybrid Filtering\")\n", diff --git a/streamflix_recommendation_system.pptx b/streamflix_recommendation_system.pptx index 0277e33..f00d76b 100644 Binary files a/streamflix_recommendation_system.pptx and b/streamflix_recommendation_system.pptx differ