Skip to content

Commit

Permalink
Merge pull request Eva-Claire#12 from Eva-Claire/claire_branch
Browse files Browse the repository at this point in the history
Claire branch
  • Loading branch information
Eva-Claire authored Jul 27, 2024
2 parents e3ce50f + bfb8d96 commit b7fa82a
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 27 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# STREAMFLIX MOVIE RECOMMENDATION SYSTEM



### Authors - Group 10
Evaclaire Wamitu - [Email]([email protected])
Simon Makumi - [Email]([email protected])
Expand Down
118 changes: 91 additions & 27 deletions movie_recommendor.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2330,16 +2330,16 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 19,
"id": "4aa1079f-b0ea-4f89-aa54-efb6254c7753",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE: 1.4195\n",
"Baseline Model RMSE: 1.4194885572066\n"
"RMSE: 1.4211\n",
"Baseline Model RMSE: 1.4210703762899288\n"
]
}
],
Expand Down Expand Up @@ -2399,7 +2399,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "aae5b17e-07a1-47e1-9c93-afe9fbff1fea",
"metadata": {},
"outputs": [
Expand All @@ -2409,7 +2409,7 @@
"text": [
"Tuning SVD...\n",
"Best SVD parameters: {'n_factors': 100, 'n_epochs': 30, 'lr_all': 0.01, 'reg_all': 0.1}\n",
"Best SVD RMSE: 0.8631235009686375\n",
"Best SVD RMSE: 0.8628644889721498\n",
"Tuning KNN...\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
Expand Down Expand Up @@ -2658,7 +2658,88 @@
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n"
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the pearson similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the cosine similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Computing the msd similarity matrix...\n",
"Done computing similarity matrix.\n",
"Best KNN parameters: {'k': 30, 'min_k': 5, 'sim_options': {'name': 'msd', 'user_based': False}}\n",
"Best KNN RMSE: 0.918235381325116\n",
"\n",
"Best model: SVD\n",
"\n",
"Best RMSE: 0.8628644889721498\n"
]
}
],
Expand Down Expand Up @@ -2715,18 +2796,10 @@
" print(f\"\\nBest model: {best_model_name}\")\n",
" print(f\"\\nBest RMSE: {best_score}\")\n",
" \n",
" # Cross-validate the best model\n",
" print(f\"\\nPerforming cross-validation for the best model: {best_model_name}...\")\n",
" best_model = best_model_class(**best_model_params)\n",
" cv_results = cross_validate(best_model, data, measures=['rmse'], cv=3)\n",
" \n",
" print(f\"\\nCross-validation results for {best_model_name}:\")\n",
" print(f\"Mean RMSE: {cv_results['test_rmse'].mean()}\")\n",
" \n",
" return best_model, best_score\n",
" return best_model_name, best_score\n",
"\n",
"# Instantiate\n",
"best_model, best_score = grid_search_models(data)"
"best_model_name, best_score = grid_search_models(data)"
]
},
{
Expand Down Expand Up @@ -2754,7 +2827,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Step 8: Create functions for the recommendation system\n",
"# Functions for the recommendation system\n",
"def get_user_ratings(df, num_movies=5):\n",
" user_ratings = []\n",
" for _ in range(num_movies):\n",
Expand Down Expand Up @@ -2823,10 +2896,6 @@
"from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
"import re\n",
"\n",
"# Load the data\n",
"df = pd.read_csv('ml-latest-small/movies.csv')\n",
"ratings = pd.read_csv('ml-latest-small/ratings.csv')\n",
"\n",
"# Preprocess the data\n",
"df['clean_title'] = df['title'].apply(lambda x: re.sub(\"[^a-zA-Z0-9 ]\", \"\", x))\n",
"df['features'] = df['clean_title'] + ' ' + df['genres']\n",
Expand All @@ -2849,8 +2918,6 @@
"\n",
"# Collaborative Filtering\n",
"def collaborative_filtering(ratings):\n",
" reader = Reader(rating_scale=(0.5, 5))\n",
" data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)\n",
" trainset, testset = train_test_split(data, test_size=0.2, random_state=42)\n",
" \n",
" algo = SVD()\n",
Expand All @@ -2877,8 +2944,7 @@
"# Evaluation metrics\n",
"def evaluate_recommendations(predictions, actual):\n",
" rmse = np.sqrt(mean_squared_error(actual, predictions))\n",
" mae = mean_absolute_error(actual, predictions)\n",
" return rmse, mae\n",
" return rmse\n",
"\n",
"# Main execution\n",
"if __name__ == \"__main__\":\n",
Expand All @@ -2895,9 +2961,7 @@
" print(\"-----------------------\")\n",
" algo, cf_predictions = collaborative_filtering(ratings)\n",
" cf_rmse = accuracy.rmse(cf_predictions)\n",
" cf_mae = accuracy.mae(cf_predictions)\n",
" print(f\"RMSE: {cf_rmse:.4f}\")\n",
" print(f\"MAE: {cf_mae:.4f}\")\n",
" print()\n",
"\n",
" print(\"Hybrid Filtering\")\n",
Expand Down
Binary file modified streamflix_recommendation_system.pptx
Binary file not shown.

0 comments on commit b7fa82a

Please sign in to comment.