diff --git a/machine-learning/movie-recommendation.py b/machine-learning/movie-recommendation.py
new file mode 100644
index 00000000..e75e99e9
--- /dev/null
+++ b/machine-learning/movie-recommendation.py
@@ -0,0 +1,58 @@
+import pandas as pd
+import numpy as np
+from sklearn.metrics import mean_squared_error
+from scipy.sparse.linalg import svds
+
+# Load movie data
+movies_df = pd.read_csv('movies.csv', usecols=['movieId', 'title'], dtype={'movieId': 'int32', 'title': 'str'})
+ratings_df = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'], dtype={'userId': 'int32', 'movieId': 'int32', 'rating': 'float32'})
+
+# Preprocessing: create a user-movie matrix
+user_movie_df = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)
+
+# Normalize the data by subtracting each user's mean rating
+mean_user_rating = user_movie_df.mean(axis=1)
+ratings_demeaned = user_movie_df.sub(mean_user_rating, axis=0)
+
+# Singular Value Decomposition (svds expects an array, not a DataFrame)
+U, sigma, Vt = svds(ratings_demeaned.to_numpy(), k=50)
+sigma = np.diag(sigma)
+
+# Make predictions: reconstruct the ratings matrix, adding the user means back
+all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + mean_user_rating.values.reshape(-1, 1)
+preds_df = pd.DataFrame(all_user_predicted_ratings, columns=user_movie_df.columns)
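+
+# Sketch of a sanity check: measure how closely the rank-50 reconstruction fits
+# the observed ratings. This is in-sample (no held-out split) and assumes user
+# IDs are consecutive integers starting at 1, as in the MovieLens files.
+predicted = [preds_df.loc[row.userId - 1, row.movieId] for row in ratings_df.itertuples()]
+rmse = np.sqrt(mean_squared_error(ratings_df['rating'], predicted))
+print(f"Reconstruction RMSE on observed ratings: {rmse:.3f}")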
+
+# Recommend the highest-predicted movies the user has not rated yet
+def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
+    # Assumes user IDs are consecutive and start at 1 (true for MovieLens)
+    user_row_number = userID - 1
+    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)
+
+    user_data = original_ratings_df[original_ratings_df.userId == userID]
+    user_full = (user_data.merge(movies_df, how='left', on='movieId')
+                 .sort_values(['rating'], ascending=False))
+
+    recommendations = (movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
+                       .merge(pd.DataFrame(sorted_user_predictions).reset_index(), how='left',
+                              on='movieId')
+                       .rename(columns={user_row_number: 'Predictions'})
+                       .sort_values('Predictions', ascending=False)
+                       .iloc[:num_recommendations, :-1])  # drop the raw score column
+
+    return user_full, recommendations
+
+# Test the recommendation system for a user
+user_id = 1
+rated_movies, recommendations = recommend_movies(preds_df, user_id, movies_df, ratings_df, 10)
+
+print("User has already rated these movies:")
+print(rated_movies.head(10))
+print("\nTop 10 movie recommendations:")
+print(recommendations)
diff --git a/machine-learning/movie-reviews.py b/machine-learning/movie-reviews.py
new file mode 100644
index 00000000..4ddfac98
--- /dev/null
+++ b/machine-learning/movie-reviews.py
@@ -0,0 +1,69 @@
+import pandas as pd
+import numpy as np
+import re
+import nltk
+from nltk.corpus import stopwords
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
+from tensorflow.keras.callbacks import EarlyStopping
+
+# Load dataset
+reviews_df = pd.read_csv('movie_reviews.csv', encoding='utf-8')
+reviews_df = reviews_df[['review', 'sentiment']]  # Assuming 'review' and 'sentiment' columns
+
+# Text preprocessing
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
+
+def clean_text(text):
+    text = text.lower()
+    text = re.sub(r"[^a-z0-9\s]", '', text)
+    text = ' '.join([word for word in text.split() if word not in stop_words])
+    return text
+
+reviews_df['review'] = reviews_df['review'].apply(clean_text)
+
+# Tokenization
+tokenizer = Tokenizer()
+tokenizer.fit_on_texts(reviews_df['review'])
+sequences = tokenizer.texts_to_sequences(reviews_df['review'])
+
+# Padding sequences
+max_len = max(len(x) for x in sequences)
+X = pad_sequences(sequences, maxlen=max_len)
+# One-hot encode the labels (get_dummies sorts columns, e.g. ['negative', 'positive'])
+y = pd.get_dummies(reviews_df['sentiment']).values.astype('float32')
+
+# Train-test split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Model creation
+model = Sequential()
+model.add(Embedding(len(tokenizer.word_index) + 1, 100, input_length=max_len))
+model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
+model.add(Dense(64, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(y.shape[1], activation='softmax'))
+
+model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+# Model training
+early_stopping = EarlyStopping(monitor='val_loss', patience=3)
+model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1, callbacks=[early_stopping])
+
+# Evaluate the model
+loss, accuracy = model.evaluate(X_test, y_test)
+print(f'Test Accuracy: {accuracy:.2f}')
+
+# Predict the sentiment of a single review
+def predict_sentiment(review):
+    cleaned_review = clean_text(review)
+    sequence = tokenizer.texts_to_sequences([cleaned_review])
+    padded_sequence = pad_sequences(sequence, maxlen=max_len)
+    prediction = model.predict(padded_sequence)
+    # With sorted negative/positive dummy columns, index 1 is the positive class
+    sentiment = 'Positive' if np.argmax(prediction) == 1 else 'Negative'
+    return sentiment
diff --git a/machine-learning/plot-summarizer.py b/machine-learning/plot-summarizer.py
new file mode 100644
index 00000000..3be490b0
--- /dev/null
+++ b/machine-learning/plot-summarizer.py
@@ -0,0 +1,63 @@
+import logging
+from transformers import BartTokenizer, BartForConditionalGeneration
+import streamlit as st
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class MoviePlotSummarizer:
+    def __init__(self, model_name='facebook/bart-large-cnn'):
+        self.tokenizer = BartTokenizer.from_pretrained(model_name)
+        self.model = BartForConditionalGeneration.from_pretrained(model_name)
+
+    # Summarize a movie plot
+    def summarize(self, plot_text, max_length=130, min_length=30, style='default'):
+        try:
+            # Adjust the summary length bounds for the requested style
+            if style == 'verbose':
+                max_length *= 2
+                min_length *= 2
+            elif style == 'concise':
+                max_length //= 2
+                min_length //= 2
+
+            # Tokenize the plot and generate the summary
+            inputs = self.tokenizer.encode(plot_text, return_tensors="pt", max_length=1024, truncation=True)
+            summary_ids = self.model.generate(inputs, max_length=max_length, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
+            return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+        except Exception as e:
+            logger.error(f"Error in summarizing plot: {e}")
+            return "Error in summarization process."
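+
+# Sketch of an optional optimization: main() below builds a new summarizer on
+# every click, which reloads the BART weights each time. On Streamlit >= 1.18,
+# a cached factory keeps a single instance alive across reruns; the button
+# handler could call get_summarizer() instead of MoviePlotSummarizer().
+@st.cache_resource
+def get_summarizer():
+    return MoviePlotSummarizer()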
+
+# Streamlit UI
+def main():
+    st.title("Movie Plot Summarizer")
+    st.write("Enter a movie plot to get a summarized version.")
+
+    plot_text = st.text_area("Movie Plot", height=250)
+    max_length = st.slider("Max Summary Length", 30, 300, 130)
+    min_length = st.slider("Min Summary Length", 10, 150, 30)
+    style = st.selectbox("Summarization Style", ['default', 'verbose', 'concise'])
+
+    if st.button("Summarize"):
+        summarizer = MoviePlotSummarizer()
+        summary = summarizer.summarize(plot_text, max_length=max_length, min_length=min_length, style=style)
+        st.subheader("Summarized Plot")
+        st.write(summary)
+
+    if st.button("About"):
+        st.subheader("About")
+        st.write("This is a simple movie plot summarizer built using the HuggingFace Transformers library. It uses the BART model to generate the summaries.")
+        st.write("The facebook/bart-large-cnn checkpoint is BART fine-tuned on the CNN/Daily Mail dataset, which contains news articles paired with human-written summaries.")
+
+if __name__ == "__main__":
+    main()
diff --git a/script.js b/script.js
index d7827837..41062a16 100644
--- a/script.js
+++ b/script.js
@@ -1138,13 +1138,6 @@ document.getElementById('side-nav').addEventListener('mouseleave', function() {
     }
 });
 
-document.addEventListener('click', function () {
-    const sideNav = document.getElementById('side-nav');
-    if (!sideNav.classList.contains('manual-toggle')) {
-        sideNav.style.left = '-250px';
-    }
-})
-
 function toggleNav() {
     const sideNav = document.getElementById('side-nav');
     sideNav.classList.toggle('manual-toggle');