Commit

Updated backend logic
Son Nguyen committed Dec 19, 2023
1 parent dd30551 commit 1ba8100
Showing 4 changed files with 187 additions and 7 deletions.
55 changes: 55 additions & 0 deletions machine-learning/movie-recommendation.py
@@ -0,0 +1,55 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import svds

# Load movie data
movies_df = pd.read_csv('movies.csv', usecols=['movieId', 'title'], dtype={'movieId': 'int32', 'title': 'str'})
ratings_df = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'], dtype={'userId': 'int32', 'movieId': 'int32', 'rating': 'float32'})

# Preprocessing
# Create a user-movie matrix
user_movie_df = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Normalize by subtracting each user's mean rating (note: unrated entries were
# filled with 0 above, so they count as ratings of 0 during demeaning)
mean_user_rating = user_movie_df.mean(axis=1)
ratings_demeaned = user_movie_df.sub(mean_user_rating, axis=0)

# Singular Value Decomposition: factorize the demeaned matrix into k=50 latent factors
# (svds expects an array or sparse matrix, so pass the underlying NumPy values)
U, sigma, Vt = svds(ratings_demeaned.values, k=50)
sigma = np.diag(sigma)  # svds returns singular values as a 1-D array; make them diagonal

# Making Predictions: reconstruct the full rating matrix and add each user's mean back
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + mean_user_rating.values.reshape(-1, 1)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=user_movie_df.columns)
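
# --- Sanity check (sketch, not part of the original commit) ---
# Compare the reconstruction against the known ratings using the
# mean_squared_error import above. This measures reconstruction error on
# observed entries, not generalization; for the latter, hold ratings out
# with train_test_split before running svds.
observed = user_movie_df.values.nonzero()
rmse = np.sqrt(mean_squared_error(user_movie_df.values[observed],
                                  all_user_predicted_ratings[observed]))
print(f"Reconstruction RMSE on observed ratings: {rmse:.3f}")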

# Recommend Movies
def recommend_movies(predictions_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    # Row i of predictions_df corresponds to userId i + 1 (assumes contiguous 1-based user IDs)
    user_row_number = userID - 1
    sorted_user_predictions = predictions_df.iloc[user_row_number].sort_values(ascending=False)

    # Movies the user has already rated, joined with their titles
    user_data = original_ratings_df[original_ratings_df.userId == userID]
    user_full = (user_data.merge(movies_df, how='left', on='movieId')
                 .sort_values(['rating'], ascending=False))

    # Rank the movies the user has NOT rated by predicted rating,
    # then drop the raw prediction column from the output
    recommendations = (movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
                       .merge(pd.DataFrame(sorted_user_predictions).reset_index(),
                              how='left', on='movieId')
                       .rename(columns={user_row_number: 'Predictions'})
                       .sort_values('Predictions', ascending=False)
                       .iloc[:num_recommendations, :-1])

    return user_full, recommendations

# Test the recommendation system for a user
user_id = 1
rated_movies, recommendations = recommend_movies(preds_df, user_id, movies_df, ratings_df, 10)

print("User has already rated these movies:")
print(rated_movies.head(10))
print("\nTop 10 movie recommendations:")
print(recommendations)
67 changes: 67 additions & 0 deletions machine-learning/movie-reviews.py
@@ -0,0 +1,67 @@
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load dataset
reviews_df = pd.read_csv('movie_reviews.csv', encoding='utf-8')
reviews_df = reviews_df[['review', 'sentiment']] # Assuming 'review' and 'sentiment' columns

# Text preprocessing
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z0-9\s]", '', text)  # keep only lowercase letters, digits, and whitespace
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

reviews_df['review'] = reviews_df['review'].apply(clean_text)

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reviews_df['review'])
sequences = tokenizer.texts_to_sequences(reviews_df['review'])

# Padding sequences
max_len = max([len(x) for x in sequences])
X = pad_sequences(sequences, maxlen=max_len)
y = pd.get_dummies(reviews_df['sentiment']).values
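
# (Sketch, not part of the original commit) pd.get_dummies orders the label
# columns alphabetically, so with labels such as 'negative'/'positive',
# column 1 is 'positive' -- the ordering predict_sentiment below relies on.
print("Label column order:", list(pd.get_dummies(reviews_df['sentiment']).columns))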

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model creation
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 100, input_length=max_len))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model training
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1, callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Predictions
def predict_sentiment(review):
    cleaned_review = clean_text(review)
    sequence = tokenizer.texts_to_sequences([cleaned_review])
    padded_sequence = pad_sequences(sequence, maxlen=max_len)
    prediction = model.predict(padded_sequence)
    # Column 1 is 'positive' given get_dummies' alphabetical label order
    sentiment = 'Positive' if np.argmax(prediction) == 1 else 'Negative'
    return sentiment
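
# (Sketch, not part of the original commit) Example usage with a made-up review:
sample_review = "An absolute triumph, with stunning performances throughout."
print(f"Predicted sentiment: {predict_sentiment(sample_review)}")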
65 changes: 65 additions & 0 deletions machine-learning/plot-summarizer.py
@@ -0,0 +1,65 @@
import torch
import logging
from transformers import BartTokenizer, BartForConditionalGeneration
import streamlit as st

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MoviePlotSummarizer:
    def __init__(self, model_name='facebook/bart-large-cnn'):
        self.tokenizer = BartTokenizer.from_pretrained(model_name)
        self.model = BartForConditionalGeneration.from_pretrained(model_name)

    # Summarize a movie plot
    def summarize(self, plot_text, max_length=130, min_length=30, style='default'):
        try:
            # Adjust the summary length bounds based on the requested style
            if style == 'verbose':
                max_length *= 2
                min_length *= 2
            elif style == 'concise':
                max_length //= 2
                min_length //= 2

            # Tokenize and generate the summary with beam search
            inputs = self.tokenizer.encode("summarize: " + plot_text, return_tensors="pt", max_length=1024, truncation=True)
            summary_ids = self.model.generate(inputs, max_length=max_length, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
            return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Error in summarizing plot: {e}")
            return "Error in summarization process."

# Streamlit UI
def main():
    st.title("Movie Plot Summarizer")
    st.write("Enter a movie plot to get a summarized version.")

    plot_text = st.text_area("Movie Plot", height=250)
    max_length = st.slider("Max Summary Length", 30, 300, 130)
    min_length = st.slider("Min Summary Length", 10, 150, 30)
    style = st.selectbox("Summarization Style", ['default', 'verbose', 'concise'])

    if st.button("Summarize"):
        summarizer = MoviePlotSummarizer()
        summary = summarizer.summarize(plot_text, max_length=max_length, min_length=min_length, style=style)
        st.subheader("Summarized Plot")
        st.write(summary)

    if st.button("About"):
        st.subheader("About")
        st.write("This is a simple movie plot summarizer built with the Hugging Face Transformers library.")
        st.write("It uses the facebook/bart-large-cnn checkpoint, a BART model fine-tuned for summarization on the CNN/Daily Mail dataset of news articles and their highlight summaries.")

if __name__ == "__main__":
    main()
7 changes: 0 additions & 7 deletions script.js
@@ -1138,13 +1138,6 @@ document.getElementById('side-nav').addEventListener('mouseleave', function() {
}
});

document.addEventListener('click', function () {
    const sideNav = document.getElementById('side-nav');
    if (!sideNav.classList.contains('manual-toggle')) {
        sideNav.style.left = '-250px';
    }
})

function toggleNav() {
    const sideNav = document.getElementById('side-nav');
    sideNav.classList.toggle('manual-toggle');
