hoangsonww · hoangsonww · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024
diff --git a/MovieVerse-Backend/.idea/workspace.xml b/MovieVerse-Backend/.idea/workspace.xml
diff --git a/MovieVerse-Backend/README.md b/MovieVerse-Backend/README.md
@@ -51,6 +51,7 @@ The microservices architecture of MovieVerse is designed to segregate the applic
 - Redis or RabbitMQ as a broker for Celery
 - BeautifulSoup4 and Requests for web scraping in the Crawler Service
 - Transformers and PyTorch for AI functionalities within the Crawler Service
+- Python 3.8 or higher (and an IDE that supports Python and can run Python scripts)
 
 To satisfy these prerequisites, simply run the following command:
 
@@ -68,7 +69,12 @@ pip install -r requirements.txt
    ```bash
    cd mobile-backend
    ```
-3. Follow the specific installation instructions for each service below.
+3. Create a Virtual Environment (optional but recommended):
+    ```bash
+    python3 -m venv venv
+    source venv/bin/activate
+    ```
+4. Follow the specific installation instructions for each service below.
 
 ### Running the Services
 
@@ -210,7 +216,9 @@ MovieVerse currently uses MongoDB, Redis, and MySQL as its primary databases. To
     [x] Received Hello from RabbitMQ
     ```
 
-Note that these servers are for your local development environment only. For our production environment, our databases might look different (in fact, they do!).
+Note that these servers are for your local development environment only; they let you see how our backend services interact with each other.
+
+In our production environment, we use cloud-based services like AWS, Azure, and Google Cloud to host our databases and services, so the production setup will look different from what you see on your end.
 
 #### Machine Learning Services
 

diff --git a/MovieVerse-Backend/django_backend/django_backend/settings.py b/MovieVerse-Backend/django_backend/django_backend/settings.py
@@ -23,8 +23,7 @@
 
 # SECURITY WARNING: don't run with debug turned on in production!
 
-ALLOWED_HOSTS = []
-
+ALLOWED_HOSTS = ['127.0.0.1', 'localhost', 'movie-verse.com', 'www.movie-verse.com']  # loopback hosts for local runs plus the movie-verse.com domains
 
 # Application definition
 

diff --git a/MovieVerse-Backend/django_backend/movieverse/static/images/favicon.ico b/MovieVerse-Backend/django_backend/movieverse/static/images/favicon.ico
diff --git a/MovieVerse-Mobile/app/python/crawler/ai/adjust_crawling_strategy.py b/MovieVerse-Mobile/app/python/crawler/ai/adjust_crawling_strategy.py
@@ -0,0 +1,55 @@
+import math
+
+import requests
+
+
+def adjust_crawling_strategy(sentiment_trend, crawling_params):
+    """
+    Adjust crawling parameters based on sentiment trend analysis.
+
+    The latest value of the 'rolling_avg_sentiment' column decides the
+    direction: above 0.5 the frequency is raised by 10%, otherwise it is
+    lowered by 10%. If there is no usable latest value (no rows, or NaN
+    because the rolling window is not filled yet) nothing is changed.
+
+    :param sentiment_trend: DataFrame with sentiment trend analysis.
+    :param crawling_params: Dictionary of current crawling parameters; its
+        'frequency' entry is scaled in place.
+    :return: Adjusted crawling parameters (the same dictionary object).
+    """
+    rolling_avg = sentiment_trend['rolling_avg_sentiment']
+    if rolling_avg.empty:
+        # No observations yet, so there is no trend to react to.
+        return crawling_params
+
+    recent_trend = rolling_avg.iloc[-1]
+    if math.isnan(recent_trend):
+        # NaN > 0.5 is False, which would otherwise count as "decrease"; an
+        # unknown trend should leave the frequency alone instead.
+        return crawling_params
+
+    if recent_trend > 0.5:
+        crawling_params['frequency'] *= 1.1  # Increase frequency
+    else:
+        crawling_params['frequency'] *= 0.9  # Decrease frequency
+    return crawling_params
+
+
+def fetch_movie_data(url, timeout=10):
+    """
+    Download the page at a URL and return its body as text.
+
+    :param url: Address to request.
+    :param timeout: Seconds to wait for the server before giving up, so a
+        stalled connection cannot block the crawler forever.
+    :return: Response text on HTTP 200; None for any other status or when
+        the request itself fails.
+    """
+    headers = {'User-Agent': 'Mozilla/5.0'}
+    try:
+        response = requests.get(url, headers=headers, timeout=timeout)
+    except requests.RequestException:
+        return None
+    if response.status_code == 200:
+        return response.text
+    return None
diff --git a/MovieVerse-Mobile/app/python/crawler/ai/content_summarization.py b/MovieVerse-Mobile/app/python/crawler/ai/content_summarization.py
@@ -0,0 +1,28 @@
+from transformers import pipeline
+
+# Loaded once at import time so every call reuses the same model.
+summarizer = pipeline("summarization")
+
+
+def summarize_content(content):
+    """
+    Summarize the content using a pre-trained summarization model.
+
+    :param content: String containing the content to summarize.
+    :return: Summarized content, or an empty string when there is nothing
+        to summarize.
+    """
+    if not content or not content.strip():
+        # Nothing to feed the model; skip the inference call entirely.
+        return ""
+
+    # truncation=True clips inputs longer than the model's maximum length
+    # instead of letting the pipeline fail on them.
+    summary = summarizer(
+        content,
+        max_length=50,
+        min_length=25,
+        do_sample=False,
+        truncation=True,
+    )
+    return summary[0]['summary_text']
diff --git a/MovieVerse-Mobile/app/python/crawler/ai/image_analysis.py b/MovieVerse-Mobile/app/python/crawler/ai/image_analysis.py
@@ -0,0 +1,54 @@
+from PIL import Image
+import requests
+from torchvision import models, transforms
+import torch
+from io import BytesIO
+
+# Load a pretrained image classification model
+model = models.resnet50(pretrained=True)
+model.eval()  # switch the network to inference mode
+
+# Define image transformations
+transform = transforms.Compose([
+    transforms.Resize(256),
+    transforms.CenterCrop(224),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+
+def classify_image(image_url, timeout=10):
+    """
+    Download an image and classify it with the pretrained ResNet-50 model.
+
+    :param image_url: URL of the image to classify.
+    :param timeout: Seconds to wait for the image download.
+    :return: Integer index of the highest-scoring class.
+    :raises requests.RequestException: If the download fails or the server
+        answers with an HTTP error status.
+    """
+    response = requests.get(image_url, timeout=timeout)
+    # Fail here on 4xx/5xx instead of handing an error page to the decoder.
+    response.raise_for_status()
+
+    # Normalize() above is defined for three channels, so grayscale, palette
+    # and RGBA images must be converted before the transform runs.
+    image = Image.open(BytesIO(response.content)).convert('RGB')
+    tensor = transform(image).unsqueeze(0)
+
+    with torch.no_grad():
+        outputs = model(tensor)
+        _, predicted = torch.max(outputs, 1)
+
+    # Translate predicted category index to a label here
+    return predicted.item()
+
+
+def analyze_image(image_url):
+    """
+    Analyze an image by classifying it.
+
+    :param image_url: URL of the image to analyze.
+    :return: Result of classify_image for that URL.
+    """
+    return classify_image(image_url)
diff --git a/MovieVerse-Mobile/app/python/crawler/ai/sentiment_trend_analysis.py b/MovieVerse-Mobile/app/python/crawler/ai/sentiment_trend_analysis.py
@@ -0,0 +1,25 @@
+import pandas as pd
+from datetime import datetime
+
+
+def analyze_sentiment_trend(sentiment_data, window=7):
+    """
+    Analyze sentiment trends over time from collected data.
+
+    :param sentiment_data: List of dictionaries containing 'date' and 'sentiment' keys.
+    :param window: Number of consecutive rows averaged for the rolling mean.
+    :return: DataFrame sorted by date with an added 'rolling_avg_sentiment'
+        column; rows before the window is filled hold NaN there.
+    """
+    if len(sentiment_data) == 0:
+        # An empty frame has no 'date' column to index, so return the
+        # expected (empty) shape directly.
+        return pd.DataFrame(
+            columns=['date', 'sentiment', 'rolling_avg_sentiment']
+        )
+
+    df = pd.DataFrame(sentiment_data)
+    df['date'] = pd.to_datetime(df['date'])
+    df.sort_values('date', inplace=True)
+    df['rolling_avg_sentiment'] = df['sentiment'].rolling(window=window).mean()
+    return df
diff --git a/MovieVerse-Mobile/app/python/crawler/ai/text_analysis.py b/MovieVerse-Mobile/app/python/crawler/ai/text_analysis.py
@@ -0,0 +1,17 @@
+from transformers import pipeline
+
+# Load a sentiment analysis pipeline
+sentiment_pipeline = pipeline("sentiment-analysis")
+
+
+def analyze_text_sentiment(text):
+    """
+    Run the sentiment-analysis pipeline on a piece of text.
+
+    :param text: Text to analyze.
+    :return: Raw pipeline output for the text.
+    :raises Exception: Whatever the pipeline raises is propagated unchanged.
+    """
+    # Errors are simply left to propagate; no handler is needed for that.
+    # truncation=True keeps over-long inputs from failing inside the model.
+    return sentiment_pipeline(text, truncation=True)
diff --git a/MovieVerse-Mobile/app/python/crawler/crawler_orchestrator.py b/MovieVerse-Mobile/app/python/crawler/crawler_orchestrator.py
@@ -0,0 +1,50 @@
+import logging
+
+from .scraper import fetch_movie_data
+from .parser import parse_movie_data
+from .ai.text_analysis import analyze_text_sentiment
+from .ai.image_analysis import classify_image
+from .tasks import crawl_movie_data_and_store
+from .models import MovieDetail
+from django.core.exceptions import ObjectDoesNotExist
+
+logger = logging.getLogger(__name__)
+
+
+def orchestrate_crawling(url):
+    """
+    Run the crawl pipeline for one URL: fetch, parse, analyze, store.
+
+    Any failure is logged with its traceback and reported through the
+    return value rather than raised.
+
+    :param url: Page to crawl.
+    :return: True when every step completed, False otherwise.
+    """
+    try:
+        # Step 1: Fetch data
+        html_content = fetch_movie_data(url)
+        if not html_content:
+            raise ValueError("Failed to fetch data from URL.")
+
+        # Step 2: Parse data
+        movie_data = parse_movie_data(html_content)
+        if not movie_data:
+            raise ValueError("Failed to parse movie data.")
+
+        # Step 3: Analyze text sentiment
+        movie_data['sentiment'] = analyze_text_sentiment(movie_data['description'])
+
+        # Step 4: Image analysis
+        movie_data['image_analysis'] = classify_image(movie_data['poster_url'])
+
+        # Step 5: Store data in the database
+        crawl_movie_data_and_store(movie_data)
+    except Exception:
+        # Top-level boundary of the pipeline: keep the best-effort contract
+        # but record the traceback so failures can be diagnosed.
+        logger.exception("Error during the crawling process for %s", url)
+        return False
+
+    logger.info("Crawling and data processing completed successfully.")
+    return True
diff --git a/MovieVerse-Mobile/app/python/crawler/datasources.py b/MovieVerse-Mobile/app/python/crawler/datasources.py
@@ -0,0 +1,49 @@
+# Pages crawled for movie data. Each URL is listed once so a single run does
+# not queue the same page repeatedly.
+DATA_SOURCES = [
+    'https://www.imdb.com/chart/top',
+    'https://www.rottentomatoes.com/top/bestofrt/',
+    'https://www.metacritic.com/browse/movies/score/metascore/all/filtered',
+    'https://www.themoviedb.org/movie/top_rated',
+    'https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW',
+    'https://www.the-numbers.com/movie/budgets/all',
+    'https://www.the-numbers.com/movie/production-companies/',
+    'https://www.the-numbers.com/movie/keywords',
+    'https://www.the-numbers.com/movie/genres',
+    'https://www.the-numbers.com/movie/franchises',
+    'https://www.the-numbers.com/movie/creative-type',
+    'https://www.the-numbers.com/movie/production-method',
+    'https://www.the-numbers.com/movie/source',
+    'https://www.the-numbers.com/movie/production-status',
+    'https://www.the-numbers.com/movie/production-countries',
+    'https://www.the-numbers.com/movie/languages',
+    'https://www.the-numbers.com/movie/certifications',
+    'https://www.the-numbers.com/movie/mpaa-ratings',
+    'https://www.the-numbers.com/movie/running-times',
+    'https://www.the-numbers.com/movie/decades',
+    'https://www.the-numbers.com/movie/release-dates',
+    'https://www.the-numbers.com/movie/release-types',
+    'https://www.the-numbers.com/movie/weekend-box-office-chart',
+    'https://www.the-numbers.com/movie/weekly-box-office-chart',
+    'https://www.the-numbers.com/movie/weekend-per-theater-chart',
+    'https://www.the-numbers.com/movie/weekly-per-theater-chart',
+    'https://www.the-numbers.com/movie/theater-count',
+    'https://www.the-numbers.com/movie/market',
+]
+
+
+def fetch_from_sources(crawl_movie_data_and_store=None):
+    """
+    Queue a crawl job for every URL in DATA_SOURCES.
+
+    :param crawl_movie_data_and_store: Task object exposing ``delay(url)``.
+        When omitted, the task from the sibling ``tasks`` module is used.
+    """
+    if crawl_movie_data_and_store is None:
+        # Imported lazily so this module can be loaded without the task
+        # machinery; calling .delay on the None default would raise
+        # AttributeError.
+        from .tasks import crawl_movie_data_and_store
+
+    for source in DATA_SOURCES:
+        crawl_movie_data_and_store.delay(source)