Skip to content

Commit 6b1f8a0

Browse files
authored
Merge pull request #207 from hoangsonww/main-deployment-branch
Main deployment branch
2 parents 538e31a + c05fd3c commit 6b1f8a0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2329
-31
lines changed

MovieVerse-Backend/.idea/workspace.xml

+58-27
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

MovieVerse-Backend/README.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ The microservices architecture of MovieVerse is designed to segregate the applic
5151
- Redis or RabbitMQ as a broker for Celery
5252
- BeautifulSoup4 and Requests for web scraping in the Crawler Service
5353
- Transformers and PyTorch for AI functionalities within the Crawler Service
54+
- Python 3.8 or higher (and an IDE that supports Python and can run Python scripts)
5455

5556
To satisfy these prerequisites, simply run the following command:
5657

@@ -68,7 +69,12 @@ pip install -r requirements.txt
6869
```bash
6970
cd mobile-backend
7071
```
71-
3. Follow the specific installation instructions for each service below.
72+
3. Create a Virtual Environment (optional but recommended):
73+
```bash
74+
python3 -m venv venv
75+
source venv/bin/activate
76+
```
77+
4. Follow the specific installation instructions for each service below.
7278

7379
### Running the Services
7480

@@ -210,7 +216,9 @@ MovieVerse currently uses MongoDB, Redis, and MySQL as its primary databases. To
210216
[x] Received Hello from RabbitMQ
211217
```
212218
213-
Note that these servers are for your local development environment only. For our production environment, our databases might look different (in fact, they do!).
219+
Note that these servers are for your local development environment only; they let you see how our backend services interact with each other.
220+
221+
In our production environment, we use cloud-based services such as AWS, Azure, and Google Cloud to host our databases and services, so the production setup will look different from what you see on your end.
214222
215223
#### Machine Learning Services
216224

MovieVerse-Backend/django_backend/django_backend/settings.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323

2424
# SECURITY WARNING: don't run with debug turned on in production!
2525

26-
ALLOWED_HOSTS = []
27-
26+
# Host names listed for this project: the local development addresses
# (127.0.0.1 / localhost) plus the movie-verse.com domain with and without "www".
ALLOWED_HOSTS = ['127.0.0.1', 'localhost', 'movie-verse.com', 'www.movie-verse.com']
2827

2928
# Application definition
3029

Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import requests
2+
3+
4+
def adjust_crawling_strategy(sentiment_trend, crawling_params):
    """
    Adjust crawling parameters based on sentiment trend analysis.

    The most recent non-missing value of the ``rolling_avg_sentiment`` column
    decides the direction: above 0.5 the crawl frequency is raised by 10%,
    otherwise it is lowered by 10%. When no usable value exists (the column is
    empty or entirely NaN, e.g. a rolling average that has not accumulated
    enough rows yet), the parameters are returned untouched instead of raising
    ``IndexError`` or silently treating NaN as a negative trend.

    :param sentiment_trend: DataFrame with sentiment trend analysis; must
        contain a ``rolling_avg_sentiment`` column.
    :param crawling_params: Dictionary of current crawling parameters; its
        ``'frequency'`` entry is updated in place.
    :return: Adjusted crawling parameters (the same dictionary object).
    """
    valid_trend = sentiment_trend['rolling_avg_sentiment'].dropna()
    if valid_trend.empty:
        # No usable signal yet: keep the current frequency.
        return crawling_params

    recent_trend = valid_trend.iloc[-1]
    if recent_trend > 0.5:
        crawling_params['frequency'] *= 1.1  # Increase frequency
    else:
        crawling_params['frequency'] *= 0.9  # Decrease frequency
    return crawling_params
18+
19+
20+
def fetch_movie_data(url, timeout=10):
    """
    Download the raw HTML of a movie page.

    :param url: Address of the page to fetch.
    :param timeout: Seconds to wait for the server before giving up. Without
        a timeout the request can block indefinitely on an unresponsive host.
    :return: Response body as text on HTTP 200, otherwise ``None`` (including
        when the request fails with a network-level error or times out).
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 status so callers only have one failure value to check.
        return None

    if response.status_code != 200:
        return None
    return response.text
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from transformers import pipeline
2+
3+
# Summarization pipeline used by summarize_content. It is created once, when
# this module is imported. No model name is passed here, so the pipeline
# presumably falls back to the library's default model (TODO confirm which).
summarizer = pipeline("summarization")
4+
5+
6+
def summarize_content(content):
    """
    Summarize the content using a pre-trained summarization model.

    :param content: String containing the content to summarize.
    :return: Summarized content, or an empty string when ``content`` is
        ``None``, empty, or whitespace-only.
    """
    if not content or not content.strip():
        # Nothing to summarize; skip the model call entirely.
        return ""

    summary = summarizer(content, max_length=50, min_length=25, do_sample=False)
    return summary[0]['summary_text']
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from PIL import Image
2+
import requests
3+
from torchvision import models, transforms
4+
import torch
5+
from io import BytesIO
6+
7+
# Pretrained ResNet-50 classifier, switched to evaluation mode as soon as the
# module is imported.
model = models.resnet50(pretrained=True)
model.eval()

# Preprocessing applied to every image before it is handed to the model:
# resize, centre-crop to 224, convert to a tensor, then normalize per channel.
_preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocessing_steps)
18+
19+
20+
def classify_image(image_url, timeout=10):
    """
    Classify the image at ``image_url`` with the module-level model.

    :param image_url: Address of the image to download and classify.
    :param timeout: Seconds to wait for the image download before giving up.
    :return: Integer index of the highest-scoring class in the model output.
    :raises requests.RequestException: If the download fails, times out, or
        the server answers with an HTTP error status.
    """
    response = requests.get(image_url, timeout=timeout)
    # Fail on 4xx/5xx here rather than handing an error page to the decoder.
    response.raise_for_status()

    # Force three channels: greyscale, palette and RGBA images would otherwise
    # yield tensors that do not match the 3-channel normalization.
    image = Image.open(BytesIO(response.content)).convert('RGB')
    tensor = transform(image).unsqueeze(0)

    with torch.no_grad():
        outputs = model(tensor)
        _, predicted = torch.max(outputs, 1)

    # Translate predicted category index to a label here
    return predicted.item()
34+
35+
36+
def analyze_image(image_url):
    """
    Analyze the image at ``image_url``.

    Thin wrapper that delegates to ``classify_image``; any exception raised
    there propagates to the caller unchanged (the previous catch-and-re-raise
    added nothing).

    :param image_url: Address of the image to analyze.
    :return: Whatever ``classify_image`` returns for the image.
    """
    return classify_image(image_url)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pandas as pd
2+
from datetime import datetime
3+
4+
5+
def analyze_sentiment_trend(sentiment_data, window=7):
    """
    Analyze sentiment trends over time from collected data.

    Rows are ordered by date and a rolling mean of the ``sentiment`` column is
    added as ``rolling_avg_sentiment``. Rows that do not yet have ``window``
    observations behind them get NaN in that column.

    :param sentiment_data: List of dictionaries containing 'date' and 'sentiment' keys.
    :param window: Number of consecutive rows averaged for the rolling mean.
    :return: DataFrame with sentiment trend analysis. For empty input an empty
        DataFrame with the ``date``, ``sentiment`` and
        ``rolling_avg_sentiment`` columns is returned.
    """
    df = pd.DataFrame(sentiment_data)
    if df.empty:
        # An empty frame has no 'date' column, so the lookups below would
        # raise KeyError; hand back an empty result with the expected columns.
        return pd.DataFrame(columns=['date', 'sentiment', 'rolling_avg_sentiment'])

    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')
    df['rolling_avg_sentiment'] = df['sentiment'].rolling(window=window).mean()
    return df
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from transformers import pipeline
2+
3+
# Load a sentiment analysis pipeline
4+
# Created once at import time and reused by analyze_text_sentiment. No model
# name is given, so the pipeline presumably picks the library default
# (TODO confirm which model that is).
sentiment_pipeline = pipeline("sentiment-analysis")
5+
6+
7+
def analyze_text_sentiment(text):
    """
    Run the module-level sentiment pipeline on ``text``.

    Errors raised by the pipeline propagate to the caller unchanged (the
    previous catch-and-re-raise added nothing).

    :param text: Text to analyze.
    :return: The pipeline's result for ``text``, returned as-is.
    """
    return sentiment_pipeline(text)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from .scraper import fetch_movie_data
2+
from .parser import parse_movie_data
3+
from .ai.text_analysis import analyze_text_sentiment
4+
from .ai.image_analysis import classify_image
5+
from .tasks import crawl_movie_data_and_store
6+
from .models import MovieDetail
7+
from django.core.exceptions import ObjectDoesNotExist
8+
9+
10+
def orchestrate_crawling(url):
    """
    Run the crawl pipeline for one URL: fetch, parse, analyze, store.

    Failures are handled on a best-effort basis: any exception raised by a
    step is logged together with its traceback and the function returns
    ``False`` instead of propagating the error.

    :param url: Address of the movie page to crawl.
    :return: ``True`` when every step completed, ``False`` otherwise.
    """
    import logging  # local import: keeps this block independent of file-level imports

    logger = logging.getLogger(__name__)

    try:
        # Step 1: Fetch data
        html_content = fetch_movie_data(url)
        if not html_content:
            raise ValueError("Failed to fetch data from URL.")

        # Step 2: Parse data
        movie_data = parse_movie_data(html_content)
        if not movie_data:
            raise ValueError("Failed to parse movie data.")

        # Step 3: Analyze text sentiment
        movie_data['sentiment'] = analyze_text_sentiment(movie_data['description'])

        # Step 4: Image analysis
        movie_data['image_analysis'] = classify_image(movie_data['poster_url'])

        # Step 5: Store data in the database
        crawl_movie_data_and_store(movie_data)
    except Exception as e:
        # Top-level boundary of the pipeline: record the full traceback
        # (a bare print of str(e) loses it) and report failure to the caller.
        logger.exception("Error during the crawling process: %s", e)
        return False

    logger.info("Crawling and data processing completed successfully.")
    return True
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Pages to crawl. Each page is listed exactly once: the previous list repeated
# many entries (some up to three times, one only differing by a trailing
# slash), which would queue duplicate crawl tasks for the same URL.
DATA_SOURCES = [
    'https://www.imdb.com/chart/top',
    'https://www.rottentomatoes.com/top/bestofrt/',
    'https://www.metacritic.com/browse/movies/score/metascore/all/filtered',
    'https://www.themoviedb.org/movie/top_rated',
    'https://www.boxofficemojo.com/chart/top_lifetime_gross/?area=XWW',
    'https://www.the-numbers.com/movie/budgets/all',
    'https://www.the-numbers.com/movie/production-companies/',
    'https://www.the-numbers.com/movie/keywords',
    'https://www.the-numbers.com/movie/genres',
    'https://www.the-numbers.com/movie/franchises',
    'https://www.the-numbers.com/movie/creative-type',
    'https://www.the-numbers.com/movie/production-method',
    'https://www.the-numbers.com/movie/source',
    'https://www.the-numbers.com/movie/production-status',
    'https://www.the-numbers.com/movie/production-countries',
    'https://www.the-numbers.com/movie/languages',
    'https://www.the-numbers.com/movie/certifications',
    'https://www.the-numbers.com/movie/mpaa-ratings',
    'https://www.the-numbers.com/movie/running-times',
    'https://www.the-numbers.com/movie/decades',
    'https://www.the-numbers.com/movie/release-dates',
    'https://www.the-numbers.com/movie/release-types',
    'https://www.the-numbers.com/movie/weekend-box-office-chart',
    'https://www.the-numbers.com/movie/weekly-box-office-chart',
    'https://www.the-numbers.com/movie/weekend-per-theater-chart',
    'https://www.the-numbers.com/movie/weekly-per-theater-chart',
    'https://www.the-numbers.com/movie/theater-count',
    'https://www.the-numbers.com/movie/market',
]
56+
57+
58+
def fetch_from_sources(crawl_movie_data_and_store=None):
    """
    Queue one crawl task per entry in ``DATA_SOURCES``.

    :param crawl_movie_data_and_store: Task object exposing ``.delay(url)``
        (the call style suggests a Celery task — TODO confirm). Although the
        parameter defaults to ``None``, it is effectively required.
    :raises TypeError: If no task is supplied. Previously this surfaced as an
        opaque ``AttributeError`` on ``None.delay``.
    """
    if crawl_movie_data_and_store is None:
        raise TypeError(
            "fetch_from_sources() requires a task object exposing .delay(); got None"
        )

    for source in DATA_SOURCES:
        crawl_movie_data_and_store.delay(source)

0 commit comments

Comments
 (0)