scry.py

import os
import jsonpickle
import nltk
# AWS Lambda only allows writes to /tmp, so NLTK's data path must point there
nltk.data.path.append("/tmp")
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
nltk.download('vader_lexicon', download_dir="/tmp")
nltk.download('stopwords', download_dir="/tmp")
import praw
from praw.models import MoreComments
from datetime import datetime
from uuid import uuid4
from dotenv import load_dotenv
load_dotenv()
client_id = os.getenv('client_id')
client_secret = os.getenv('client_secret')
username = os.getenv('username')
password = os.getenv('password')
user_agent = os.getenv('user_agent')
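
# A minimal .env sketch for the variables read above; every value below is a
# placeholder, not a real credential:
#   client_id=<reddit app id>
#   client_secret=<reddit app secret>
#   username=<reddit username>
#   password=<reddit password>
#   user_agent=<descriptive user agent string>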

class ScoredEntry:
    def __init__(self, id, scores, entry_date, total, title="default"):
        self.id = id
        self.scores = scores
        self.entry_date = entry_date
        self.total = total
        self.title = title
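
# A ScoredEntry is serialized via its __dict__ in scry() below, so one encoded
# entry looks roughly like (values here are purely illustrative):
#   {"id": "<eventid>", "scores": {"pos_aggregate": ..., "neg_aggregate": ...,
#    "compound_aggregate": ..., "compound_average": ...},
#    "entry_date": "MM/DD/YYYY, HH:MM:SS", "total": 12, "title": "..."}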

def scry(keyword, subreddit="all"):
    # searching post titles for a match/discussion;
    # alternatively, load all available comments and look for a match
    sia = SIA()
    reddit = praw.Reddit(client_id=client_id,
                         client_secret=client_secret,
                         password=password,
                         user_agent=user_agent,
                         username=username)
    sub = reddit.subreddit(subreddit)
    posts = sub.new(limit=100)
    eventid = datetime.now().strftime('%Y%m-%d%H-%M%S-') + str(uuid4())
    # may need 'reproducible' event ID
    result = []
    # TODO: create generator function to decouple below responsibilities
    for post in posts:
        title = post.title
        if keyword in title:
            # sub.new() already yields Submission objects, so no extra fetch
            # is needed (reddit.submission() expects an ID, not a Submission)
            submission = post
            utc = submission.created_utc
            dt = datetime.fromtimestamp(utc)
            post_entry_date = dt.strftime("%m/%d/%Y, %H:%M:%S")
            sentiment_scores = {
                "pos_aggregate": 0,
                "neg_aggregate": 0,
                "compound_aggregate": 0,
                "compound_average": 0
            }
            # count only comments that were actually scored, so that skipped
            # MoreComments placeholders don't dilute the average
            total_com = 0
            for top_level_comment in submission.comments:
                if isinstance(top_level_comment, MoreComments):
                    continue
                scores = sia.polarity_scores(top_level_comment.body)
                sentiment_scores["compound_aggregate"] += scores['compound']
                sentiment_scores["pos_aggregate"] += scores['pos']
                sentiment_scores["neg_aggregate"] += scores['neg']
                total_com += 1
            if total_com == 0:
                continue
            # Right now, only focused on the compound score average; pos/neg
            # are included in case they also show interesting trends
            calc_avg = sentiment_scores["compound_aggregate"] / total_com
            # TODO: cap floating decimal points
            sentiment_scores["compound_average"] = calc_avg
            entry = ScoredEntry(eventid, sentiment_scores,
                                post_entry_date, total_com, title)
            frozen = jsonpickle.encode(entry.__dict__)
            result.append(frozen)
    return result
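
# Hypothetical invocation sketch ("python"/"programming" are made-up example
# arguments); each returned item is a jsonpickle-encoded ScoredEntry dict:
if __name__ == "__main__":
    for frozen_entry in scry("python", subreddit="programming"):
        entry = jsonpickle.decode(frozen_entry)  # back to a plain dict
        print(entry["title"], entry["scores"]["compound_average"])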