-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
54 lines (42 loc) · 1.29 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#Common functions to conduct sentiment analysis of Bill titles and text
import pandas as pd
import numpy as np
from datetime import datetime
from collections import Counter
from textblob import TextBlob
import string
from nltk.corpus import stopwords
# English stop words shipped with the NLTK stopwords corpus.
eng_stopwords = stopwords.words('english')
# Each printable ASCII character as its own one-character token.
characters = list(string.printable)
# Tokens that get_keywords leaves out: stop words plus lone characters.
STOP_WORDS = eng_stopwords + characters
def subjectivity(text):
    '''
    Compute the subjectivity score of the given text.

    text: the string to analyse.
    Returns the ``sentiment.subjectivity`` value that TextBlob reports for
    the text (documented by TextBlob as a float in [0.0, 1.0]).
    '''
    blob = TextBlob(text)
    return blob.sentiment.subjectivity
def polarity(text):
    '''
    Compute the polarity score of the given text.

    text: the string to analyse.
    Returns the ``sentiment.polarity`` value that TextBlob reports for the
    text (documented by TextBlob as a float in [-1.0, 1.0]).
    '''
    blob = TextBlob(text)
    return blob.sentiment.polarity
def analysis(score):
    '''
    Label a polarity score as 'Negative', 'Neutral' or 'Positive'.

    score: numeric polarity value.
    Returns 'Negative' when the score is below zero, 'Neutral' when it is
    exactly zero, and 'Positive' in every other case.
    '''
    if score < 0:
        return 'Negative'
    if score == 0:
        return 'Neutral'
    return 'Positive'
def get_keywords(df_column, top_n=40, stop_words=None):
    '''
    Returns the most common keywords in a text column, not counting stop words

    df_column: pandas Series of strings. Missing or non-string entries are
        skipped instead of raising.
    top_n: maximum number of keywords to return (default 40).
    stop_words: iterable of lowercase tokens to ignore. When omitted, the
        module-level STOP_WORDS is used.

    Returns a list of (word, count) tuples ordered from most to least
    frequent. Words are lowercased and split on whitespace only, so
    punctuation attached to a word stays part of that word.
    '''
    # A set gives constant-time membership checks for every token.
    excluded = set(STOP_WORDS if stop_words is None else stop_words)

    # .str.lower() yields NaN for missing/non-string cells; dropping them
    # keeps only values that can actually be tokenised.
    lowered = df_column.str.lower().dropna()

    # Stop words are removed BEFORE ranking so they cannot occupy slots in
    # the top_n and shrink (or empty) the result.
    counts = Counter()
    for text in lowered:
        counts.update(word for word in text.split() if word not in excluded)

    return counts.most_common(top_n)