-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
101 lines (76 loc) · 3.45 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Author : Bhaumik Darshan Choksi
import json
import pickle
import re
import tkinter
import urllib.request
from functools import partial

import pandas as pd
from nltk.corpus import stopwords
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

# Deliberately kept after the nltk wildcard import (its original position) so
# that name resolution between the two wildcard imports is unchanged.
from tkinter import *

from LiveNews import LiveNews
# Key handed to LiveNews by get_news_articles(); "X" is the unconfigured
# placeholder value.
API_KEY = "X"
if API_KEY == "X":
    print("FIRST GET YOUR OWN API KEY AT https://newsapi.org/")
    print("Set the API_KEY variable to hold the string containing this API key")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive sessions and is not guaranteed to exist.
    # A non-zero status also signals that the program refused to start.
    raise SystemExit(1)
# English stop words that normalize_text() removes from titles.
garbage = set(stopwords.words('english'))


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only use .p files that come from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pickled objects consumed by find_category(), loaded once at start-up.
vectorizer = _load_pickle("vectorizer.p")
encoder = _load_pickle("encoder.p")
keywords = _load_pickle("keywords.p")
classifier = _load_pickle("classifier.p")
def normalize_text(s, keywords):
    """Lower-case *s*, trim punctuation at word edges and drop stop words.

    Args:
        s: The text to normalize.
        keywords: A list that is extended in place with every word that
            survives stop-word removal.

    Returns:
        The surviving words joined by single spaces.
    """
    s = s.lower()
    # Raw strings keep "\s" and "\W" as regex escapes; in a plain string
    # literal they are invalid escape sequences that newer Python versions
    # warn about.
    s = re.sub(r'\s\W', ' ', s)  # whitespace followed by a non-word character
    s = re.sub(r'\W\s', ' ', s)  # non-word character followed by whitespace
    s = re.sub(r'\s+', ' ', s)
    # "garbage" is the module-level stop-word set.
    words = [word for word in s.split() if word not in garbage]
    keywords.extend(words)
    return " ".join(words)
def format_live_news_title(title, keywords):
    """Keep only the words of *title* that appear in *keywords*.

    Args:
        title: Whitespace-separated text.
        keywords: Container of accepted words; membership is tested with
            ``in``.

    Returns:
        A one-element list holding the kept words joined by single spaces
        (an empty string when no word is kept).
    """
    kept = [word for word in title.split() if word in keywords]
    return [" ".join(kept)]
def find_category(title, vectorizer, encoder, keywords, classifier):
    """Return the decoded class label predicted for a single *title*.

    The title is normalized, reduced to words found in *keywords*,
    transformed by *vectorizer*, classified by *classifier*, and the
    prediction is mapped back through ``encoder.inverse_transform``.
    """
    # A throwaway list absorbs the words normalize_text() appends to its
    # second argument, so *keywords* itself is not modified here.
    cleaned = normalize_text(title, [])
    known_words_only = format_live_news_title(cleaned, keywords)
    features = vectorizer.transform(known_words_only)
    predicted = classifier.predict(features)
    decoded = encoder.inverse_transform(predicted)
    # Only one sample was submitted, so the first element is the answer.
    return decoded[0]
def get_news_articles(category):
    """Fetch articles and return the ones classified as *category*.

    Every fetched article gets a "category" key assigned in place from
    find_category(); the returned list holds only the articles whose
    assigned category equals *category*.
    """
    # TODO: Remove global variable
    client = LiveNews(API_KEY)
    # TODO : Add spinner for news sources in the GUI
    articles = client.fetch(source="google-news")
    # First pass: tag every article with its predicted category.
    for item in articles:
        item["category"] = find_category(
            item["title"], vectorizer, encoder, keywords, classifier
        )
    # Second pass: keep only the requested category.
    return [item for item in articles if category == item["category"]]
# GUI begins
master = tkinter.Tk()
master.title("Bhaumik's newsman")
master.geometry("500x500")
listbox = Listbox(master, width=70, height=20)


def _add_category_button(label, colour, code, row, column):
    """Create a category button, place it on the grid and return the widget."""
    # The lambda defers the lookup of news_refresh_callback to click time;
    # that function is defined further down the module.
    button = Button(
        master, text=label, width=30, height=3, bg=colour,
        command=lambda: news_refresh_callback(code, listbox),
    )
    # grid() returns None, so it must not be chained onto the constructor
    # when the widget reference is being kept.
    button.grid(row=row, column=column)
    return button


business_button = _add_category_button("Business", "cyan", 'b', 1, 1)
technology_button = _add_category_button("Technology", "green", 't', 1, 2)
entertainment_button = _add_category_button("Entertainment", "magenta", 'e', 2, 1)
medical_button = _add_category_button("Medical", "yellow", 'm', 2, 2)
listbox.grid(row=3, columnspan=3)
listbox.insert(END, "Pick a category!")
def news_refresh_callback(category, listbox):
    """Replace the contents of *listbox* with article titles for *category*.

    Args:
        category: Category code passed on to get_news_articles().
        listbox: Widget whose rows are cleared and then refilled.

    When no article matches, a two-line hint is shown instead of titles.
    """
    listbox.delete(0, END)
    articles = get_news_articles(category)
    if not articles:  # No articles returned
        listbox.insert(END, "Oops, no articles in this category")
        listbox.insert(END, "Try another source")
        return
    for article in articles:
        listbox.insert(END, article["title"])
# Enter Tk's event loop so that button clicks are dispatched to their callbacks.
master.mainloop()
# GUI ends
# Author : Bhaumik Darshan Choksi