-
Notifications
You must be signed in to change notification settings - Fork 2
/
NewsScraper.py
50 lines (34 loc) · 1.46 KB
/
NewsScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import requests
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')
import pandas as pd
"""
For an input ticker, headlines and their timestamps are scraped from finviz.com and collected into a pandas DataFrame; VADER sentiment analysis is then performed on each headline.
"""
def getNews(ticker):
    """Fetch finviz.com headlines for ``ticker`` and score their sentiment.

    The quote page for the ticker is downloaded, the rows of the element
    with id ``news-table`` are parsed into (date, time, title) records, and
    each title is scored with NLTK's VADER ``compound`` polarity.

    Args:
        ticker: Symbol sent to finviz as the ``t`` query parameter.

    Returns:
        A pandas DataFrame with columns ``date``, ``time``, ``title`` and
        ``compound``, limited to the first 50 rows. The frame is empty when
        the page contains no ``news-table`` element. ``date`` is ``None``
        for leading rows that show only a time before any date was seen.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the caller forever;
    # passing the ticker through ``params`` lets requests URL-encode it.
    response = requests.get(
        url='https://finviz.com/quote.ashx',
        params={'t': ticker},
        headers={'user-agent': 'Mozilla/5.0'},
        timeout=10,
    )
    # Surface HTTP errors here rather than failing later on a missing table.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    news_table = soup.find(id='news-table')

    rows = []
    # Rows that show only a time reuse the most recent date seen above them.
    date = None
    if news_table is not None:
        for row in news_table.find_all('tr'):
            # Rows without a headline link or a timestamp cell carry no news.
            if row.a is None or row.td is None:
                continue
            # split() with no argument ignores leading/trailing/repeated
            # whitespace, so padded cell text still yields clean tokens.
            stamp = row.td.get_text().split()
            if not stamp:
                continue
            if len(stamp) == 1:
                time = stamp[0]
            else:
                date, time = stamp[0], stamp[1]
            # Keep only the first 7 characters of the time token (e.g. "09:30AM").
            rows.append([date, time[:7], row.a.get_text()])

    dataframe = pd.DataFrame(rows, columns=['date', 'time', 'title'])
    vader = SentimentIntensityAnalyzer()
    dataframe['compound'] = dataframe['title'].apply(
        lambda title: vader.polarity_scores(title)['compound']
    )
    return dataframe.head(50)