-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_hourly_data.py
executable file
·102 lines (71 loc) · 2.99 KB
/
create_hourly_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#! /usr/bin/python
from mysqldb import connection
import pandas as pd
import time
import datetime
###############################################################################
# Create Stock SENTIMENT
###############################################################################
#Query MySQL and find values within that last hour
time_greater_than = datetime.datetime.now() - datetime.timedelta(hours=1)
unix_time_greater_than = time.mktime(time_greater_than.timetuple())
c, conn = connection()
c.execute("SELECT * FROM sentiment WHERE time > %s", (unix_time_greater_than,))
data = c.fetchall()
#Push data to DataFrame to perform GroupBy operations
symbol = []
sentiment = []
date = []
for i in data:
symbol.append(i[0])
sentiment.append(i[1])
date.append(datetime.datetime.fromtimestamp(int(i[2])))
df = pd.DataFrame(symbol)
df['sentiment'] = sentiment
df['date'] = date
transform = lambda x: x.hour
hour = df['date'].apply(transform)
df['hour'] = hour
df.columns = ['symbol', 'sentiment', 'date', 'hour']
stats = df.groupby('symbol')['sentiment'].agg(['mean', 'count'])
#Formate data and push to MySQL hourly table
now = datetime.datetime.now()
now_hourly = datetime.datetime(now.year, now.month, now.day, now.hour)
unix_now = time.mktime(now_hourly.timetuple())
for i in range(len(stats)):
c.execute("INSERT INTO hourly (time, symbol, sentiment, volume) values (%s, %s, %s, %s)",
(unix_now, stats.index[i], stats['mean'][i], stats['count'][i]))
conn.commit()
c.close()
###############################################################################
# Create INDEX SENTIMETN
###############################################################################
#Specify files with Weighting Schemes
files = ['dia_weights.csv',
'qqq_weights.csv',
'spy_weights.csv']
#Establish Connection with DB
c, conn = connection()
#Read each file for each index, get the sentiment, weighting scheme, and volume
for f in files:
path = '/braden/index_data/' + f
df = pd.read_csv(path)
df = df.drop(df.index[0])
index = f[:3]
index_sent = 0
index_volume = 0
for i in range(len(df)):
try:
symbol = df.symbol.iloc[i]
weight = df.weight.iloc[i]
c.execute("SELECT sentiment, time, volume FROM hourly WHERE time = (SELECT MAX(time) FROM hourly) AND symbol = %s",
(symbol, ))
sent_data = c.fetchone()
index_sent += sent_data[0] * weight
time = sent_data[1]
index_volume += sent_data[2]
except Exception:
continue
c.execute("INSERT INTO indexes (time, symbol, sentiment, volume) values (%s, %s, %s, %s)",
(time, index, index_sent, index_volume))
conn.commit()