-
Notifications
You must be signed in to change notification settings - Fork 2
/
utilparse.py
133 lines (114 loc) · 4.59 KB
/
utilparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# utilparse.py
# Module to parse all metrics based on the utility's export format.
# Notes:
#  - Timestamps sent to InfluxDB are converted to UTC; Grafana converts them
#    back to local time.
#  - Tags use the local (input file) time zone.
#  - Interval times are end times only; the start is assumed in a time-based
#    database.
from datetime import datetime
import csv
import pytz
import re
# Module-level tallies of the points appended so far with type 'generated'
# and 'delivered'; the parser functions below bump them via `global`.
rows_generated, rows_delivered = 0, 0
def parse_data(input_file, verbose, util_format, metricsout):
    """Parse a utility CSV export and append its data points to *metricsout*.

    Args:
        input_file: Path of the CSV file to read.
        verbose: When truthy, print progress messages.
        util_format: Export format name. 'sce-tou' and 'pep' are recognised;
            any other value reads nothing and leaves *metricsout* untouched.
        metricsout: List the format-specific parser appends point dicts to.

    Returns:
        The tuple ``(rows_delivered, rows_generated, metricsout)``. The two
        counts are the module-level counters; nothing here resets them, so
        they keep accumulating across calls.
    """
    if verbose:
        print('Starting parse_data')
    # The with-block closes the handle even when a parser raises (the file
    # used to be left open). newline='' is what the csv module asks for so
    # that it can handle line endings itself.
    with open(input_file, mode='r', newline='') as infile:
        csv_reader = csv.reader(infile, delimiter=',')
        if verbose:
            print('File: ', infile)
        if util_format == 'sce-tou':
            sce_tou_parse(csv_reader, verbose, metricsout)
        elif util_format == 'pep':
            pep_parse(csv_reader, verbose, metricsout)
    return (rows_delivered, rows_generated, metricsout)
def sce_tou_parse(csv_reader, verbose, metricsout):
    """Turn SCE time-of-use rows into energy points appended to *metricsout*.

    A row whose first column contains 'Received' switches the current type to
    'generated'; 'Delivered' or 'Consumption' switches it to 'delivered'.
    A row whose first column reads '<timestamp> to <timestamp>' is an interval
    reading: the timestamp before 'to' becomes the point time and the second
    column is the reading. Zero readings are dropped, 'generated' readings are
    stored as negative kWh, and the matching module-level counter
    (rows_generated / rows_delivered) is incremented for each point kept.

    Rows that merely contain the letters 'to' (for example 'Customer') but do
    not start with a parseable timestamp are skipped instead of raising.

    NOTE(review): readings that appear before any section header are still
    appended, with an empty type and the reading multiplied by 0 - confirm
    whether they should be dropped instead.
    NOTE(review): the timestamp carries no zone, so astimezone() interprets it
    in the zone of the machine running the script - confirm that this matches
    the zone of the export.

    Args:
        csv_reader: Iterable of rows (lists of strings), e.g. a csv.reader.
        verbose: When truthy, print every point that is appended.
        metricsout: List receiving the point dicts; modified in place.

    Raises:
        ValueError: If the reading of an interval row is not a number.
    """
    global rows_generated
    global rows_delivered
    # Imported here because the module header only imports the datetime class.
    from datetime import timezone

    stamp_format = '%Y-%m-%d %H:%M:%S'
    tag = ''
    pmult = 0  # stays 0 until a section header has set the type
    for row in csv_reader:
        if not row:
            continue
        label = row[0]
        if 'Received' in label:
            tag = 'generated'
            continue
        if 'Delivered' in label or 'Consumption' in label:
            tag = 'delivered'
            continue
        if 'to' not in label or len(row) < 2:
            continue

        # SCE puts a non-ASCII character (\xa0) before the 'to'; treat it like
        # an ordinary blank so both kinds of padding are trimmed away.
        stamp_text = label.split('to')[0].replace('\xa0', ' ').strip()
        try:
            dt_local = datetime.strptime(stamp_text, stamp_format)
        except ValueError:
            # 'to' was part of some other text, not an interval separator.
            continue

        kwh = float(row[1])
        if kwh == 0:
            continue

        if tag == 'generated':
            rows_generated += 1
            pmult = -1
        elif tag == 'delivered':
            rows_delivered += 1
            pmult = 1

        dt_utc = dt_local.astimezone(timezone.utc).strftime(stamp_format)
        # Tags use local time instead of UTC so they reflect the customer's
        # own month, weekday and year.
        point = {
            "measurement": "energy",
            "tags": {
                "type": tag,
                "month": dt_local.strftime('%B'),
                "day": dt_local.strftime('%A'),
                "year": dt_local.strftime('%Y'),
            },
            "time": dt_utc,
            "fields": {
                "kwh": kwh * pmult
            }
        }
        if verbose:
            print(point)
        metricsout.append(point)
# Matches every character of a cost cell that is neither a digit nor the
# decimal point (currency symbol, thousands separator, sign, blanks).
_PEP_COST_NOISE = re.compile(r'[^0-9.]')


def pep_parse(csv_reader, verbose, metricsout):
    """Turn 'Electric usage' rows into energy points appended to *metricsout*.

    Only rows whose first column contains 'Electric usage' are read. From such
    a row, column 1 (date) and column 3 (HH:MM time, ':00' seconds appended)
    form the point time, column 4 is the kWh reading and column 6 the cost.
    A negative reading is typed 'generated', anything else 'delivered', and
    the matching module-level counter (rows_generated / rows_delivered) is
    incremented. The cost takes its sign from the type: negative for
    'generated', positive for 'delivered'.

    The cost keeps its decimal point while being cleaned, so '$1.5' is 1.5;
    previously the point was removed and the digits divided by 100, which was
    only right for values written with exactly two decimals. A cost cell
    without any digits no longer raises: the point is stored without a
    'cost' field.

    NOTE(review): the timestamp carries no zone, so astimezone() interprets it
    in the zone of the machine running the script - confirm that this matches
    the zone of the export.

    Args:
        csv_reader: Iterable of rows (lists of strings), e.g. a csv.reader.
        verbose: When truthy, print every point that is appended.
        metricsout: List receiving the point dicts; modified in place.

    Returns:
        An empty tuple.

    Raises:
        IndexError: If a usage row has fewer than seven columns.
        ValueError: If the reading, timestamp or cost cannot be parsed.
    """
    global rows_generated
    global rows_delivered
    # Imported here because the module header only imports the datetime class.
    from datetime import timezone

    stamp_format = "%Y-%m-%d %H:%M:%S"
    for row in csv_reader:
        if not row or 'Electric usage' not in row[0]:
            continue

        kwh = float(row[4])
        tag = 'generated' if kwh < 0 else 'delivered'

        stamp_text = '{} {}:00'.format(row[1], row[3])
        dt_local = datetime.strptime(stamp_text, stamp_format)
        dt_utc = dt_local.astimezone(timezone.utc).strftime(stamp_format)

        cost_text = _PEP_COST_NOISE.sub('', row[6])
        cost = float(cost_text) if cost_text else None

        if tag == 'generated':
            rows_generated += 1
            if cost is not None:
                cost = -cost
        else:
            rows_delivered += 1

        fields = {"kwh": kwh}
        if cost is not None:
            fields["cost"] = cost

        # Tags use local time instead of UTC so they reflect the customer's
        # own month, weekday and year.
        point = {
            "measurement": "energy",
            "tags": {
                "type": tag,
                "month": dt_local.strftime('%B'),
                "day": dt_local.strftime('%A'),
                "year": dt_local.strftime('%Y'),
            },
            "time": dt_utc,
            "fields": fields,
        }
        if verbose:
            print(point)
        metricsout.append(point)
    return ()