This repository has been archived by the owner on Jan 24, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConvReader.py
299 lines (276 loc) · 11.4 KB
/
ConvReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
from ConvClass import Conv
import re
import sys
import time
import datetime
from PcapReader import parse
def main():
print("parsing",sys.argv[1],"and",sys.argv[2])
convList = parse(sys.argv[1])
print("parsePCAP complete ")
parseQuery(convList,sys.argv[2])
print("parseQuery complete ")
evCount = 0 #number of early validations
lateData = 0 #number of base ev's before data
dmarcless = 0 #number of ev's w/out dmarcs
dataEV = 0 #number of ev's without data
noRCPT = 0 #number of conv's without rcpts
dmarc = 0 #number of conv's with dmarcs
dataDM = 0 #number of dmarc conv's without data
noRCPTdm = 0 #number of dmarc conv's without RCPT TO
validateDM = 0 #number of dmarc conv's with validation
l2Count = 0 #number of l2's before rcpt
l2Data = 0 #number of conv with l2 before data
l2nd = 0 #number of l2's with no data
l2ndDmarc = 0 #number of l2's with no data but a dmarc
l2Only = 0 #number of l2's with no rcpt or data
l2dmarc = 0#number of l2's as above with a dmarc
l3rcpt = 0 #number of l3's before rcpt
l3data = 0 #number of l3's before data
l3nd = 0 #number of l3's with no data
l3ndDmarc = 0 #number of l3's with no data but a dmarc
l3Only = 0 #number of l3's with no rcpt or data
l3dmarc = 0 #number of l3's as above with a dmarc
#new vars for sum2
nol2 = 0 #number of early base queries with no l2
parallel = 0 #number of parallel validations with data before l2
parallel2 = 0 #number of parallel validations with data before l3
lazy = 0 #number of conv's with data, but base or l2 after it
soft = 0 #number of conv's with DMARC and DATA
#outFile = open("conv_timetable.txt","w")
sumFile = open("conv_summary.txt","w")
sum2File = open("conv_sum2.txt","w")
#compilations of various categories
noSPF = open("no_spf.txt","w")
serialSPF = open("serial_spf.txt","w")
parallelSPF = open("parallel_spf.txt","w")
lazySPF = open("lazy_spf.txt","w")
strictDMARC = open("strict_dmarc_spf.txt","w")
softDMARC = open("soft_dmarc_spf.txt","w")
#initialize files woth headers
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=noSPF)
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=serialSPF)
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=parallelSPF)
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=lazySPF)
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=strictDMARC)
print("Sender ID\t","Receiver ID\t\t\t","test #\t","Key\t\t","Mail Time\t",
"Validate Time\t","dmarc Time\t","L2 Time\tL3 Time\t","RCPT Time\t\tData Time",
file=softDMARC)
complete = len(convList) #number of complete conv's (contains a base response or l2)
for i in convList:
c=convList.get(i)
#c.printConv(outFile)
if c.earlyValidation()==1 or c.earlyL2() == 1:
evCount+= 1
if c.beforeData()==1:
if c.l2Time!="unknown":
parallel+=1
c.printConv(parallelSPF)
else:
nol2+=1
elif c.l3Test()==1:
parallel2+=1
c.printConv(parallelSPF)
else:
if c.earlyValidation()==-1 and c.earlyL2()==-1:
complete = complete-1
c.printConv(noSPF)
elif c.beforeData() == 0 and c.l3Test()<3:
lazy+=1
c.printConv(lazySPF)
if c.beforeData() >0:
lateData+=1
if c.beforeData()>1:
l2Data+=1
c.printConv(serialSPF)
if c.beforeData()>2:
l2nd+=1
c.printConv(serialSPF)
if c.dmarcTime != "unknown":
l2ndDmarc+=1
l3t = c.l3Test()
if l3t>0:
l3rcpt +=1
if l3t>1:
l3data+=1
if l3t>2:
l3nd+=1
if c.dmarcTime != "unknown":
l3ndDmarc +=1
if c.rcptTime == "unknown":
noRCPT += 1
if c.l2Time != "unknown" and c.dataTime =="unknown":
l2Only+=1
if c.dmarcTime != "unknown":
l2dmarc +=1
if c.l3Time != "unknown" and c.dataTime =="unknown":
l3Only+=1
if c.dmarcTime != "unknown":
l3dmarc +=1
if c.dataTime =="unknown":
dataEV+=1
if c.earlyL2() == 1:
l2Count+=1
if c.dmarcTime!="unknown":
dmarc+=1
if c.dataTime=="unknown":
dataDM+=1
c.printConv(strictDMARC)
else:
soft += 1
c.printConv(softDMARC)
if c.rcptTime=="unknown":
noRCPTdm+=1
if c.validateTime!="unknown" or c.l2Time!="unknown":
validateDM+=1
elif c.validateTime!="unknown" or c.l2Time!="unknown":
dmarcless+=1
#print results
print("There are",len(convList),"conversations",file=sumFile)
print("There are",len(convList),"conversations",file=sum2File)
print((complete/len(convList))*100,"% (",complete,")","of the SMTP transactions had a corresponding base or l2 lookup for test1",
file=sumFile)
if complete>0:
print("\tOf those,",(evCount/complete)*100,"% (",evCount,")","had an l2 or base lookup before the RCPT TO",
file=sumFile)
print("\t",(l2Count/complete)*100,"% (",l2Count,")","had an l2 lookup before the RCPT TO",
file=sumFile)
print("\t",(lateData/complete)*100,"% (",lateData,")","had an l2 or base lookup before the DATA",
file=sumFile)
print("\t",(l2Data/complete)*100,"% (",l2Data,")","had an l2 lookup before the DATA",
file=sumFile)
print("\t",(l2nd/complete)*100,"% (",l2nd,")","had an l2 lookup, but no DATA",
file=sumFile)
if l2nd>0:
print("\t\tOf these,",(l2ndDmarc/l2nd)*100,"% (",l2ndDmarc,")","had a corresponding _dmarc",
file=sumFile)
print("\t",(l2Only/complete)*100,"% (",l2Only,")","had an l2 lookup, but no RCPT TO or DATA",
file=sumFile)
if l2Only>0:
print("\t\tOf these,",(l2dmarc/l2Only)*100,"% (",l2dmarc,")","had a corresponding _dmarc",
file=sumFile)
print("\t",(l3rcpt/complete)*100,"% (",l3rcpt,")","had an l3 lookup before the RCPT TO",
file=sumFile)
print("\t",(l3data/complete)*100,"% (",l3data,")","had an l3 lookup before the DATA",
file=sumFile)
print("\t",(l3nd/complete)*100,"% (",l3nd,")","had an l3 lookup, but no DATA",
file=sumFile)
if l3nd>0:
print("\t\tOf these,",(l3ndDmarc/l3nd)*100,"% (",l3ndDmarc,")","had a corresponding _dmarc",
file=sumFile)
print("\t",(l3Only/complete)*100,"% (",l3Only,")","had an l3 lookup, but no RCPT TO or DATA",
file=sumFile)
if l3Only>0:
print("\t\t Of these,",(l3dmarc/l3Only)*100,"% (",l3dmarc,")","had a corresponding _dmarc",
file=sumFile)
print("\t",(dmarcless/complete)*100,"% (",dmarcless,")","had no _dmarc",
file=sumFile)
print((noRCPT/len(convList))*100,"% (",noRCPT,")","of the SMTP transactions had no RCPT TO",
file=sumFile)
print((dataEV/len(convList))*100,"% (",dataEV,")","of the SMTP transactions had no DATA",
file=sumFile)
print((dmarc/len(convList))*100,"% (",dmarc,")","of the SMTP transactions had a corresponding _dmarc lookup",
file=sumFile)
if dmarc>0:
print("\tOf those,",(dataDM/dmarc)*100,"% (",dataDM,")","had no DATA",
file=sumFile)
print("\t",(noRCPTdm/dmarc)*100,"% (",noRCPTdm,")","had no RCPT TO",
file=sumFile)
print("\t",(validateDM/dmarc)*100,"% (",validateDM,")","had base or l2 lookup for test1",
file=sumFile)
#print sum2
length = len(convList)
print("No spf validation:",file = sum2File)
print("\t",(length-complete)/length*100,"% (",(length-complete),") had no base or l2 queries detected",
file=sum2File)
print ("Serial SPF Validation:",file=sum2File)
print("\t",((l2Data-l2nd)/length)*100,"% (",l2Data-l2nd,") had an l2 lookup before DATA",
file=sum2File)
print ("\t",(l2nd/length)*100,"% (",l2nd,") had an l2 lookup, but no DATA",
file=sum2File)
print("Parallel SPF Validation:",file = sum2File)
print("\tAfter an early base response,",file = sum2File)
print ("\t",(parallel/length)*100,"% (",parallel,") had an l2 query after the DATA",
file=sum2File)
print ("\t",(parallel2/length)*100,"% (",parallel2,") had an l3 query after the DATA",
file=sum2File)
print ("\t",(nol2/length)*100,"% (",nol2,") had no l2",
file=sum2File)
print("Lazy SPF Validation:",file = sum2File)
print ("\t",(lazy/length)*100,"% (",lazy,") had a base query or l2 after the DATA",
file=sum2File)
print("SPF validation with strict DMARC adherence:",file = sum2File)
print ("\t",(dataDM/length)*100,"% (",dataDM,") had a DMARC query and no DATA",
file=sum2File)
print("SPF validation with soft DMARC:",file = sum2File)
print ("\t",(soft/length)*100,"% (",soft,") had a DMARC query and DATA",
file=sum2File)
'''
<summary>Given a dictionary of Conv objects and a dns query filename, iterates through filename and checks each entry from test1 for a matching
conversation. If it does, check if this message is one we are tracking (base,l2,l3,dmarc) and append relative messages to the coresponding
Conv object</summary>
'''
def parseQuery(convList,filename):
f = open(filename)
l = f.readlines()
ln = 0
for x in l:
print("\rparsing line",ln,end='\r')
ln +=1
m = None
y = minimize(x)
if y is not None:
m = re.search(RE_ULTIMATE,y)
if m is not None:
if m[8] == "test1":
c = convList.get(m[9])
if c is not None:
#if this is the l2 message
if m[6] == "l2" and c.l2Time == "unknown":
c.l2Time = m[1]
#if this is the l3 message
if m[6] =="l3" and c.l3Time =="unknown":
c.l3Time = m[1]
#if this is the dmarc message
if m[2] is not None:
c.dmarcTime = m[1]
else:
#if no other message has yet been received
if c.validateTime == "unknown":
c.validateTime = m[1]
'''
<summary>Takes a dns query.log file line as input, and strips it to minimal format for parsing in parseQuery</summary>
<returns>Returns a string with the new format, or None if the line could not be parsed.</returns>
'''
def minimize (line):
line = line.rstrip()
m = QUERY_LOG_RE.search(line)
if m is None:
#print('Could not parse log line: %s' % line)
return None
log_ts = get_timestamp_from_log(m.group('timestamp'), m.group('microseconds'))
out = "{} {} {}".format(log_ts,m.group('qname').lower(),m.group('qtype'))
return out
def get_timestamp_from_log(ts_str, microseconds):
ts = time.mktime(datetime.datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S.%f").timetuple())
ts += float(microseconds)
return ts
QUERY_LOG_RE = re.compile(r'^(?P<timestamp>\d+-\d+-\d+T\d+:\d+:\d+(?P<microseconds>\.\d+)?)-\d+:\d+\s(.*\s)?' + \
r'client\s+(@0x[0-9a-f]+\s+)?(?P<client_ip>[a-fA-F0-9:\.]+)#(?P<client_port>\d+)\s(.*\s)?' + \
r'query:\s+(?P<qname>\S+)\s+IN\s+(?P<qtype>\S+)\s+(?P<flags>\S+)' + \
r'\s+\((?P<server_ip>[a-fA-F0-9:\.]+)\)')
#Grouping: 1 = recorded decimal time | 2 = _dmarc | 3 = _\w+ | 4 = _\w+ | 5 = the group that six is in | 6 = l#. | 7 = endcoded time stamp
#8 = test## | 9 = generated name | 10 = #00ms | 11 = | 12 = Record look-up type
RE_ULTIMATE = re.compile(r'(\d+\.\d+)\s(_dmarc\.)?(\w+[^\d+]\.|x\.|m5\.)?(\w+[^\d+]\.)?((l\d+|a|b)\.)?([^\.|\s]+)\.(test\d+)\.([^\.]+)\.spf-test\.(\d00ms\.)?internet-measurement\.cs\.byu\.edu([^\s]+)?\s(\w+)', re.IGNORECASE)
if __name__ == "__main__":
main()