-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgs4.py
31 lines (28 loc) · 825 Bytes
/
gs4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#! /usr/bin/python
from bs4 import BeautifulSoup
from datetime import datetime
import requests
import string
# Download the listing page at malc0de.com, locate the table with CSS class
# "prettytable", and write one CSV line per data row to a timestamped file
# under /tmp.  Each line holds seven comma-separated values taken from the
# row's first seven <td> cells (named date, fn, ip, cc, asn1, asn2, md5s
# below); fn is the last path component of the second cell's text with any
# "?query" suffix removed.
# NOTE: values are written unquoted, so a cell whose text contains a comma
# will produce an extra column in the output.

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops early on an HTTP error instead of parsing an
# error page.
r = requests.get('http://malc0de.com/database/', timeout=30)
r.raise_for_status()
htdoc = r.content

# Name the parser explicitly so the parse result does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(htdoc, "html.parser")
table = soup.find("table", {"class": "prettytable"})
if table is None:
    # Fail with a clear message before an empty output file gets created.
    raise SystemExit("no table with class 'prettytable' found in the response")

FORMAT = '%Y%m%d%H%M%S'
ofn = '/tmp/Malecode-Culled-Product-%s.csv' % (datetime.now().strftime(FORMAT))

# The with-block guarantees the file is flushed and closed, even if a row
# raises part-way through.
with open(ofn, "w") as g:
    # The first <tr> is skipped (treated as the header row).
    for row in table.find_all('tr')[1:]:
        rec = row.find_all('td')
        if len(rec) < 7:
            # Rows without the seven expected cells cannot be mapped to the
            # output columns; skip them instead of failing with IndexError.
            continue

        date = rec[0].string

        # rec[1].string may be None; str() turns that into the text 'None',
        # which is compared by equality (not substring containment) so that
        # short link texts such as 'on' are not mistaken for a missing value.
        url = str(rec[1].string)
        if url != 'None':
            fn = url.split('/')[-1].split('?')[0]
        else:
            fn = ''

        ip = rec[2].string
        cc = rec[3].string
        asn1 = rec[4].string
        # str() keeps the existing behaviour of writing 'None' when this cell
        # has no single string value.
        asn2 = str(rec[5].string)
        md5s = rec[6].string

        # Emit the whole line in one write so a failure never leaves a
        # half-written row behind.
        g.write(",".join([date, fn, ip, cc, asn1, asn2, md5s]) + "\n")