# populate_database.py
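"""Scrape the Ministry of Health COVID-19 locations-of-interest table and
load the Auckland entries into a MySQL database (database "LOIs",
table "LOI_table")."""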
from urllib.request import urlopen, Request

import lxml.html
import cssselect
import mysql.connector
from mysql.connector import Error

# MySQL connection settings -- fill in host, user, and password before running.
pw = ""
db = "LOIs"
host = ""
user = ""
def create_server_connection(host_name, user_name, user_password):
    """Connect to the MySQL server (no specific database selected)."""
    connection = None
    try:
        connection = mysql.connector.connect(
            host=host_name,
            user=user_name,
            passwd=user_password
        )
        print("MySQL Database connection successful")
    except Error as err:
        print(f"Error: '{err}'")
    return connection
connection = create_server_connection(host, user, pw)
def create_database(connection, query):
    """Run a CREATE DATABASE statement on an open server connection."""
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        print("Database created successfully")
    except Error as err:
        print(f"Error: '{err}'")

create_database_query = "CREATE DATABASE LOIs"
create_database(connection, create_database_query)
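# Note: "CREATE DATABASE LOIs" raises an error on reruns once the database
# exists (the error is caught and printed above). A tolerant variant, as an
# assumption rather than part of the original script, would be:
#     create_database(connection, "CREATE DATABASE IF NOT EXISTS LOIs")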
def create_db_connection(host_name, user_name, user_password, db_name):
    """Connect to the MySQL server with a specific database selected."""
    connection = None
    try:
        connection = mysql.connector.connect(
            host=host_name,
            user=user_name,
            passwd=user_password,
            database=db_name
        )
        print("MySQL Database connection successful")
    except Error as err:
        print(f"Error: '{err}'")
    return connection

print(create_db_connection(host, user, pw, db))
def execute_query(connection, query):
    """Execute a single statement and commit it."""
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query successful")
    except Error as err:
        print(f"Error: '{err}'")
create_table = """CREATE TABLE LOI_table(
    id INT,
    location CHAR(200),
    address CHAR(200),
    date CHAR(200),
    time CHAR(200),
    advice TEXT,
    added CHAR(200),
    updated INT
)"""

connection = create_db_connection(host, user, pw, db)
execute_query(connection, create_table)
link = "https://www.health.govt.nz/our-work/diseases-and-conditions/covid-19-novel-coronavirus/covid-19-health-advice-public/contact-tracing-covid-19/covid-19-contact-tracing-locations-interest"
url = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
def get_tuples():
    """Extract the locations-of-interest table rows as tuples ready to insert."""
    response = urlopen(url).read().decode("utf-8")
    startindex = response.find("<tbody>")
    endindex = response.find("</tbody>", startindex)
    locations = response[startindex:endindex]
    markup = lxml.html.fromstring(locations)
    tbl = []
    rows = markup.cssselect("tr")
    for row in rows:
        tbl.append(list())
        for td in row.cssselect("td"):
            tbl[-1].append(td.text_content().strip())
    mainlist = tbl
    LOIs = []
    i = 1
    n = 0
    while i < len(mainlist):
        # The first element is the id (optional to set). The final element,
        # int(mainlist[n][5][0:2]), gives an initial ordering number; this only
        # works because all the initial locations were added in August. The
        # scraper adds time.time() to new items, which puts them on top.
        LOIs.append((1, str(mainlist[n][0]), str(mainlist[n][1]), str(mainlist[n][2]), str(mainlist[n][3]), str(mainlist[n][4]), str(mainlist[n][5]), int(mainlist[n][5][0:2])))
        i = i + 1
        n = n + 1
    return LOIs
LOIs = get_tuples()
def pop_item(listitem):
    """Insert one location-of-interest tuple into LOI_table."""
    query = f"""
    INSERT INTO LOI_table VALUES
    {listitem}
    """
    execute_query(connection, query)

# Only load locations whose address mentions Auckland.
for i in LOIs:
    if "Auckland" in i[2]:
        pop_item(i)
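# A parameterized alternative to pop_item, as a sketch rather than part of the
# original script: the f-string INSERT above breaks if a scraped field contains
# a quote, whereas placeholder values let mysql.connector handle the quoting.
def pop_item_safe(listitem):
    cursor = connection.cursor()
    try:
        cursor.execute(
            "INSERT INTO LOI_table VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
            listitem,
        )
        connection.commit()
        print("Query successful")
    except Error as err:
        print(f"Error: '{err}'")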