Skip to content

Commit 4e9a27f

Browse files
author
Miclain Keffeler
committed
Added Inspection Violation count as a characteristic in the ML model. Also created scripts for getting pertinent information from specific databases.
1 parent 3cb3bc1 commit 4e9a27f

16 files changed

+433240
-31
lines changed

Diff for: clean_and_key.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,47 @@
# -*- coding: utf-8 -*-
# @Author: Miclain Keffeler
# @Date: 2017-02-25 10:15:52
# @Last Modified by: Miclain Keffeler
# @Last Modified time: 2017-02-25 15:03:34
"""Reduce the raw health-inspection violations CSV to three columns.

Each data row of the input is reduced to the inspection id (column
index 1), the weight (column index 4) and a 0/1 flag derived from the
critical yes/no value (column index 5).  The first input row is treated
as a header and replaced by ``HEADER``.
"""
import csv
import os
import sys

input_file = 'raw_data/Health_InspViolations.csv'
output_file = 'clean_data/Health_InspViolations.csv'

# One entry per output column.  Writing the names as separate fields keeps
# the csv writer from quoting them into a single
# "inspection_id,weight,critical_yn" cell.
HEADER = ['inspection_id', 'weight', 'critical_yn']


def clean_row(row):
    """Return ``[inspection_id, weight, critical_flag]`` for one raw row.

    The flag is 0 when the critical y/n field is exactly ``'No'`` and 1
    otherwise.  Slices are used instead of indexing so that a short row
    yields a short result rather than raising ``IndexError``.
    """
    critical_flag = 0 if row[5:6] == ['No'] else 1
    return row[1:2] + row[4:5] + [critical_flag]


def clean_rows(rdr):
    """Yield the cleaned form of every data row in *rdr*.

    *rdr* is any iterable of parsed CSV rows (e.g. a ``csv.reader``).
    The first row is assumed to be the header and is discarded; an empty
    input yields nothing.
    """
    rows = iter(rdr)
    next(rows, None)  # skip the header row, tolerating an empty file
    for row in rows:
        yield clean_row(row)


def clean_file(in_path, out_path):
    """Read the raw CSV at *in_path* and write the cleaned CSV to *out_path*."""
    # NOTE(review): on Python 3 the csv docs recommend opening both files
    # with newline=''; left unchanged here because the target interpreter
    # version is not visible from this file.
    with open(in_path, 'r') as csvfile:
        rdr = csv.reader(csvfile)
        with open(out_path, "w") as csvfile1:
            wtr = csv.writer(csvfile1)
            wtr.writerow(HEADER)
            for new_row in clean_rows(rdr):
                wtr.writerow(new_row)


def main():
    """Clean the configured input file into the configured output file."""
    clean_file(input_file, output_file)


if __name__ == '__main__':
    main()

File renamed without changes.

Diff for: clean_data/grouped_louisville_inspections.json

-1
This file was deleted.

Diff for: clean_data/grouped_louisville_inspections_yelp_violations.json

+1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)