Sort dataframes by prediction_id beforehand
Ref: MLWave#5

Also shuffled the samples files.
lenguyenthedat committed Nov 18, 2015
1 parent 30176e1 commit 37bcdd4
Showing 7 changed files with 28 additions and 17 deletions.
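
Background on why the sort matters: both correlations.py (before this change) and the kaggle_*.py scripts effectively pair predictions across submission files by row position, so two files listing the same prediction_ids in different orders get combined incorrectly. A minimal sketch of that failure mode (the ids and values below are invented, not taken from this repository):

# Sketch only: pairing rows by position goes wrong when files are ordered differently.
a = [("5", 0.9), ("1", 0.2), ("2", 0.7)]   # one submission, shuffled
b = [("1", 0.3), ("2", 0.6), ("5", 0.8)]   # another submission, sorted by id
print([(x[0], y[0]) for x, y in zip(a, b)])                 # [('5', '1'), ('1', '2'), ('2', '5')] -- mismatched ids
# Sorting both by prediction_id first restores the correct pairing:
a_sorted = sorted(a, key=lambda r: r[0])
b_sorted = sorted(b, key=lambda r: r[0])
print([(x[0], y[0]) for x, y in zip(a_sorted, b_sorted)])   # [('1', '1'), ('2', '2'), ('5', '5')]
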
18 changes: 10 additions & 8 deletions correlations.py
@@ -5,13 +5,15 @@
 second_file = sys.argv[2]
 
 def corr(first_file, second_file):
-    first_df = pd.read_csv(first_file)
-    second_df = pd.read_csv(second_file)
-    goal = first_df.columns[1]
+    first_df = pd.read_csv(first_file,index_col=0)
+    second_df = pd.read_csv(second_file,index_col=0)
+    # assuming first column is `prediction_id` and second column is `prediction`
+    prediction = first_df.columns[0]
     # correlation
     print "Finding correlation between: %s and %s" % (first_file,second_file)
-    print "Column to be measured: %s" % goal
-    print "Pearson's correlation score: %0.5f" % first_df[goal].corr(second_df[goal],method='pearson')
-    print "Kendall's correlation score: %0.5f" % first_df[goal].corr(second_df[goal],method='kendall')
-    print "Spearman's correlation score: %0.5f" % first_df[goal].corr(second_df[goal],method='spearman')
+    print "Column to be measured: %s" % prediction
+    print "Pearson's correlation score: %0.5f" % first_df[prediction].corr(second_df[prediction],method='pearson')
+    print "Kendall's correlation score: %0.5f" % first_df[prediction].corr(second_df[prediction],method='kendall')
+    print "Spearman's correlation score: %0.5f" % first_df[prediction].corr(second_df[prediction],method='spearman')
 
-corr(first_file, second_file)
+corr(first_file, second_file)
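
With index_col=0, prediction_id becomes the DataFrame index, so the prediction columns can be lined up by id instead of by whatever order the rows happen to be in on disk. A rough, self-contained sketch of the same idea (the frames and values are invented; sort_index() is used here only to make the alignment explicit):

import pandas as pd

# Stand-ins for two submission files whose rows are in different orders.
first_df = pd.DataFrame({"prediction_id": [5, 1, 2],
                         "prediction": [0.9, 0.2, 0.7]}).set_index("prediction_id")
second_df = pd.DataFrame({"prediction_id": [1, 2, 5],
                          "prediction": [0.3, 0.6, 0.8]}).set_index("prediction_id")

# Sorting both frames by their prediction_id index makes row order irrelevant.
first_df = first_df.sort_index()
second_df = second_df.sort_index()

print(first_df["prediction"].corr(second_df["prediction"], method="pearson"))
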
5 changes: 4 additions & 1 deletion kaggle_avg.py
@@ -11,7 +11,10 @@ def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
   with open(loc_outfile,"wb") as outfile:
     for i, glob_file in enumerate( glob(glob_files) ):
       print "parsing:", glob_file
-      for e, line in enumerate( open(glob_file) ):
+      # sort glob_file by first column, ignoring the first line
+      lines = open(glob_file).readlines()
+      lines = [lines[0]] + sorted(lines[1:])
+      for e, line in enumerate( lines ):
         if i == 0 and e == 0:
           outfile.write(line)
         if e > 0:
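
A small aside on the sort added to the three kaggle_*.py scripts: sorted() on raw CSV lines is a plain string sort, which is all that is needed here, since the only requirement is that every submission file ends up in the same order. If the first column holds plain integers and a numeric order were ever wanted, an explicit key would do it. Illustrative example data only:

rows = ["2,0.7\n", "10,0.1\n", "1,0.9\n"]
print(sorted(rows))                                       # ['1,0.9\n', '10,0.1\n', '2,0.7\n'] (lexicographic)
print(sorted(rows, key=lambda l: int(l.split(",")[0])))   # ['1,0.9\n', '2,0.7\n', '10,0.1\n'] (numeric)
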
5 changes: 4 additions & 1 deletion kaggle_rankavg.py
@@ -12,7 +12,10 @@ def kaggle_bag(glob_files, loc_outfile):
     for i, glob_file in enumerate( glob(glob_files) ):
       file_ranks = []
       print "parsing:", glob_file
-      for e, line in enumerate( open(glob_file) ):
+      # sort glob_file by first column, ignoring the first line
+      lines = open(glob_file).readlines()
+      lines = [lines[0]] + sorted(lines[1:])
+      for e, line in enumerate( lines ):
         if e == 0 and i == 0:
           outfile.write( line )
         elif e > 0:
5 changes: 4 additions & 1 deletion kaggle_vote.py
@@ -11,7 +11,10 @@ def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
   with open(loc_outfile,"wb") as outfile:
     for i, glob_file in enumerate( glob(glob_files) ):
       print "parsing:", glob_file
-      for e, line in enumerate( open(glob_file) ):
+      # sort glob_file by first column, ignoring the first line
+      lines = open(glob_file).readlines()
+      lines = [lines[0]] + sorted(lines[1:])
+      for e, line in enumerate( lines ):
         if i == 0 and e == 0:
           outfile.write(line)
         if e > 0:
4 changes: 2 additions & 2 deletions samples/method1.csv
@@ -1,6 +1,6 @@
 ImageId,Label
-1,1
+5,3
 2,0
 3,9
 4,9
-5,3
+1,1
4 changes: 2 additions & 2 deletions samples/method2.csv
@@ -1,6 +1,6 @@
 ImageId,Label
 1,2
-2,0
-3,6
 4,2
+3,6
 5,3
+2,0
4 changes: 2 additions & 2 deletions samples/method3.csv
@@ -1,6 +1,6 @@
 ImageId,Label
 1,2
-2,0
 3,9
-4,2
+2,0
 5,3
+4,2
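
The three sample files were reshuffled so the new sorting is actually exercised: each method*.csv now lists the same ImageIds in a different order, and the scripts still have to line them up. A quick, illustrative sanity check that the shuffled samples cover identical ids (the samples/ path is assumed, run from the repository root):

from glob import glob

ids = []
for path in sorted(glob("samples/method*.csv")):
    with open(path) as f:
        rows = f.readlines()[1:]          # skip the ImageId,Label header
    ids.append(sorted(r.split(",")[0] for r in rows))

print(all(s == ids[0] for s in ids))      # expect True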
