Skip to content

Commit

Permalink
Merge pull request MLWave#9 from armaseg/master
Browse files Browse the repository at this point in the history
This closes issue MLWave#3 : Add weightage parameters for more customized blending
  • Loading branch information
MLWave committed Mar 12, 2016
2 parents 23f5061 + 9fad590 commit bc3b472
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ For more information: http://mlwave.com/kaggle-ensembling-guide/
parsing: ./samples/method3.csv
wrote to ./samples/kaggle_vote.csv

$ python kaggle_vote.py "./samples/_*.csv" "./samples/kaggle_vote.csv" "weighted"
parsing: ./samples/_w3_method1.csv
Using weight: 3
parsing: ./samples/_w2_method2.csv
Using weight: 2
parsing: ./samples/_w2_method3.csv
Using weight: 2
wrote to ./samples/kaggle_vote.csv

$ python kaggle_rankavg.py "./samples/method*.csv" "./samples/kaggle_rankavg.csv"
parsing: ./samples/method1.csv
parsing: ./samples/method2.csv
Expand Down
19 changes: 17 additions & 2 deletions kaggle_vote.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
from collections import defaultdict, Counter
from glob import glob
import sys
import re

# Command-line arguments:
#   argv[1]  glob pattern selecting the input files
#   argv[2]  path of the output file to write
#   argv[3]  optional weighting strategy; defaults to "uniform" when omitted
glob_files = sys.argv[1]
loc_outfile = sys.argv[2]
# Accept the strategy whenever a third argument is present, so that extra
# trailing arguments no longer silently reset it to "uniform".
weights_strategy = sys.argv[3] if len(sys.argv) >= 4 else "uniform"

def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
pattern = re.compile(r"(.)*_[w|W](\d*)_[.]*")
if method == "average":
scores = defaultdict(list)
with open(loc_outfile,"wb") as outfile:
#weight_list may be useful when using a different method
weight_list = [1]*len(glob(glob_files))
for i, glob_file in enumerate( glob(glob_files) ):
print "parsing:", glob_file
if weights == "weighted":
weight = pattern.match(glob_file)
if weight and weight.group(2):
print "Using weight: ",int(weight.group(2))
weight_list[i] = weight_list[i]*int(weight.group(2))
else:
print "Using weight: 1"
# sort glob_file by first column, ignoring the first line
lines = open(glob_file).readlines()
lines = [lines[0]] + sorted(lines[1:])
Expand All @@ -19,9 +33,10 @@ def kaggle_bag(glob_files, loc_outfile, method="average", weights="uniform"):
outfile.write(line)
if e > 0:
row = line.strip().split(",")
scores[(e,row[0])].append(row[1])
for l in range(1,weight_list[i]+1):
scores[(e,row[0])].append(row[1])
for j,k in sorted(scores):
outfile.write("%s,%s\n"%(k,Counter(scores[(j,k)]).most_common(1)[0][0]))
print("wrote to %s"%loc_outfile)

kaggle_bag(glob_files, loc_outfile)
kaggle_bag(glob_files, loc_outfile, weights=weights_strategy)

0 comments on commit bc3b472

Please sign in to comment.