-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbench_detect.py
53 lines (37 loc) · 1.61 KB
/
bench_detect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# coding=utf-8
# Subterm statistics. Given a dataset, sensitive attribute, class
# attribute, and classifier parameters, trains the classifier to
# predict class attribute. Then for each sub-expression in the
# resulting classifier, provides normalized mutual information and
# influence metrics.
import sys
reload(sys)
sys.setdefaultencoding('utf8')
from detect import *
from util import *
from ml_util import *
from plot_util import *
import matplotlib.pyplot as plt
# Experiment configuration object; the attributes read from it in this
# script are sensitive_index, data_full, class_field, expression,
# association and order.
e = experiment_from_args()

# Built from the index of the sensitive attribute; passed to exp.flow()
# and violations() below (presumably a column projection -- confirm in util).
project_sens = nth(e.sensitive_index)

# Complete dataset; the benchmark loop below works on prefixes of it.
data_full = e.data_full

# Header row of the tab-separated report written to stdout.
print("\t".join((
    "dataset_size",
    "model_size",
    "model_height",
    "sub_expressions",
    "runtime",
)))
def content1(args):
    """Build the lifted distributions and run ``exp.flow`` for one data slice.

    This is the first stage of the timed workload; its return value is fed
    directly to ``content2``.

    Args:
        args: A 4-tuple ``(exp, data, dataX, dataY)``: the expression under
            test, the data slice, its feature columns and its class column.
            It is taken as a single tuple so existing callers that pass one
            tuple argument keep working.

    Returns:
        The 6-tuple ``(exp, data, dataX, dataY, distData, distX)``.
    """
    # Explicit unpacking replaces the tuple parameter that used to sit in the
    # signature (that syntax was removed from the language by PEP 3113).
    exp, data, dataX, dataY = args

    # distData is not read by content2, but it is still built here so the
    # measured work stays the same as before.
    distData = lift(data.itertuples_noid())

    # Wrap every feature row in a State before lifting.
    states = [State(row) for row in dataX.itertuples_noid()]
    distX = lift(states)

    # The same distribution is passed for both of flow's first two arguments.
    exp.flow(distX, distX, 1.0, project_sens)
    return (exp, data, dataX, dataY, distData, distX)
def content2(args):
    """Enumerate every violation reported for the prepared expression.

    This is the second stage of the timed workload; it consumes the tuple
    returned by ``content1``.

    Args:
        args: The 6-tuple ``(exp, data, dataX, dataY, distData, distX)``.
            Only ``exp`` and ``distX`` are used; the other entries are
            accepted so the output of ``content1`` can be passed through
            unchanged.

    Returns:
        A list holding everything yielded by ``violations(...)`` when called
        with both numeric thresholds set to 0.0 and the experiment's
        ``association`` and ``order`` settings.
    """
    # Explicit unpacking replaces the tuple parameter that used to sit in the
    # signature (that syntax was removed from the language by PEP 3113).
    # All six positions are still unpacked so a wrongly sized tuple fails.
    exp, _data, _dataX, _dataY, _distData, distX = args

    all_decomps = violations(
        distX, project_sens, exp, 0.0, 0.0, e.association, e.order)
    # Materialise the result so the enumeration cost falls inside the timed
    # call and the caller can take len() of it.
    return list(all_decomps)
# Benchmark sweep: for prefix sizes 100, 200, ..., 1000 of the full dataset,
# time the content1 -> content2 pipeline and print one tab-separated row
# matching the header printed above.
for ds in range(100, 1010, 100):
    data = data_full[0:ds]

    # Feature columns are every column except the class field.  .loc is used
    # instead of the deprecated .ix indexer (removed in pandas 1.0); for a
    # full row slice plus a boolean column mask the two select the same data.
    dataX = data.loc[:, data.columns != e.class_field]
    dataY = data[e.class_field].to_frame()

    # Work on a fresh copy of the experiment's expression for each size.
    exp = e.expression.copy_()

    # timethis returns a (result, runtime) pair; count=5 is presumably the
    # number of repetitions -- confirm in util.
    (list_decomps, runtime) = timethis(
        lambda: content2(content1((exp, data, dataX, dataY))), count=5)

    row = [
        len(data),
        exp.size(),
        exp.height(),
        len(list_decomps),
        runtime,
    ]
    print("\t".join(map(str, row)))