Skip to content

Commit 080de64

Browse files
committed
Fixed bug in bin data samples, now writing column labels. Fixed bug in graph estimation module.
1 parent 30dddf7 commit 080de64

File tree

5 files changed

+42
-45
lines changed

5 files changed

+42
-45
lines changed

workflow/rules/data/iid/rules.smk

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
rule sample_bin_bn_data:
66
input:
7-
bn="{output_dir}/parameters/bin_bn/{bn}/adjmat=/{adjmat}.rds"
7+
bn="{output_dir}/parameters/bin_bn/{bn}/adjmat=/{adjmat}.rds",
8+
script="workflow/rules/data/iid/sample_data_with_range_header.R"
89
output:
910
data="{output_dir}/data" \
1011
"/adjmat=/{adjmat}"\

workflow/rules/data/iid/sample_data_with_range_header.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ bindata <- generatebinaryBN.data(n = n, binaryBN = bn, samplesize = samples)
3838
myrow <- rep(2, n)
3939
bindata_range_header <- data.frame(rbind(myrow, as.matrix(bindata)))
4040

41-
# TODO: Should take the column names from bn
42-
colnames(bindata_range_header) <- seq(n)
41+
colnames(bindata_range_header) <- colnames(bn$adj) #seq(n)
4342
write.table(bindata_range_header,
4443
file = filename, row.names = FALSE, quote = FALSE,
4544
col.names = TRUE, sep = ","

workflow/rules/evaluation/graph_estimation/rules.smk

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,11 @@ features = {
143143
"csvs": {"ext":"csv", "argstring":"", "filename":"adjmat"}
144144
}
145145

146-
# Since we have a lot of different data setups and algs, we need to create a rule for
146+
# Since we have a lot of different data setups and algs, we need to create a rule for
147147
# each combination of them.
148148

149149
for bmark_setup in config["benchmark_setup"]:
150-
150+
151151
graph_estimation = bmark_setup["evaluation"]["graph_estimation"]
152152
graph_types = graph_estimation["convert_to"] is not None and graph_estimation["convert_to"] or []
153153
graph_types += ["original"]
@@ -158,14 +158,14 @@ for bmark_setup in config["benchmark_setup"]:
158158
for alg in active_algorithms(bmark_setup, eval_method="graph_estimation"):
159159
data_index = 0
160160
# We want one folder per data setup, so we create one rule for each of them.
161-
161+
162162
for sim_setup in bmark_setup["data"]:
163-
for seed in get_seed_range(sim_setup["seed_range"]):
164-
165-
adjmat_strings = gen_adjmat_string_from_conf(sim_setup["graph_id"], seed)
163+
for seed in get_seed_range(sim_setup["seed_range"]):
164+
165+
adjmat_strings = gen_adjmat_string_from_conf(sim_setup["graph_id"], seed)
166166
parameters_strings = gen_parameter_string_from_conf(sim_setup["parameters_id"], seed)
167167
data_strings = gen_data_string_from_conf(sim_setup["data_id"], seed, seed_in_path=False)
168-
168+
169169

170170
if adjmat_strings is None:
171171
adjmat_strings = [None]
@@ -188,10 +188,9 @@ for bmark_setup in config["benchmark_setup"]:
188188
for adjmat_string in adjmat_strings:
189189
for parameters_string in parameters_strings:
190190
for data_string in data_strings:
191-
#print(bmark_setup)
192-
rule:
193-
name:
194-
"results/output/"+bmark_setup_title+"/graph_estimation/dataset_"+str(data_index+1)+"/"+alg+"/graph_type="+graph_type+"/"+feature
191+
rule:
192+
name:
193+
"results/output/"+bmark_setup_title+"/graph_estimation/dataset_"+str(sim_setup["graph_id"]) + "_" + str(sim_setup["parameters_id"]) + "_" + str(sim_setup["data_id"]) + "_" + str(seed)+"/"+alg+"/graph_type="+graph_type+"/"+feature
195194
input:
196195
conf=configfilename,
197196
graphs=eval_module_conf_to_feature_files_data(filename=feature_dict["filename"],
@@ -206,28 +205,31 @@ for bmark_setup in config["benchmark_setup"]:
206205
data_string=data_string,
207206
alg=alg,
208207
bmark_setup=bmark_setup)
209-
208+
210209
output:
211-
touch("results/output/"+bmark_setup_title+"/graph_estimation/dataset_"+str(data_index+1)+"/graph_type="+graph_type+"/"+feature+"/"+alg+".done")
210+
touch("results/output/"+bmark_setup_title+"/graph_estimation/graph_id=" + str(sim_setup["graph_id"]) + "_parameters_id=" + str(sim_setup["parameters_id"]) + "_data_id=" + str(sim_setup["data_id"]) + "_seed=" + str(seed) +"/graph_type="+graph_type+"/"+feature+"/"+alg+".done")
212211

213212
params:
214213
graph_type=graph_type,
215214
data_index=str(data_index+1),
216215
feature=feature,
217216
ext=feature_dict["ext"],
218217
alg=alg,
219-
bmark_setup=bmark_setup_title
218+
bmark_setup=bmark_setup_title,
219+
output_dir="results/output/"+bmark_setup_title+"/graph_estimation/graph_id="+ str(sim_setup["graph_id"]) + "_parameters_id=" + str(sim_setup["parameters_id"]) + "_data_id=" + str(sim_setup["data_id"]) + "_seed=" + str(seed) +"/graph_type="+graph_type+"/"+feature+"/"+alg
220+
221+
run:
220222

221-
run:
222-
output_dir = "results/output/{params.bmark_setup}/graph_estimation/dataset_"+params["data_index"]+"/graph_type="+params["graph_type"]+"/"+params["feature"]+"/"+params["alg"]
223223
# clean old file while keeping the directory
224224
# check if the directory exists
225-
if Path(output_dir).exists():
225+
if Path(params["output_dir"]).exists():
226226
# remove all files in the directory
227-
[f.unlink() for f in Path(output_dir).glob("*.png") ]
228-
for i, f in enumerate(input.graphs):
229-
shell("mkdir -p " + output_dir)
230-
shell("cp "+f+" " + output_dir + "/"+params["alg"]+"_"+params["graph_type"]+"_" +str(i+1) +"."+params["ext"])
227+
[f.unlink() for f in Path(params["output_dir"]).glob("*.png") ]
228+
229+
# This is to iterate over all the parameter settings.
230+
for j, f in enumerate(input.graphs):
231+
shell("mkdir -p " + params["output_dir"])
232+
shell("cp "+f+" " + params["output_dir"] + "/"+params["alg"]+"_"+params["graph_type"]+ "_" + str(j+1) +"."+params["ext"])
231233

232234
data_index += 1
233235

workflow/rules/helper_functions.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def active_algorithms(bmark_setup, eval_method="benchmarks"):
8989

9090
return list(set(algs))
9191

92+
import pprint as pp
9293

9394
def get_active_rules(wildcards):
9495
"""
@@ -106,33 +107,21 @@ def get_active_rules(wildcards):
106107
graph_types = evaluation["graph_estimation"]["convert_to"] if evaluation["graph_estimation"]["convert_to"] != None else ["original"]
107108
graph_types += ["original"]
108109

109-
# go through all active features and create a done file for each.
110-
110+
# Go through all active features and create a .done file for each.
111111
for feature, isactive in evaluation["graph_estimation"].items():
112112

113113
# These are not features, so skip
114114
if feature in ["ids", "convert_to"]:
115115
continue
116116

117117
if isactive == True:
118-
# Count the data setups and create a done file for each.
119-
n_comb = 0
120118
for sim_setup in bmark_setup["data"]:
121-
seed=get_seed_range(sim_setup["seed_range"])
122-
adjmat=gen_adjmat_string_from_conf(sim_setup["graph_id"], seed),
123-
parameters=gen_parameter_string_from_conf(sim_setup["parameters_id"], seed),
124-
data=gen_data_string_from_conf(sim_setup["data_id"], seed, seed_in_path=False)
125-
126-
# count total number of combinations of the three above
127-
n_data = len(data) if isinstance(data, list) and len(data) != 0 else 1
128-
n_parameters = len(parameters) if isinstance(parameters, list) and parameters != [] else 1
129-
n_adjmat = len(adjmat) if isinstance(adjmat, list) and adjmat != [] else 1
130-
n_comb += n_data*n_parameters*n_adjmat if n_data*n_parameters*n_adjmat != 0 else 1
131-
132-
for data_index in range(n_comb):
133-
for alg in active_algorithms(bmark_setup, eval_method="graph_estimation"):
134-
for graph_type in graph_types:
135-
rules.append("results/output/"+bmark_setup_title+"/graph_estimation/dataset_"+str(data_index+1)+"/graph_type="+graph_type+"/"+feature+"/"+alg+".done")
119+
seed_range=get_seed_range(sim_setup["seed_range"])
120+
for seed in seed_range:
121+
dataset = str("graph_id=" + str(sim_setup["graph_id"]) + "_parameters_id=" + str(sim_setup["parameters_id"]) + "_data_id=" + str(sim_setup["data_id"]) + "_seed=" + str(seed))
122+
for alg in active_algorithms(bmark_setup, eval_method="graph_estimation"):
123+
for graph_type in graph_types:
124+
rules.append("results/output/"+bmark_setup_title+"/graph_estimation/"+dataset+"/graph_type="+graph_type+"/"+feature+"/"+alg+".done")
136125

137126
# mcmc_traj_plots
138127
if "mcmc_traj_plots" in evaluation and len(evaluation["mcmc_traj_plots"]) > 0:

workflow/rules/parameters/bin_bn/sample_bayesian_network_for_dag.R

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,19 @@ filename_dag <- argv$filename_dag
5353

5454
adjmat <- read.csv(filename_dag, check.names = FALSE)
5555
n <- dim(adjmat)[2]
56+
labels <- colnames(adjmat)
57+
rownames(adjmat) <- colnames(adjmat)
58+
59+
# First we have to use integers as labels. Then convert back to the original labels below.
5660
rownames(adjmat) <- seq(n)
5761
colnames(adjmat) <- seq(n)
5862

59-
DAG <- adjacency2dag(adjmat)
63+
DAG <- adjacency2dag(adjmat)#, nodes = colnames(adjmat))
6064

61-
## TODO: This should pass the column names as well.
6265
set.seed(seed_number)
6366
binBN <- generateBinaryBN(DAG, c(argv$min, argv$max))
67+
# Set the node labels
68+
nodes(binBN$DAG) <- labels
69+
colnames(binBN$adj) <- labels
6470

6571
saveRDS(binBN, file = filename)

0 commit comments

Comments
 (0)