Skip to content

Commit

Permalink
✨ Integrate data splitting config into main config
Browse files Browse the repository at this point in the history
- allow setting frac_mnar from the command line using:
   --config frac_mnar=.5
- dump the created data-split config using a separate rule (into the experiment folder)
  • Loading branch information
Henry committed Oct 16, 2023
1 parent 02b2d9f commit 288d78e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
8 changes: 7 additions & 1 deletion project/config/knn_comparison/ald_pgs_all/config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
config_split: config/knn_comparison/ald_pgs_all/split.yaml
config_train: runs/knn_comparison/ald_pgs_all/configs_train/train_{model}.yaml
folder_experiment: runs/knn_comparison/ald_pgs_all
fn_rawfile_metadata: data/ALD_study/processed/raw_meta.csv
file_format: pkl
cuda: False
split_data:
FN_INTENSITIES: data/ALD_study/processed/ald_plasma_proteinGroups.pkl
sample_completeness: 0.5
min_RT_time: 20
column_names:
- PG.ProteinAccessions
# frac_mnar: 0.0
models:
- Median:
model: Median
Expand Down
6 changes: 0 additions & 6 deletions project/config/knn_comparison/ald_pgs_all/split.yaml

This file was deleted.

24 changes: 21 additions & 3 deletions project/workflow/Snakefile_v2
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ rule all:

nb = "01_2_performance_plots.ipynb"

print(config["models"])
if "frac_mnar" in config:
config["split_data"]["frac_mnar"] = config["frac_mnar"]

# print(config['split_data'])
# MODELS = config["models"].copy()

MODELS = list()
Expand Down Expand Up @@ -164,7 +166,9 @@ rule transform_data_to_wide_format:
# train models in python
rule train_models:
input:
nb=lambda wildcards: "01_1_train_{}.ipynb".format(model_configs[wildcards.model]["model"]),
nb=lambda wildcards: "01_1_train_{}.ipynb".format(
model_configs[wildcards.model]["model"]
),
train_split="{folder_experiment}/data/train_X.csv",
configfile=config["config_train"],
output:
Expand Down Expand Up @@ -224,4 +228,18 @@ rule create_splits:
" -f {input.configfile}"
" -r folder_experiment {params.folder_experiment}"
" -p fn_rawfile_metadata {params.meta_data}"
" && jupyter nbconvert --to html {output.nb}"
" && jupyter nbconvert --to html {output.nb}"


##########################################################################################
# dump the data-splitting config (config["split_data"]) to a YAML file in the experiment folder


# Write the `split_data` section of the workflow config to a YAML file.
# The output path is an f-string, so `folder_experiment` and `nb_stem` are
# plain Python names resolved when the Snakefile is parsed (both are defined
# elsewhere in the Snakefile, outside this excerpt), not Snakemake wildcards.
rule dump_split_config:
    output:
        configfile=f"{folder_experiment}/{nb_stem}.yaml",
    run:
        import yaml

        with open(output.configfile, "w") as handle:
            # Look the section up only after the target file has been opened,
            # so a missing key fails at the same point as before.
            split_settings = config["split_data"]
            yaml.dump(split_settings, stream=handle)

0 comments on commit 288d78e

Please sign in to comment.