Skip to content

Commit

Permalink
🎨 test and fix full workflow (print all to console)
Browse files Browse the repository at this point in the history
only use clinical metadata (also in imputation workflow)

in project folder:
- execute v1 of imputation workflow
  snakemake --configfile config\appl_ald_data\plasma\proteinGroups\config.yaml -c1
- execute comparison workflow
  snakemake -s workflow\Snakefile_ald_comparison.smk -c1
  • Loading branch information
Henry committed Mar 12, 2024
1 parent fd0b8fa commit b7e2400
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 17 deletions.
13 changes: 7 additions & 6 deletions project/01_1_train_KNN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,19 @@
"outputs": [],
"source": [
"import logging\n",
"\n",
"import pandas as pd\n",
"import sklearn\n",
"import sklearn.impute\n",
"\n",
"import vaep\n",
"import vaep.model\n",
"import vaep.models as models\n",
"from vaep.models import ae\n",
"from vaep.io import datasplits\n",
"import vaep.nb\n",
"from vaep import sampling\n",
"from vaep.io import datasplits\n",
"from vaep.models import ae\n",
"\n",
"import vaep.nb\n",
"logger = vaep.logging.setup_logger(logging.getLogger('vaep'))\n",
"logger.info(\"Experiment 03 - Analysis of latent spaces and performance comparisions\")\n",
"\n",
Expand Down Expand Up @@ -497,9 +500,7 @@
"cell_type": "code",
"execution_count": null,
"id": "ce0fb347",
"metadata": {
"lines_to_next_cell": 0
},
"metadata": {},
"outputs": [],
"source": [
"# save simulated missing values for both splits\n",
Expand Down
14 changes: 8 additions & 6 deletions project/01_1_train_KNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,20 @@

# %%
import logging

import pandas as pd
import sklearn
import sklearn.impute
from IPython.display import display

import vaep
import vaep.model
import vaep.models as models
from vaep.models import ae
from vaep.io import datasplits
import vaep.nb
from vaep import sampling
from vaep.io import datasplits
from vaep.models import ae

import vaep.nb
logger = vaep.logging.setup_logger(logging.getLogger('vaep'))
logger.info("Experiment 03 - Analysis of latent spaces and performance comparisions")

Expand Down Expand Up @@ -183,9 +187,6 @@
# %% [markdown]
# ## Comparisons
#
# > Note: The interpolated values have fewer predictions for comparisons than the ones based on models (CF, DAE, VAE).
# > The comparison is therefore not 100% fair, as the interpolated samples will have more common ones (especially the sparser the data).
# > Could be changed.

# %% [markdown]
# ### Validation data
Expand Down Expand Up @@ -236,6 +237,7 @@
# save simulated missing values for both splits
val_pred_fake_na.to_csv(args.out_preds / f"pred_val_{args.model_key}.csv")
test_pred_fake_na.to_csv(args.out_preds / f"pred_test_{args.model_key}.csv")

# %% [markdown]
# ## Config

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config_split: config/appl_ald_data/plasma/proteinGroups/split.yaml
config_train: config/appl_ald_data/plasma/proteinGroups/train_{model}.yaml
folder_experiment: runs/appl_ald_data_2023_11/plasma/proteinGroups
fn_rawfile_metadata: data/ALD_study/processed/raw_meta.csv
fn_rawfile_metadata: data/ALD_study/processed/ald_metadata_cli.csv
file_format: pkl
models:
- RSN
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FN_INTENSITIES: data/ALD_study/processed/ald_plasma_proteinGroups.pkl
fn_rawfile_metadata: data/ALD_study/processed/raw_meta.csv
sample_completeness: 0.5
column_names:
- PG.ProteinAccessions
frac_mnar: 0.25
meta_cat_col: kleiner
6 changes: 3 additions & 3 deletions project/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ rule train_models:
err="{folder_experiment}/01_1_train_{model}.e",
out="{folder_experiment}/01_1_train_{model}.o",
name="{model}",
log:
err="{folder_experiment}/01_1_train_{model}.log",
# log:
# err="{folder_experiment}/01_1_train_{model}.log",
conda:
"vaep"
shell:
Expand All @@ -181,7 +181,7 @@ rule train_models:
" -r folder_experiment {params.folder_experiment:q}"
" -p fn_rawfile_metadata {params.meta_data:q}"
" -r model_key {wildcards.model:q}"
" 2> {log.err}"
# " 2> {log.err}"
" && jupyter nbconvert --to html {output.nb:q}"


Expand Down

0 comments on commit b7e2400

Please sign in to comment.