Skip to content

Commit

Permalink
🐛 Don't train with batches that are too small
Browse files Browse the repository at this point in the history
- rather use "bigger" batches with more training steps
- update Fig. 2 plot generation to 25MNAR
  • Loading branch information
Henry Webel committed Nov 17, 2023
1 parent 0b0d747 commit 89046b4
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 29 deletions.
26 changes: 14 additions & 12 deletions project/03_2_best_models_comparison_fig2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@
"outputs": [],
"source": [
"# parameters\n",
"FOLDER = Path('runs/dev_dataset_large/')\n",
"FOLDER = Path('runs/mnar_mcar/')\n",
"SIZE = 'l'\n",
"files_in = {\n",
" 'protein groups': FOLDER / 'proteinGroups/figures/performance_test.csv',\n",
" 'peptides': FOLDER / 'peptides/figures/performance_test.csv',\n",
" 'precursors': FOLDER / 'evidence/figures/performance_test.csv'\n",
" 'protein groups': FOLDER / 'pg_l_25MNAR/figures/2_1_performance_test.csv',\n",
" 'peptides': FOLDER / 'pep_l_25MNAR/figures/2_1_performance_test.csv',\n",
" 'precursors': FOLDER / 'evi_l_25MNAR/figures/2_1_performance_test.csv'\n",
"}"
]
},
Expand All @@ -49,11 +50,12 @@
"metadata": {},
"outputs": [],
"source": [
"FOLDER = Path('runs/dev_dataset_small/')\n",
"FOLDER = Path('runs/mnar_mcar/')\n",
"SIZE = 'm'\n",
"files_in = {\n",
" 'protein groups': FOLDER / 'proteinGroups_N50/figures/performance_test.csv',\n",
" 'peptides': FOLDER / 'peptides_N50/figures/performance_test.csv',\n",
" 'precursors': FOLDER / 'evidence_N50/figures/performance_test.csv'\n",
" 'protein groups': FOLDER / 'pg_m_25MNAR/figures/2_1_performance_test.csv',\n",
" 'peptides': FOLDER / 'pep_m_25MNAR/figures/2_1_performance_test.csv',\n",
" 'precursors': FOLDER / 'evi_m_25MNAR/figures/2_1_performance_test.csv'\n",
"}"
]
},
Expand Down Expand Up @@ -134,13 +136,13 @@
},
"outputs": [],
"source": [
"fname = FOLDER / 'best_models_1_test_mpl.pdf'\n",
"fname = FOLDER / f'best_models_{SIZE}_test_mpl.pdf'\n",
"metrics = df['metric_value'].unstack('model')\n",
"ORDER_MODELS = metrics.mean().sort_values().index.to_list()\n",
"metrics = metrics.loc[ORDER_DATA, ORDER_MODELS]\n",
"\n",
"plt.rcParams['figure.figsize'] = [4.0, 2.0]\n",
"matplotlib.rcParams.update({'font.size': 5})\n",
"matplotlib.rcParams.update({'font.size': 6})\n",
"\n",
"ax = (metrics\n",
" .plot\n",
Expand All @@ -149,7 +151,7 @@
" ylabel=f\"{METRIC} (log2 intensities)\",\n",
" color=COLORS_TO_USE_MAPPTING,\n",
" width=.85,\n",
" fontsize=8\n",
" fontsize=7\n",
" ))\n",
"\n",
"ax = vaep.plotting.add_height_to_barplot(ax, size=5)\n",
Expand Down Expand Up @@ -230,7 +232,7 @@
"metadata": {},
"outputs": [],
"source": [
"fname = FOLDER / 'performance_summary.xlsx'\n",
"fname = FOLDER / f'performance_summary_{SIZE}.xlsx'\n",
"perf.to_excel(fname)\n",
"fname.as_posix()"
]
Expand Down
24 changes: 13 additions & 11 deletions project/03_2_best_models_comparison_fig2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.15.0
# jupytext_version: 1.15.2
# kernelspec:
# display_name: Python 3
# language: python
Expand All @@ -33,19 +33,21 @@

# %%
# parameters
FOLDER = Path('runs/dev_dataset_large/')
FOLDER = Path('runs/mnar_mcar/')
SIZE = 'l'
files_in = {
'protein groups': FOLDER / 'proteinGroups/figures/performance_test.csv',
'peptides': FOLDER / 'peptides/figures/performance_test.csv',
'precursors': FOLDER / 'evidence/figures/performance_test.csv'
'protein groups': FOLDER / 'pg_l_25MNAR/figures/2_1_performance_test.csv',
'peptides': FOLDER / 'pep_l_25MNAR/figures/2_1_performance_test.csv',
'precursors': FOLDER / 'evi_l_25MNAR/figures/2_1_performance_test.csv'
}

# %%
FOLDER = Path('runs/dev_dataset_small/')
FOLDER = Path('runs/mnar_mcar/')
SIZE = 'm'
files_in = {
'protein groups': FOLDER / 'proteinGroups_N50/figures/performance_test.csv',
'peptides': FOLDER / 'peptides_N50/figures/performance_test.csv',
'precursors': FOLDER / 'evidence_N50/figures/performance_test.csv'
'protein groups': FOLDER / 'pg_m_25MNAR/figures/2_1_performance_test.csv',
'peptides': FOLDER / 'pep_m_25MNAR/figures/2_1_performance_test.csv',
'precursors': FOLDER / 'evi_m_25MNAR/figures/2_1_performance_test.csv'
}

# %%
Expand Down Expand Up @@ -84,7 +86,7 @@
df

# %%
fname = FOLDER / 'best_models_1_test_mpl.pdf'
fname = FOLDER / f'best_models_{SIZE}_test_mpl.pdf'
metrics = df['metric_value'].unstack('model')
ORDER_MODELS = metrics.mean().sort_values().index.to_list()
metrics = metrics.loc[ORDER_DATA, ORDER_MODELS]
Expand Down Expand Up @@ -148,6 +150,6 @@
perf

# %%
fname = FOLDER / 'performance_summary.xlsx'
fname = FOLDER / f'performance_summary_{SIZE}.xlsx'
perf.to_excel(fname)
fname.as_posix()
5 changes: 3 additions & 2 deletions project/config/single_dev_dataset/mnar_mcar/evi_m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ models:
model: DAE
file_format: csv
latent_dim: 25
batch_size: 10
batch_size: 25
patience: 50
epochs_max: 200
hidden_layers: "256"
sample_idx_position: 0
Expand All @@ -40,7 +41,7 @@ models:
model: VAE
file_format: csv
latent_dim: 10
batch_size: 10
batch_size: 25
epochs_max: 200
hidden_layers: "256"
sample_idx_position: 0
Expand Down
5 changes: 3 additions & 2 deletions project/config/single_dev_dataset/mnar_mcar/pep_m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ models:
model: DAE
file_format: csv
latent_dim: 75
batch_size: 10
batch_size: 25
patience: 50
epochs_max: 200
hidden_layers: "256_128"
sample_idx_position: 0
Expand All @@ -40,7 +41,7 @@ models:
model: VAE
file_format: csv
latent_dim: 50
batch_size: 10
batch_size: 25
epochs_max: 200
hidden_layers: "256"
sample_idx_position: 0
Expand Down
5 changes: 3 additions & 2 deletions project/config/single_dev_dataset/mnar_mcar/pg_m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ models:
model: DAE
file_format: csv
latent_dim: 10
batch_size: 10
batch_size: 25
patience: 50
epochs_max: 200
hidden_layers: "512"
sample_idx_position: 0
Expand All @@ -40,7 +41,7 @@ models:
model: VAE
file_format: csv
latent_dim: 25
batch_size: 10
batch_size: 25
epochs_max: 200
hidden_layers: "512_256"
sample_idx_position: 0
Expand Down

0 comments on commit 89046b4

Please sign in to comment.