Skip to content

Commit

Permalink
🎨🔧 improve swarmplots, add methods in ALD comp.
Browse files Browse the repository at this point in the history
  • Loading branch information
Henry committed Nov 26, 2023
1 parent 29a549a commit c9e00e4
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 148 deletions.
76 changes: 9 additions & 67 deletions project/10_4_ald_compare_single_pg.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"source": [
"from pathlib import Path\n",
"\n",
"import logging\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
Expand All @@ -32,10 +33,10 @@
"import vaep.imputation\n",
"\n",
"logger = vaep.logging.setup_nb_logger()\n",
"\n",
"logging.getLogger('fontTools').setLevel(logging.WARNING)\n",
"\n",
"plt.rcParams['figure.figsize'] = [4, 2.5] # [16.0, 7.0] , [4, 3]\n",
"vaep.plotting.make_large_descriptors(5)"
"vaep.plotting.make_large_descriptors(7)"
]
},
{
Expand Down Expand Up @@ -580,7 +581,9 @@
"cell_type": "code",
"execution_count": null,
"id": "f813f693",
"metadata": {},
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"min_y_int, max_y_int = vaep.plotting.data.get_min_max_iterable(\n",
Expand All @@ -592,66 +595,6 @@
"min_max, target_name"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d03628eb",
"metadata": {},
"outputs": [],
"source": [
"for idx in feat_sel:\n",
" fig, ax = plt.subplots()\n",
"\n",
" feat_observed = data[idx].dropna()\n",
"\n",
" label_template = '{method} (N={n:,d}, q={q:.3f})'\n",
" # observed data\n",
" vaep.plotting.data.plot_histogram_intensities(\n",
" feat_observed,\n",
" ax=ax,\n",
" min_max=min_max,\n",
" label=label_template.format(method='measured',\n",
" n=len(feat_observed),\n",
" q=float(qvalues.loc[idx, ('None', 'qvalue')])),\n",
" color='grey',\n",
" alpha=0.6)\n",
"\n",
" # all models\n",
" for i, method in enumerate(model_keys):\n",
" try:\n",
" pred = pred_real_na.loc[pd.IndexSlice[:, idx], method].dropna()\n",
" if len(pred) == 0:\n",
" # in case no values was imputed -> qvalue is as based on measured\n",
" label = label_template.format(method=method,\n",
" n=len(pred),\n",
" q=float(qvalues.loc[idx, ('None', 'qvalue')]\n",
" ))\n",
" else:\n",
" label = label_template.format(method=method,\n",
" n=len(pred),\n",
" q=float(qvalues.loc[idx, (method, 'qvalue')]\n",
" ))\n",
" ax, bins = vaep.plotting.data.plot_histogram_intensities(\n",
" pred,\n",
" ax=ax,\n",
" min_max=min_max,\n",
" label=label,\n",
" color=f'C{i}',\n",
" alpha=0.6)\n",
" except KeyError:\n",
" print(f\"No missing values for {idx}: {method}\")\n",
" continue\n",
" first_pg = idx.split(\";\")[0]\n",
" ax.set_title(\n",
" f'Imputation for protein group {first_pg} with target {target_name} (N= {len(data):,d} samples)')\n",
" ax.set_ylabel('count measurments')\n",
" _ = ax.legend()\n",
" files_out[fname.name] = fname.as_posix()\n",
" vaep.savefig(\n",
" fig, folder / f'{first_pg}_hist.pdf')\n",
" plt.close(fig)"
]
},
{
"cell_type": "markdown",
"id": "b9db8a0e",
Expand Down Expand Up @@ -682,8 +625,8 @@
" tmp_dot.remove()\n",
"\n",
" feat_observed = data[idx].dropna()\n",
" label_template = '{method} (N={n:,d}, q={q:.3f})'\n",
" key = label_template.format(method='measured',\n",
" label_template = '{method}\\n(N={n:,d}, q={q:.3f})'\n",
" key = label_template.format(method='observed',\n",
" n=len(feat_observed),\n",
" q=float(qvalues.loc[idx, ('None', 'qvalue')])\n",
" )\n",
Expand Down Expand Up @@ -726,7 +669,7 @@
" order=groups_order,\n",
" dodge=True,\n",
" hue=args.target,\n",
" size=1,\n",
" size=2,\n",
" ax=ax)\n",
" first_pg = idx.split(\";\")[0]\n",
" ax.set_title(\n",
Expand All @@ -747,7 +690,6 @@
" _ = ax.collections[0].set_paths([new_mk])\n",
" _ = ax.collections[1].set_paths([new_mk])\n",
"\n",
" # import matplotlib.lines as mlines\n",
" label_target_0, label_target_1 = ax.collections[-2].get_label(), ax.collections[-1].get_label()\n",
" _ = ax.collections[-2].set_label(f'imputed, {label_target_0}')\n",
" _ = ax.collections[-1].set_label(f'imputed, {label_target_1}')\n",
Expand Down
64 changes: 6 additions & 58 deletions project/10_4_ald_compare_single_pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# %%
from pathlib import Path

import logging
import matplotlib.pyplot as plt
import pandas as pd

Expand All @@ -33,10 +34,10 @@
import vaep.imputation

logger = vaep.logging.setup_nb_logger()

logging.getLogger('fontTools').setLevel(logging.WARNING)

plt.rcParams['figure.figsize'] = [4, 2.5] # [16.0, 7.0] , [4, 3]
vaep.plotting.make_large_descriptors(5)
vaep.plotting.make_large_descriptors(7)

# %% [markdown]
# ## Parameters
Expand Down Expand Up @@ -313,58 +314,6 @@

min_max, target_name

# %%
for idx in feat_sel:
fig, ax = plt.subplots()

feat_observed = data[idx].dropna()

label_template = '{method} (N={n:,d}, q={q:.3f})'
# observed data
vaep.plotting.data.plot_histogram_intensities(
feat_observed,
ax=ax,
min_max=min_max,
label=label_template.format(method='measured',
n=len(feat_observed),
q=float(qvalues.loc[idx, ('None', 'qvalue')])),
color='grey',
alpha=0.6)

# all models
for i, method in enumerate(model_keys):
try:
pred = pred_real_na.loc[pd.IndexSlice[:, idx], method].dropna()
if len(pred) == 0:
# in case no values were imputed -> the q-value is based on the measured data only
label = label_template.format(method=method,
n=len(pred),
q=float(qvalues.loc[idx, ('None', 'qvalue')]
))
else:
label = label_template.format(method=method,
n=len(pred),
q=float(qvalues.loc[idx, (method, 'qvalue')]
))
ax, bins = vaep.plotting.data.plot_histogram_intensities(
pred,
ax=ax,
min_max=min_max,
label=label,
color=f'C{i}',
alpha=0.6)
except KeyError:
print(f"No missing values for {idx}: {method}")
continue
first_pg = idx.split(";")[0]
ax.set_title(
f'Imputation for protein group {first_pg} with target {target_name} (N= {len(data):,d} samples)')
ax.set_ylabel('count measurments')
_ = ax.legend()
files_out[fname.name] = fname.as_posix()
vaep.savefig(
fig, folder / f'{first_pg}_hist.pdf')
plt.close(fig)

# %% [markdown]
# ## Compare with target annotation
Expand All @@ -383,8 +332,8 @@
tmp_dot.remove()

feat_observed = data[idx].dropna()
label_template = '{method} (N={n:,d}, q={q:.3f})'
key = label_template.format(method='measured',
label_template = '{method}\n(N={n:,d}, q={q:.3f})'
key = label_template.format(method='observed',
n=len(feat_observed),
q=float(qvalues.loc[idx, ('None', 'qvalue')])
)
Expand Down Expand Up @@ -427,7 +376,7 @@
order=groups_order,
dodge=True,
hue=args.target,
size=1,
size=2,
ax=ax)
first_pg = idx.split(";")[0]
ax.set_title(
Expand All @@ -448,7 +397,6 @@
_ = ax.collections[0].set_paths([new_mk])
_ = ax.collections[1].set_paths([new_mk])

# import matplotlib.lines as mlines
label_target_0, label_target_1 = ax.collections[-2].get_label(), ax.collections[-1].get_label()
_ = ax.collections[-2].set_label(f'imputed, {label_target_0}')
_ = ax.collections[-1].set_label(f'imputed, {label_target_1}')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
folder_experiment: runs/appl_ald_data/plasma/proteinGroups
out_folder: diff_analysis # subfolder of experiment folder
fn_rawfile_metadata: 'data/ALD_study/processed/raw_meta.csv'
fn_rawfile_metadata: "data/ALD_study/processed/raw_meta.csv"
make_plots: True
covar:
covar:
kleiner: age,bmi,gender_num,nas_steatosis_ordinal,abstinent_num
inflammation: age,bmi,gender_num,nas_steatosis_ordinal,abstinent_num
steatosis: age,bmi,gender_num,abstinent_num,kleiner,nas_inflam
Expand All @@ -19,9 +19,10 @@ annotaitons_gene_col: PG.Genes
baseline: RSN
ref_method_score:
methods:
- Median
- CF
- DAE
- VAE
- rf
- KNN
- Median
- CF
- DAE
- VAE
- QRILC
- TRKNN
- RF
33 changes: 18 additions & 15 deletions project/config/appl_ald_data/plasma/proteinGroups/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,27 @@ models:
- VAE
- KNN
NAGuideR_methods:
- ZERO
- MINIMUM
- BPCA
- COLMEDIAN
- ROWMEDIAN
- GSIMP
- IMPSEQ
- IMPSEQROB
- IRM
- KNN_IMPUTE
# - SEQKNN # error
- BPCA
- SVDMETHOD
- LLS
- MLE
- QRILC
# - MICE-CART # stopped after 24h
# - MICE-NORM # stopped after 24h
- MINDET
- MINIMUM
- MINPROB
- IRM
# - IMPSEQ # error
- IMPSEQROB
# - MICE-NORM # stopped after 30mins
# - MICE-CART # stopped after 30mins
# - TRKNN # error
- MLE
- MSIMPUTE
- MSIMPUTE_MNAR
- PI
- QRILC
- RF
- PI
- ROWMEDIAN
# - SEQKNN # Error in x[od, ismiss, drop = FALSE]: subscript out of bounds
- SVDMETHOD
- TRKNN
- ZERO

0 comments on commit c9e00e4

Please sign in to comment.