Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add 10x mouse brain dataset #480

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 68 additions & 1 deletion pyrovelocity/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def create_reports_config(model_name: str, model_number: int):
),
),
pyrovelocity=dict(
download=["larry", "pbmc10k"],
download=["larry", "pbmc10k", "mouse_brain_10x"],
process=["pbmc10k"],
sources=dict(
figshare_root_url="https://ndownloader.figshare.com/files"
Expand Down Expand Up @@ -226,6 +226,15 @@ def create_reports_config(model_name: str, model_number: int):
process_method="load_data",
process_args=dict(),
),
mouse_brain_10x=create_dataset_config(
"mouse_brain_10x",
dl_root="${data_external.root_path}",
data_file="mouse_brain_10x.h5ad",
rel_path="${data_external.root_path}/mouse_brain_10x.h5ad",
url="${data_external.pyrovelocity.sources.figshare_root_url}/mouse_brain_10x",
process_method="load_data",
process_args=dict(),
),
),
),
model_training=dict(
Expand All @@ -244,6 +253,8 @@ def create_reports_config(model_name: str, model_number: int):
"larry_neu_model2",
"larry_multilineage_model2",
"pbmc10k_model2",
"mouse_brain_10x_model1",
"mouse_brain_10x_model2",
],
simulate_model1=create_model_config(
"simulate",
Expand Down Expand Up @@ -379,6 +390,24 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=2000,
),
mouse_brain_10x_model1=create_model_config(
"pyrovelocity",
"mouse_brain_10x",
1,
"umap",
guide_type="auto_t0_constraint",
cell_state="celltype",
max_epochs=2000,
),
mouse_brain_10x_model2=create_model_config(
"pyrovelocity",
"mouse_brain_10x",
2,
"umap",
cell_state="celltype",
offset=True,
max_epochs=2000,
),
),
reports=dict(
model_summary=dict(
Expand All @@ -393,6 +422,8 @@ def create_reports_config(model_name: str, model_number: int):
"pons_model2",
"pbmc10k_model2",
"larry_tips_model2",
"mouse_brain_10x_model1",
"mouse_brain_10x_model2",
],
simulate_model1=create_reports_config("medium", 1),
simulate_model2=create_reports_config("medium", 2),
Expand All @@ -404,6 +435,8 @@ def create_reports_config(model_name: str, model_number: int):
pons_model2=create_reports_config("pons", 2),
pbmc10k_model2=create_reports_config("pbmc10k", 2),
larry_tips_model2=create_reports_config("larry_tips", 2),
mouse_brain_10x_model1=create_reports_config("mouse_brain_10x", 1),
mouse_brain_10x_model2=create_reports_config("mouse_brain_10x", 2),
),
figure2=dict(
tag="fig2",
Expand Down Expand Up @@ -560,6 +593,7 @@ def create_reports_config(model_name: str, model_number: int):
"larry_cospar",
"larry_cytotrace",
"larry_dynamical",
"mouse_brain_10x",
]

process_data = [
Expand All @@ -575,6 +609,7 @@ def create_reports_config(model_name: str, model_number: int):
"larry_mono",
"larry_neu",
"larry_multilineage",
"mouse_brain_10x",
]
train_models = [
"pancreas_model2",
Expand All @@ -587,6 +622,8 @@ def create_reports_config(model_name: str, model_number: int):
"larry_multilineage_model2",
"pbmc10k_model2",
"pbmc5k_model2",
"mouse_brain_10x_model1",
"mouse_brain_10x_model2",
]

model_training = dict(
Expand Down Expand Up @@ -844,6 +881,24 @@ def create_reports_config(model_name: str, model_number: int):
offset=True,
max_epochs=2000,
),
mouse_brain_10x_model1=create_model_config(
"pyrovelocity",
"mouse_brain_10x",
1,
"umap",
guide_type="auto_t0_constraint",
cell_state="celltype",
max_epochs=2000,
),
mouse_brain_10x_model2=create_model_config(
"pyrovelocity",
"mouse_brain_10x",
2,
"umap",
cell_state="celltype",
offset=True,
max_epochs=2000,
),
)

data_sets = dict(
Expand Down Expand Up @@ -999,6 +1054,16 @@ def create_reports_config(model_name: str, model_number: int):
process_method="load_data",
process_args=dict(count_thres="${base.count_threshold}"),
),
mouse_brain_10x=create_dataset_config(
source="pyrovelocity",
name="mouse_brain_10x",
dl_root="${paths.data_external}",
data_file="mouse_brain_10x.h5ad",
rel_path="${paths.data_external}/${.data_file}",
url="https://storage.googleapis.com/pyrovelocity/data/mouse_brain_10x.h5ad",
process_method="load_data",
process_args=dict(count_thres="${base.count_threshold}"),
),
)

return make_config(
Expand Down Expand Up @@ -1043,6 +1108,8 @@ def create_reports_config(model_name: str, model_number: int):
),
# larry_model1=create_reports_config("larry", 1),
# larry_model2=create_reports_config("larry", 2),
mouse_brain_10x_model1=create_reports_config("mouse_brain_10x", 1),
mouse_brain_10x_model2=create_reports_config("mouse_brain_10x", 2),
),
figure2=dict(
tag="fig2",
Expand Down
127 changes: 127 additions & 0 deletions reproducibility/figures/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ download_data:
- larry_cospar
- larry_cytotrace
- larry_dynamical
- mouse_brain_10x
process_data:
- simulate_medium
- pons
Expand All @@ -33,6 +34,7 @@ process_data:
- larry_mono
- larry_neu
- larry_multilineage
- mouse_brain_10x
train_models:
- pancreas_model2
- pbmc68k_model2
Expand All @@ -44,6 +46,8 @@ train_models:
- larry_multilineage_model2
- pbmc10k_model2
- pbmc5k_model2
- mouse_brain_10x_model1
- mouse_brain_10x_model2
data_sets:
simulate_medium:
source: simulate
Expand Down Expand Up @@ -241,6 +245,19 @@ data_sets:
count_thres: 0
rel_path: data/processed/pbmc5k_processed.h5ad
thresh_histogram_path: data/processed/pbmc5k_thresh_histogram.pdf
mouse_brain_10x:
source: pyrovelocity
data_file: mouse_brain_10x.h5ad
dl_root: data/external
dl_path: data/external/mouse_brain_10x.h5ad
rel_path: data/external/mouse_brain_10x.h5ad
url: https://storage.googleapis.com/pyrovelocity/data/mouse_brain_10x.h5ad
derived:
process_method: load_data
process_args:
count_thres: 0
rel_path: data/processed/mouse_brain_10x_processed.h5ad
thresh_histogram_path: data/processed/mouse_brain_10x_thresh_histogram.pdf
model_training:
simulate_model1:
path: models/simulate_medium_model1
Expand Down Expand Up @@ -1130,6 +1147,80 @@ model_training:
cell_specific_kinetics: null
kinetics_num: 2
loss_plot_path: models/pbmc5k_model2/loss_plot.png
mouse_brain_10x_model1:
path: models/mouse_brain_10x_model1
model_path: models/mouse_brain_10x_model1/model
input_data_path: data/processed/mouse_brain_10x_processed.h5ad
trained_data_path: models/mouse_brain_10x_model1/trained.h5ad
pyrovelocity_data_path: models/mouse_brain_10x_model1/pyrovelocity.pkl.zst
posterior_samples_path: models/mouse_brain_10x_model1/posterior_samples.pkl.zst
metrics_path: models/mouse_brain_10x_model1/metrics.json
run_info_path: models/mouse_brain_10x_model1/run_info.json
vector_field_parameters:
basis: umap
gpu_id: 0
training_parameters:
_target_: pyrovelocity.api.train_model
_partial_: true
guide_type: auto_t0_constraint
model_type: auto
svi_train: false
batch_size: -1
train_size: 1.0
use_gpu: 0
likelihood: Poisson
num_samples: 30
log_every: 100
cell_state: celltype
patient_improve: 0.0001
patient_init: 45
seed: 99
lr: 0.01
max_epochs: 2000
include_prior: true
library_size: true
offset: false
input_type: raw
cell_specific_kinetics: null
kinetics_num: 2
loss_plot_path: models/mouse_brain_10x_model1/loss_plot.png
mouse_brain_10x_model2:
path: models/mouse_brain_10x_model2
model_path: models/mouse_brain_10x_model2/model
input_data_path: data/processed/mouse_brain_10x_processed.h5ad
trained_data_path: models/mouse_brain_10x_model2/trained.h5ad
pyrovelocity_data_path: models/mouse_brain_10x_model2/pyrovelocity.pkl.zst
posterior_samples_path: models/mouse_brain_10x_model2/posterior_samples.pkl.zst
metrics_path: models/mouse_brain_10x_model2/metrics.json
run_info_path: models/mouse_brain_10x_model2/run_info.json
vector_field_parameters:
basis: umap
gpu_id: 0
training_parameters:
_target_: pyrovelocity.api.train_model
_partial_: true
guide_type: auto
model_type: auto
svi_train: false
batch_size: -1
train_size: 1.0
use_gpu: 0
likelihood: Poisson
num_samples: 30
log_every: 100
cell_state: celltype
patient_improve: 0.0001
patient_init: 45
seed: 99
lr: 0.01
max_epochs: 2000
include_prior: true
library_size: true
offset: true
input_type: raw
cell_specific_kinetics: null
kinetics_num: 2
loss_plot_path: models/mouse_brain_10x_model2/loss_plot.png
reports:
model_summary:
pancreas_model1:
Expand Down Expand Up @@ -1492,6 +1583,42 @@ reports:
fig2_part2_plot: reports/larry_multilineage_model2/fig2_part2_plot.pdf
violin_clusters_lin: reports/larry_multilineage_model2/clusters_violin_lin.pdf
violin_clusters_log: reports/larry_multilineage_model2/clusters_violin_log.pdf
mouse_brain_10x_model1:
path: reports/mouse_brain_10x_model1
trained_data_path: models/mouse_brain_10x_model1/trained.h5ad
pyrovelocity_data_path: models/mouse_brain_10x_model1/pyrovelocity.pkl.zst
dataframe_path: data/processed/mouse_brain_10x_model1_dataframe.pkl.zst
shared_time_plot: reports/mouse_brain_10x_model1/shared_time.pdf
volcano_plot: reports/mouse_brain_10x_model1/volcano.pdf
rainbow_plot: reports/mouse_brain_10x_model1/rainbow.pdf
uncertainty_param_plot: reports/mouse_brain_10x_model1/param_uncertainties.pdf
vector_field_plot: reports/mouse_brain_10x_model1/vector_field.pdf
posterior_phase_portraits: reports/mouse_brain_10x_model1/posterior_phase_portraits
t0_selection: reports/mouse_brain_10x_model1/t0_selection.tif
biomarker_selection_plot: reports/mouse_brain_10x_model1/markers_selection_scatterplot.tif
biomarker_phaseportrait_plot: reports/mouse_brain_10x_model1/markers_phaseportrait.pdf
fig2_part1_plot: reports/mouse_brain_10x_model1/fig2_part1_plot.pdf
fig2_part2_plot: reports/mouse_brain_10x_model1/fig2_part2_plot.pdf
violin_clusters_lin: reports/mouse_brain_10x_model1/clusters_violin_lin.pdf
violin_clusters_log: reports/mouse_brain_10x_model1/clusters_violin_log.pdf
mouse_brain_10x_model2:
path: reports/mouse_brain_10x_model2
trained_data_path: models/mouse_brain_10x_model2/trained.h5ad
pyrovelocity_data_path: models/mouse_brain_10x_model2/pyrovelocity.pkl.zst
dataframe_path: data/processed/mouse_brain_10x_model2_dataframe.pkl.zst
shared_time_plot: reports/mouse_brain_10x_model2/shared_time.pdf
volcano_plot: reports/mouse_brain_10x_model2/volcano.pdf
rainbow_plot: reports/mouse_brain_10x_model2/rainbow.pdf
uncertainty_param_plot: reports/mouse_brain_10x_model2/param_uncertainties.pdf
vector_field_plot: reports/mouse_brain_10x_model2/vector_field.pdf
posterior_phase_portraits: reports/mouse_brain_10x_model2/posterior_phase_portraits
t0_selection: reports/mouse_brain_10x_model2/t0_selection.tif
biomarker_selection_plot: reports/mouse_brain_10x_model2/markers_selection_scatterplot.tif
biomarker_phaseportrait_plot: reports/mouse_brain_10x_model2/markers_phaseportrait.pdf
fig2_part1_plot: reports/mouse_brain_10x_model2/fig2_part1_plot.pdf
fig2_part2_plot: reports/mouse_brain_10x_model2/fig2_part2_plot.pdf
violin_clusters_lin: reports/mouse_brain_10x_model2/clusters_violin_lin.pdf
violin_clusters_log: reports/mouse_brain_10x_model2/clusters_violin_log.pdf
figure2:
tag: fig2
path: reports/fig2
Expand Down
Loading
Loading