Skip to content

Commit

Permalink
modify figure 1 with updated data and fixes referencing broadinstitut…
Browse files Browse the repository at this point in the history
  • Loading branch information
gwaybio committed Aug 9, 2021
1 parent 7486b64 commit 01a871e
Show file tree
Hide file tree
Showing 4 changed files with 866 additions and 689 deletions.
1,332 changes: 753 additions & 579 deletions 6.paper_figures/figure1.ipynb

Large diffs are not rendered by default.

Binary file modified 6.paper_figures/figures/figure1.pdf
Binary file not shown.
Binary file modified 6.paper_figures/figures/figure1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
223 changes: 113 additions & 110 deletions 6.paper_figures/scripts/nbconverted/figure1.r
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,30 @@ pr_df$dose <- factor(pr_df$dose, levels = dose_order)
print(dim(pr_df))
head(pr_df)

print(dim(cell_painting_pr_df))
print(dim(l1000_pr_df))

threshold_df <- pr_df %>%
dplyr::filter(type == 'non replicate') %>%
dplyr::filter(type == 'non_replicate') %>%
dplyr::group_by(assay, dose) %>%
dplyr::summarise(threshold = quantile(correlation_values, 0.95))
dplyr::summarise(threshold = quantile(replicate_correlation, 0.95))

percent_replicating_df <- pr_df %>%
dplyr::left_join(threshold_df, by = c("assay", "dose")) %>%
dplyr::filter(type == "true replicate") %>%
dplyr::mutate(pass_threshold = threshold < correlation_values) %>%
dplyr::filter(type == "replicate") %>%
dplyr::mutate(pass_threshold = threshold < replicate_correlation) %>%
dplyr::group_by(dose, assay) %>%
dplyr::summarize(percent_replicating = paste0(100 * round((sum(pass_threshold) / length(pass_threshold)), 4), "%")) %>%
dplyr::ungroup()

percent_replicating_df

panel_a_gg <- (
ggplot(pr_df, aes(x = correlation_values))
ggplot(pr_df, aes(x = replicate_correlation))
+ geom_density(aes(fill = type), alpha = 0.4)
+ facet_grid("assay~dose")
+ geom_vline(data = threshold_df, linetype = "dashed", color = "blue", aes(xintercept=threshold))
+ geom_text(data = percent_replicating_df, aes(label = percent_replicating, x = 0.55, y = 7.5))
+ geom_text(data = percent_replicating_df, aes(label = percent_replicating, x = 0.55, y = 8.5))
+ theme_bw()
+ figure_theme
+ scale_fill_manual("Correlations\nbetween", labels = replicate_labels, values = replicate_colors)
Expand Down Expand Up @@ -68,6 +71,9 @@ significant_compounds_df <- cell_painting_comp_df %>%

head(significant_compounds_df)

total_compounds <- length(unique(significant_compounds_df$compound))
total_compounds

pass_thresh_summary_df <- significant_compounds_df %>%
dplyr::group_by(dose) %>%
dplyr::mutate(
Expand All @@ -93,7 +99,7 @@ cell_painting_rect <- pass_thresh_summary_df %>%
xmin_bar = seq(0, (length(unique(pass_thresh_summary_df$dose)) - 1) * 2, 2),
xmax_bar = seq(1, (length(unique(pass_thresh_summary_df$dose))) * 2, 2),
assay = "Cell Painting",
label_text_y = ymax_bar / 2
label_text_y = ymax_bar / 4
)

l1000_rect <- pass_thresh_summary_df %>%
Expand All @@ -107,8 +113,7 @@ l1000_rect <- pass_thresh_summary_df %>%
label_text_y = ymax_bar - 25
)

full_rect = dplyr::bind_rows(cell_painting_rect, l1000_rect)
full_rect
full_rect <- dplyr::bind_rows(cell_painting_rect, l1000_rect)

num_pass_both_text <- full_rect %>%
dplyr::filter(assay == "Cell Painting") %>%
Expand All @@ -124,17 +129,53 @@ num_pass_both_text <- full_rect %>%

num_pass_both_text

percentile_pass_df <- pass_thresh_summary_df %>%
dplyr::mutate(
num_pass_total = unique_pass_l1000 + unique_pass_cellpainting + num_pass_both,
num_pass_percentile = paste("Total:\n", round(num_pass_total / total_compounds, 4) * 100, "%")
) %>%
dplyr::select(dose, num_pass_percentile, num_pass_total)

# Prep legend order
full_rect <- full_rect %>%
dplyr::add_row(
dose = NA,
ymax_bar = NA,
unique_pass = NA,
num_pass_both = NA,
ymin_bar = NA,
xmin_bar = NA,
xmax_bar = NA,
assay = "Both",
label_text_y = NA
) %>%
dplyr::left_join(percentile_pass_df, by = "dose")

full_rect$assay <- factor(full_rect$assay, levels = c("L1000", "Both", "Cell Painting"))

full_rect

updated_assay_colors <- c(assay_colors, "Both" = "#BDB4B4")

panel_b_gg <- (
ggplot(full_rect)
+ geom_rect(aes(fill = assay, ymin = ymin_bar, ymax = ymax_bar, xmin = xmin_bar, xmax = xmax_bar), alpha = 0.5)
+ geom_text(aes(x = xmin_bar + 0.5, y = label_text_y, label = unique_pass))
+ geom_text(data = num_pass_both_text, aes(x = xmin_bar + 0.5, y = label_text_y, label = num_pass_both))
+ scale_fill_manual("Assay", values = assay_colors)
# Select only L1000 below to not duplicate text
+ geom_text(
data = full_rect %>% dplyr::filter(assay == "L1000"),
aes(x = xmin_bar + 0.5, y = ymax_bar + 100, label = num_pass_percentile),
size = 3
)
+ scale_fill_manual("Compounds\nreproducible\nin assay", values = updated_assay_colors)
+ theme_bw()
+ figure_theme
+ scale_x_continuous(labels = num_pass_both_text$dose, breaks = seq(0.5, length(num_pass_both_text$dose) * 2, 2))
+ ylab("Number of compounds over 95% threshold\nof matched non-replicate median distribution")
+ ylab("Total number of compounds over 95% threshold\nof matched non-replicate median distribution")
+ xlab("")
+ ylim(0, max(full_rect$num_pass_total, na.rm = TRUE) + 100)

)

panel_b_gg
Expand All @@ -144,6 +185,12 @@ results_dir <- file.path("../1.Data-exploration/Consensus/")
pm_cellpainting_list <- load_percent_matching(assay = "cellpainting", results_dir = results_dir)
pm_l1000_list <- load_percent_matching(assay = "l1000", results_dir = results_dir)

print(dim(pm_cellpainting_list[["percent_matching"]]))
print(dim(pm_l1000_list[["percent_matching"]]))

print(dim(pm_cellpainting_list[["percent_matching_pvals"]]))
print(dim(pm_l1000_list[["percent_matching_pvals"]]))

p_val_alpha_thresh <- 0.05
no_replicates_thresh <- 3

Expand Down Expand Up @@ -181,17 +228,23 @@ percent_matching_df <- pm_df %>%

percent_matching_df

# How many compounds per assay per dose with greater than 3 compounds?
for (dose in unique(pm_df$dose)) {
pm_sub_df <- pm_df %>% dplyr::filter(dose == !!dose)
print(table(pm_sub_df %>% dplyr::pull(assay)))
}

panel_c_gg <- (
ggplot(pm_df, aes(x = matching_score, y = neg_log_10_p_val))
+ geom_point(aes(color = no_of_replicates), alpha = 0.6)
+ geom_text(data = percent_matching_df, aes(label = percent_matching, x = 0.5, y = 4))
+ geom_text(data = percent_matching_df, aes(label = percent_matching, x = -0.25, y = 3))
+ facet_grid("assay~dose")
+ geom_hline(linetype = "dashed", color = "blue", yintercept = 2)
+ theme_bw()
+ figure_theme
+ scale_color_continuous("Number of\nreplicates", values = scales::rescale(c(0, 2, 4, 6, 8, 15, 30)), type = "viridis")
+ xlab("Median pairwise Pearson correlation\nbetween compound profiles of the same mechanism of action")
+ ylab("-log10 p value")
+ scale_color_continuous("Number of\ncompounds\nper MOA", values = scales::rescale(c(0, 2, 4, 6, 8, 15, 30)), type = "viridis")
+ xlab("Median pairwise Pearson correlation between\ncompound profiles of the same mechanism of action (MOA)")
+ ylab("Non-parametric -log10 p value")
)

panel_c_gg
Expand Down Expand Up @@ -230,7 +283,7 @@ cell_painting_moa_rect <- pass_thresh_summary_moa_df %>%
xmin_bar = seq(0, (length(unique(pass_thresh_summary_df$dose)) - 1) * 2, 2),
xmax_bar = seq(1, (length(unique(pass_thresh_summary_df$dose))) * 2, 2),
assay = "Cell Painting",
label_text_y = 4
label_text_y = 2
)

l1000_moa_rect <- pass_thresh_summary_moa_df %>%
Expand All @@ -241,7 +294,7 @@ l1000_moa_rect <- pass_thresh_summary_moa_df %>%
xmin_bar = seq(0, (length(unique(pass_thresh_summary_df$dose)) - 1) * 2, 2),
xmax_bar = seq(1, (length(unique(pass_thresh_summary_df$dose))) * 2, 2),
assay = "L1000",
label_text_y = ymax_bar - 3
label_text_y = ymax_bar - 1.5
)

full_moa_rect <- dplyr::bind_rows(cell_painting_moa_rect, l1000_moa_rect)
Expand All @@ -258,19 +311,56 @@ num_pass_both_moa_text <- full_moa_rect %>%
) %>%
dplyr::mutate(label_text_y = ymin_l1000_bar + num_pass_both / 2)

num_pass_both_moa_text
total_moas <- length(unique(significant_moa_df$moa))
total_moas

percentile_pass_moa_df <- pass_thresh_summary_moa_df %>%
dplyr::mutate(
num_pass_total = unique_pass_l1000 + unique_pass_cellpainting + num_pass_both,
num_pass_percentile = paste("Total:\n", round(num_pass_total / total_moas, 4) * 100, "%")
) %>%
dplyr::select(dose, num_pass_total, num_pass_percentile)

percentile_pass_moa_df

# Prep legend order
full_moa_rect <- full_moa_rect %>%
dplyr::add_row(
dose = NA,
ymax_bar = NA,
unique_pass = NA,
num_pass_both = NA,
ymin_bar = NA,
xmin_bar = NA,
xmax_bar = NA,
assay = "Both",
label_text_y = NA
) %>%
dplyr::left_join(percentile_pass_moa_df, by = "dose")

full_moa_rect$assay <- factor(full_moa_rect$assay, levels = c("L1000", "Both", "Cell Painting"))

panel_d_gg <- (
ggplot(full_moa_rect)
+ geom_rect(aes(fill = assay, ymin = ymin_bar, ymax = ymax_bar, xmin = xmin_bar, xmax = xmax_bar), alpha = 0.5)
+ geom_text(aes(x = xmin_bar + 0.5, y = label_text_y, label = unique_pass))
+ geom_text(data = num_pass_both_moa_text, aes(x = xmin_bar + 0.5, y = label_text_y, label = num_pass_both))
+ scale_fill_manual("Assay", values = assay_colors)
# Select only L1000 below to not duplicate text
+ geom_text(
data = full_moa_rect %>% dplyr::filter(assay == "L1000"),
aes(x = xmin_bar + 0.5, y = ymax_bar + 4, label = num_pass_percentile),
size = 3
)
+ scale_fill_manual("MOA\nconsistent\nin assay", values = updated_assay_colors)
+ theme_bw()
+ figure_theme
+ scale_x_continuous(labels = num_pass_both_moa_text$dose, breaks = seq(0.5, length(num_pass_both_moa_text$dose) * 2, 2))
+ scale_x_continuous(
labels = num_pass_both_moa_text$dose,
breaks = seq(0.5, length(num_pass_both_moa_text$dose) * 2, 2),
)
+ ylab("Number of MOAs over 95% threshold\nof scores from matched null MOA median scores")
+ xlab("")
+ ylim(0, max(full_moa_rect$num_pass_total, na.rm = TRUE) + 5)
)

panel_d_gg
Expand Down Expand Up @@ -306,91 +396,6 @@ sup_maybe_gg <- (

sup_maybe_gg

# Load profiles for heatmap prep
results_dir <- file.path("../1.Data-exploration/Consensus/")

cell_painting_profile_file <- file.path(
results_dir, "cell_painting", "moa_sizes_consensus_datasets",
"cell_painting_moa_analytical_set_profiles.tsv.gz"
)
l1000_profile_file <- file.path(
results_dir, "L1000", "moa_sizes_consensus_datasets",
"l1000_moa_analytical_set_profiles.tsv.gz"
)

cp_profile_cols <- readr::cols(
.default = readr::col_double(),
Metadata_Plate_Map_Name = readr::col_character(),
Metadata_cell_id = readr::col_character(),
Metadata_broad_sample = readr::col_character(),
Metadata_pert_well = readr::col_character(),
Metadata_time_point = readr::col_character(),
Metadata_moa = readr::col_character(),
Metadata_target = readr::col_character(),
broad_id = readr::col_character(),
pert_iname = readr::col_character(),
moa = readr::col_character()
)

l1000_profile_cols <- readr::cols(
.default = readr::col_double(),
sig_id = readr::col_character(),
pert_id = readr::col_character(),
pert_idose = readr::col_character(),
pert_iname = readr::col_character(),
moa = readr::col_character()
)

cp_profile_df <- readr::read_tsv(cell_painting_profile_file, col_types = cp_profile_cols)
cp_profile_df$Metadata_dose_recode <- dplyr::recode(paste(cp_profile_df$Metadata_dose_recode), !!!dose_rename)
cp_profile_df$Metadata_dose_recode <- factor(cp_profile_df$Metadata_dose_recode, levels = dose_order)

l1000_profile_df <- readr::read_tsv(l1000_profile_file, col_types = l1000_profile_cols) %>%
rename(Metadata_dose_recode = dose)
l1000_profile_df$Metadata_dose_recode <- dplyr::recode(paste(l1000_profile_df$Metadata_dose_recode), !!!dose_rename)
l1000_profile_df$Metadata_dose_recode <- factor(l1000_profile_df$Metadata_dose_recode, levels = dose_order)

common_perts <- intersect(
unique(cp_profile_df$pert_iname),
unique(l1000_profile_df$pert_iname)
)

cp_profile_df <- cp_profile_df %>% dplyr::filter(pert_iname %in% !!common_perts)
l1000_profile_df <- l1000_profile_df %>% dplyr::filter(pert_iname %in% !!common_perts)

target_moa <- "plk inhibitor"
corr_compare_plot_ready <- get_subset_correlation_data(cp_profile_df, l1000_profile_df, target_moa)
compound_a_gg <- plot_correlation_data(corr_compare_plot_ready, target_moa, fix_coords = FALSE)

target_moa <- "tubulin inhibitor"
corr_compare_plot_ready <- get_subset_correlation_data(cp_profile_df, l1000_profile_df, target_moa)
compound_b_gg <- plot_correlation_data(corr_compare_plot_ready, target_moa, fix_coords = FALSE)

target_moa <- "cdk inhibitor"
corr_compare_plot_ready <- get_subset_correlation_data(cp_profile_df, l1000_profile_df, target_moa)
compound_c_gg <- plot_correlation_data(corr_compare_plot_ready, target_moa, fix_coords = FALSE)

target_moa <- "hsp inhibitor"
corr_compare_plot_ready <- get_subset_correlation_data(cp_profile_df, l1000_profile_df, target_moa)
compound_d_gg <- plot_correlation_data(corr_compare_plot_ready, target_moa, fix_coords = FALSE)

cor_legend <- cowplot::get_legend(compound_a_gg)

panel_e_gg <- cowplot::plot_grid(
cowplot::plot_grid(
compound_a_gg + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.position = "none"),
compound_b_gg + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.position = "none"),
compound_c_gg + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.position = "none"),
compound_d_gg + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.position = "none"),
nrow = 2
),
cor_legend,
ncol = 2,
rel_widths = c(0.9, 0.1)
)

panel_e_gg

figure_1_gg <- cowplot::plot_grid(
cowplot::plot_grid(
panel_a_gg,
Expand All @@ -406,15 +411,13 @@ figure_1_gg <- cowplot::plot_grid(
rel_widths = c(1, 0.5),
labels = c("c", "d")
),
panel_e_gg,
nrow = 3,
labels = c("", "", "e"),
rel_heights = c(0.7, 0.7, 1)
nrow = 2,
rel_heights = c(1, 1)
)

figure_1_gg

for (extension in extensions) {
output_file <- paste0(output_figure_base, extension)
cowplot::save_plot(output_file, figure_1_gg, base_width = 16, base_height = 16, dpi = 500)
cowplot::save_plot(output_file, figure_1_gg, base_width = 16, base_height = 8, dpi = 500)
}

0 comments on commit 01a871e

Please sign in to comment.