diff --git a/.gitignore b/.gitignore index 044181e..8c499b9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .Ruserdata inst/doc vignettes/*.html +omnipathr-log/ diff --git a/DESCRIPTION b/DESCRIPTION index b377ed5..a30d424 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nichenetr Type: Package Title: NicheNet: Modeling Intercellular Communication by Linking Ligands to Target Genes -Version: 2.0.6 +Version: 2.1.0 Authors@R: c(person("Robin", "Browaeys", role = c("aut")), person("Chananchida", "Sang-aram", role = c("aut", "cre"), email = "chananchida.sangaram@ugent.be")) Description: This package allows you the investigate intercellular communication from a computational perspective. More specifically, it allows to investigate how interacting cells influence each other's gene expression. Functionalities of this package (e.g. including predicting extracellular upstream regulators and their affected target genes) build upon a probabilistic model of ligand-target links that was inferred by data-integration. @@ -12,24 +12,26 @@ URL: https://github.com/saeyslab/nichenetr BugReports: https://github.com/saeyslab/nichenetr/issues RoxygenNote: 7.1.2 Depends: R (>= 3.0.0) -Imports: +Imports: + tidyverse, + data.table, dplyr, + ggplot2, + magrittr, + purrr, + readr, + tibble, tidyr, igraph, Matrix, fdrtool, ROCR, caTools, - data.table, limma, - readr, Hmisc, - tibble, caret, - purrr, randomForest, DiagrammeR, - ggplot2, mlrMBO, parallelMap, emoa, @@ -38,7 +40,6 @@ Imports: Seurat, cowplot, ggpubr, - magrittr, circlize, ComplexHeatmap, grDevices, @@ -47,12 +48,12 @@ Imports: shadowtext Suggests: knitr, + RColorBrewer, rmarkdown, testthat, doMC, mco, parallel, covr, - tidyverse, sf VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 0a8c292..4808f39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,11 +9,14 @@ export(alias_to_symbol_seurat) export(apply_hub_corrections) export(assess_influence_source) export(assess_rf_class_probabilities) +export(assign_ligands_to_celltype) +export(bootstrap_ligand_activity_analysis) export(calculate_de) export(calculate_fraction_top_predicted) export(calculate_fraction_top_predicted_fisher) export(calculate_niche_de) export(calculate_niche_de_targets) +export(calculate_p_value_bootstrap) export(calculate_spatial_DE) export(classification_evaluation_continuous_pred_wrapper) export(combine_sender_receiver_de) @@ -56,6 +59,7 @@ export(extract_top_fraction_ligands) export(extract_top_fraction_targets) export(extract_top_n_ligands) export(extract_top_n_targets) +export(generate_info_tables) export(generate_prioritization_tables) export(get_active_ligand_receptor_network) export(get_active_ligand_target_df) @@ -69,12 +73,14 @@ export(get_ligand_activities_targets) export(get_ligand_signaling_path) export(get_ligand_signaling_path_with_receptor) export(get_ligand_slope_ligand_prediction_popularity) +export(get_ligand_target_links_oi) export(get_multi_ligand_importances) export(get_multi_ligand_importances_regression) export(get_multi_ligand_rf_importances) export(get_multi_ligand_rf_importances_regression) export(get_ncitations_genes) export(get_non_spatial_de) +export(get_optimized_parameters_nsga2r) export(get_prioritization_tables) export(get_single_ligand_importances) export(get_single_ligand_importances_regression) @@ -83,23 +89,27 @@ export(get_slope_target_gene_popularity) export(get_slope_target_gene_popularity_ligand_prediction) export(get_target_genes_ligand_oi) export(get_top_predicted_genes) +export(get_weighted_ligand_receptor_links) export(get_weighted_ligand_target_links) export(infer_supporting_datasources) export(ligand_activity_performance_top_i_removed) export(make_circos_lr) +export(make_circos_plot) export(make_discrete_ligand_target_matrix) export(make_heatmap_bidir_lt_ggplot) export(make_heatmap_ggplot) export(make_ligand_activity_target_exprs_plot) export(make_ligand_receptor_lfc_plot) export(make_ligand_receptor_lfc_spatial_plot) +export(make_line_plot) export(make_mushroom_plot) export(make_threecolor_heatmap_ggplot) export(mlrmbo_optimization) export(model_based_ligand_activity_prediction) -export(model_evaluation_hyperparameter_optimization) -export(model_evaluation_optimization) +export(model_evaluation_hyperparameter_optimization_mlrmbo) export(model_evaluation_optimization_application) +export(model_evaluation_optimization_mlrmbo) +export(model_evaluation_optimization_nsga2r) export(mutate_cond) export(nichenet_seuratobj_aggregate) export(nichenet_seuratobj_aggregate_cluster_de) @@ -107,6 +117,8 @@ export(nichenet_seuratobj_cluster_de) export(normalize_single_cell_ligand_activities) export(predict_ligand_activities) export(predict_single_cell_ligand_activities) +export(prepare_circos_visualization) +export(prepare_ligand_receptor_visualization) export(prepare_ligand_target_visualization) export(prepare_settings_leave_one_in_characterization) export(prepare_settings_leave_one_out_characterization) @@ -125,12 +137,15 @@ export(process_table_to_ic) export(randomize_complete_network_source_specific) export(randomize_datasource_network) export(randomize_network) +export(run_nsga2R_cluster) export(scale_quantile) export(scale_quantile_adapted) export(scaling_modified_zscore) export(scaling_zscore) export(single_ligand_activity_score_classification) export(single_ligand_activity_score_regression) +export(visualize_parameter_values) +export(visualize_parameter_values_across_folds) export(wrapper_average_performances) export(wrapper_evaluate_single_importances_ligand_prediction) import(Seurat) diff --git a/R/application_prediction.R b/R/application_prediction.R index e4c7eb1..9a8ce15 100644 --- a/R/application_prediction.R +++ b/R/application_prediction.R @@ -100,7 +100,7 @@ predict_ligand_activities = function(geneset,background_expressed_genes,ligand_t } #' @title Infer weighted active ligand-target links between a possible ligand and target genes of interest #' -#' @description \code{get_weighted_ligand_target_links} Infer active ligand target links between possible lignands and genes belonging to a gene set of interest: consider the intersect between the top n targets of a ligand and the gene set. +#' @description \code{get_weighted_ligand_target_links} Infer active ligand target links between possible ligands and genes belonging to a gene set of interest: consider the intersect between the top n targets of a ligand and the gene set. #' #' @usage #' get_weighted_ligand_target_links(ligand, geneset,ligand_target_matrix,n = 250) @@ -217,6 +217,94 @@ prepare_ligand_target_visualization = function(ligand_target_df, ligand_target_m return(vis_ligand_target_network) } + +#' @title Get the weighted ligand-receptor links between a possible ligand and its receptors +#' @description \code{get_weighted_ligand_receptor_links} Get the weighted ligand-receptor links between a possible ligand and its receptors +#' +#' @param best_upstream_ligands Character vector containing ligands of interest. +#' @param expressed_receptors Character vector of receptors expressed in the cell type of interest. +#' @param lr_network A data frame with two columns, \code{from} and \code{to}, containing the ligand-receptor interactions. +#' @param weighted_networks_lr_sig A data frame with three columns, \code{from}, \code{to} and \code{weight}, containing the ligand-receptor interactions and their weights. +#' +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' ligand_receptor_links_df <- get_weighted_ligand_receptor_links(best_upstream_ligands, expressed_receptors, lr_network, weighted_networks$lr_sig) +#' } +#' +get_weighted_ligand_receptor_links = function(best_upstream_ligands, expressed_receptors, lr_network, weighted_networks_lr_sig) { + + lr_network <- lr_network %>% distinct(from, to) + weighted_networks_lr <- inner_join(weighted_networks_lr_sig, lr_network, by = c("from","to")) + + lr_network_top <- lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) + best_upstream_receptors <- lr_network_top %>% pull(to) %>% unique() + + lr_network_top_df_long <- weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) + + return(lr_network_top_df_long) + +} + +#' @title Prepare ligand-receptor visualization +#' @description \code{prepare_ligand_receptor_visualization} Prepare a matrix of ligand-receptor interactions for visualization. +#' +#' @param lr_network_top_df_long A data frame with three columns, \code{from}, \code{to} and \code{weight}, containing the ligand-receptor interactions and their weights. +#' @param best_upstream_ligands Character vector of ligands of interest. This will only be used if \code{order_hclust = "receptors"} or \code{order_hclust = "none"}. +#' @param order_hclust "both", "ligands", "receptors", or "none". If "both", the ligands and receptors are ordered by hierarchical clustering. If "ligands" or "receptors" only the ligands or receptors are ordered hierarchically. If "none", no hierarchical clustering is performed, and the ligands are ordered based on \code{best_upstream_ligands}, and the receptors are ordered alphabetically. +#' +#' @return A matrix of ligand-receptor interactions for visualization. +#' +#' @export +#' +#' @examples +#' \dontrun{ +#' ligand_receptor_network = prepare_ligand_receptor_visualization(best_upstream_ligands = best_upstream_ligands, expressed_receptors = expressed_receptors, lr_network = lr_network, weighted_networks_lr_sig = weighted_networks_lr_sig, order_hclust = TRUE) +#' } +#' +prepare_ligand_receptor_visualization = function(lr_network_top_df_long, best_upstream_ligands, order_hclust = "both") { + + lr_network_top_df <- lr_network_top_df_long %>% spread("from","weight",fill = 0) + lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) + + # Check if order_hclust is valid + if (!(order_hclust %in% c("both", "ligands", "receptors", "none"))) { + stop("order_hclust must be one of 'both', 'ligands', 'receptors', or 'none'") + } + + if (order_hclust == "both" | order_hclust == "receptors") { + dist_receptors = dist(lr_network_top_matrix, method = "binary") + hclust_receptors = hclust(dist_receptors, method = "ward.D2") + order_receptors = hclust_receptors$labels[hclust_receptors$order] + } + + if (order_hclust == "both" | order_hclust == "ligands") { + dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") + hclust_ligands = hclust(dist_ligands, method = "ward.D2") + order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] + } + + if (order_hclust == "none" | order_hclust == "receptors") { + order_ligands_receptor = rev(best_upstream_ligands) + } + + if (order_hclust == "none" | order_hclust == "ligands") { + order_receptors = rownames(lr_network_top_matrix) + } + + order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) + order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) + + vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] + rownames(vis_ligand_receptor_network) <- order_receptors + colnames(vis_ligand_receptor_network) <- order_ligands_receptor + + return(vis_ligand_receptor_network) + +} + #' @title Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model #' #' @description \code{assess_rf_class_probabilities} Assess probability that a target gene belongs to the geneset based on a multi-ligand random forest model (with cross-validation). Target genes and background genes will be split in different groups in a stratified way. @@ -399,6 +487,103 @@ calculate_fraction_top_predicted_fisher = function(affected_gene_predictions, qu return(summary) } } + +#' @title Run ligand activity analysis with bootstrap +#' @description \code{bootstrap_ligand_activity_analysis} Randomly sample a gene set from all expressed genes in the receiver cell type, then perform ligand activity analysis on this gene set. This 'null gene set' has equal length to the gene set of interest. +#' @usage bootstrap_ligand_activity_analysis(expressed_genes_receiver, geneset_oi, background_expressed_genes, ligand_target_matrix, potential_ligands, n_iter = 10, n_cores = 1, parallel_func = "mclapply") +#' @param expressed_genes_receiver Genes expressed in the receiver cell type +#' @inheritParams predict_ligand_activities +#' @param n_iter Number of iterations to perform (Default: 10) +#' @param geneset_oi Character vector of the gene symbols of genes of which the expression is potentially affected by ligands from the interacting cell. +#' @param n_cores Number of cores to use for parallelization (Default: 1) +#' @param parallel_func Parallelization function to use from "mclapply", "pbmclapply", or "parLapply" (Default: "mclapply") +#' @return List of n_iter elements, each element containing the output of predict_ligand_activities for a random gene set +#' @examples +#' \dontrun{ +#' permutations <- bootstrap_ligand_activity_analysis(expressed_genes_receiver, geneset_oi, background_expressed_genes, +#' ligand_target_matrix, potential_ligands, n_iter = 10, n_cores = 1, parallel_func = "mclapply") +#' } +#' @export +bootstrap_ligand_activity_analysis <- function(expressed_genes_receiver, geneset_oi, background_expressed_genes, + ligand_target_matrix, potential_ligands, n_iter = 10, + n_cores=1, parallel_func = "mclapply"){ + + # Check if parallel function is valid + if (!(parallel_func %in% c("mclapply", "pbmclapply", "parLapply"))) { + stop("parallel_func must be one of 'mclapply', 'pbmclapply', or 'parLapply'") + } + + # Check if parallel function is mclapply/pbmclapply that they are not on Windows + if (Sys.info()[['sysname']] == "Windows" && parallel_func %in% c("mclapply", "pbmclapply")){ + if (n_cores > 1){ + warning("Setting 'n_cores' to 1 as Windows OS cannot make use of mclapply and pbmclapply parallelization. Consider using 'parLapply' instead.") + n_cores = 1 + } + } + + if (parallel_func %in% c("mclapply", "pbmclapply")){ + parFunc <- ifelse(parallel_func == "mclapply", parallel::mclapply, pbmcapply::pbmclapply) + + perms <- parFunc(1:n_iter, function (i){ + random_geneset <- sample(expressed_genes_receiver, size = length(geneset_oi)) + predict_ligand_activities(geneset = random_geneset, background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + }, mc.cores = n_cores) + + } else if (parallel_func == "parLapply"){ + if(Sys.info()[['sysname']] == "Windows"){ + # STILL HAS TO BE TESTED ON WINDOWS + clust <- parallel::makeCluster(n_cores) + parallel::clusterExport(clust, c("expressed_genes_receiver", "geneset_oi","background_expressed_genes","ligand_target_matrix","potential_ligands"), envir = environment()) + parallel::clusterEvalQ(clust, library(nichenetr)) + parallel::clusterEvalQ(clust, library(tidyverse)) + + } else { + clust <- parallel::makeCluster(n_cores, type="FORK") + } + + on.exit(parallel::stopCluster(clust)) + perms <- parallel::parLapply(clust, 1:n_iter, function (i){ + random_geneset <- sample(expressed_genes_receiver, size = length(geneset_oi)) + predict_ligand_activities(geneset = random_geneset, background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) + }) + + } + + return(perms) + +} + + +#' @title Calculate ligand p-values from the bootstrapped ligand activity analysis +#' @description \code{calculate_p_value_bootstrap} Calculate the p-value for each ligand from the bootstrapped ligand activity analysis +#' @usage calculate_p_value_bootstrap(bootstrap_results, ligand_activities, metric = "aupr_corrected") +#' @param bootstrap_results Output of \code{\link{bootstrap_ligand_activity_analysis}} +#' @param ligand_activities Output of \code{\link{predict_ligand_activities}} +#' @param metric Metric to use (Default: "aupr_corrected") +#' @return Data frame with the ligand name, the number of times the bootstrapped value had a higher score than the observed (\code{total}), and the p-value for each ligand, calculated as \code{total/length(bootstrap_results)} +#' @examples +#' \dontrun{ +#' permutations <- bootstrap_ligand_activity_analysis(expressed_genes_receiver, geneset_oi, background_expressed_genes, +#' ligand_target_matrix, potential_ligands, n_iter = 10) +#' p_values <- calculate_p_value_bootstrap(permutations, ligand_activities, metric = "aupr_corrected") +#' } +#' @export +calculate_p_value_bootstrap <- function(bootstrap_results, ligand_activities, metric = "aupr_corrected"){ + n_iter <- length(bootstrap_results) + bootstrap_results %>% bind_rows(.id = "round") %>% group_by(test_ligand) %>% + select(round, test_ligand, all_of(metric)) %>% arrange(test_ligand) %>% rename(null_metric = metric) %>% + # Merge with observed values + inner_join(ligand_activities %>% select(test_ligand, all_of(metric)) %>% rename(observed_metric = metric), by = "test_ligand") %>% + # Calculate fraction of permutations with higher AUPR + mutate(null_is_larger = null_metric > observed_metric) %>% + summarise(total = sum(null_is_larger)) %>% + mutate(pval = total/n_iter) + +} + + #' @title Cut off outer quantiles and rescale to a [0, 1] range #' #' @description \code{scale_quantile} Cut off outer quantiles and rescale to a [0, 1] range @@ -1963,7 +2148,7 @@ nichenet_seuratobj_aggregate_cluster_de = function(seurat_obj, receiver_affected #' @description \code{get_lfc_celltype} Get log fold change of genes between two conditions in cell type of interest when using a Seurat single-cell object. #' #' @usage -#' get_lfc_celltype(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10) +#' get_lfc_celltype(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype") #' #' #' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/. #' @param celltype_oi Name of celltype of interest. Should be present in the celltype metadata dataframe. @@ -1971,7 +2156,7 @@ nichenet_seuratobj_aggregate_cluster_de = function(seurat_obj, receiver_affected #' @param condition_oi Condition of interest. Should be a name present in the "condition_colname" column of the metadata. #' @param condition_reference The second condition (e.g. reference or steady-state condition). Should be a name present in the "condition_colname" column of the metadata. #' @param celltype_col Metadata colum name where the cell type identifier is stored. Default: "celltype". If this is NULL, the Idents() of the seurat object will be considered as your cell type identifier. -#' @param expression_pct To consider only genes if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10 +#' @param ... Additional arguments passed to \code{\link{FindMarkers}}. #' #' @return A tbl with the log fold change values of genes. Positive lfc values: higher in condition_oi compared to condition_reference. #' @@ -1986,7 +2171,7 @@ nichenet_seuratobj_aggregate_cluster_de = function(seurat_obj, receiver_affected #' } #' @export #' -get_lfc_celltype = function(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", expression_pct = 0.10){ +get_lfc_celltype = function(celltype_oi, seurat_obj, condition_colname, condition_oi, condition_reference, celltype_col = "celltype", ...){ requireNamespace("Seurat") requireNamespace("dplyr") if(!is.null(celltype_col)){ @@ -1997,8 +2182,9 @@ get_lfc_celltype = function(celltype_oi, seurat_obj, condition_colname, conditio seuratObj_sender = subset(seurat_obj, idents = celltype_oi) } + seuratObj_sender = SetIdent(seuratObj_sender, value = seuratObj_sender[[condition_colname, drop=TRUE]]) - DE_table_sender = FindMarkers(object = seuratObj_sender, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = expression_pct, logfc.threshold = 0.05) %>% rownames_to_column("gene") + DE_table_sender = FindMarkers(object = seuratObj_sender, ident.1 = condition_oi, ident.2 = condition_reference, ...) %>% rownames_to_column("gene") SeuratV4 = c("avg_log2FC") %in% colnames(DE_table_sender) diff --git a/R/application_visualization.R b/R/application_visualization.R index 5544d17..9028bf4 100644 --- a/R/application_visualization.R +++ b/R/application_visualization.R @@ -11,6 +11,7 @@ #' @param top_n_regulators The number of top regulators that should be included in the ligand-target signaling network. Top regulators are regulators that score both high for being upstream of the target gene(s) and high for being downstream of the ligand. Default: 4. #' @param weighted_networks A list of two elements: lr_sig: a data frame/ tibble containg weighted ligand-receptor and signaling interactions (from, to, weight); and gr: a data frame/tibble containng weighted gene regulatory interactions (from, to, weight) #' @param ligands_position Indicate whether the ligands in the ligand-target matrix are in the rows ("rows") or columns ("cols"). Default: "cols". +#' @param minmax_scaling Indicate whether the weights of both dataframes should be min-max scaled between 0.75 and 1. Default: FALSE. #' #' @return A list containing 2 elements (sig and gr): the integrated weighted ligand-signaling and gene regulatory networks data frame / tibble format with columns: from, to, weight #' @@ -27,7 +28,7 @@ #' } #' @export #' -get_ligand_signaling_path = function(ligand_tf_matrix, ligands_all, targets_all, top_n_regulators = 4, weighted_networks, ligands_position = "cols"){ +get_ligand_signaling_path = function(ligand_tf_matrix, ligands_all, targets_all, top_n_regulators = 4, weighted_networks, ligands_position = "cols", minmax_scaling = FALSE){ if (!is.list(weighted_networks)) stop("weighted_networks must be a list object") @@ -70,6 +71,11 @@ get_ligand_signaling_path = function(ligand_tf_matrix, ligands_all, targets_all, tf_regulatory = weighted_networks$gr %>% filter(from %in% final_combined_df$TF & to %in% targets_all) %>% ungroup() %>% distinct() + if (minmax_scaling){ + tf_signaling <- tf_signaling %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) + tf_regulatory <- tf_regulatory %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) + } + return(list(sig = tf_signaling, gr = tf_regulatory)) } #' @title Get ligand-target signaling paths between ligand(s), receptors, and target gene(s) of interest @@ -344,7 +350,7 @@ make_heatmap_ggplot = function(matrix, y_name, x_name, y_axis = TRUE,x_axis = TR requireNamespace("dplyr") requireNamespace("ggplot2") - matrix_df_vis = matrix %>% data.frame() %>% rownames_to_column("y") %>% as_tibble() %>% gather(x,"score", -y) %>% mutate(y = factor(y, levels = rownames(matrix), ordered = TRUE), x = factor(x, levels = colnames(matrix), ordered = TRUE)) + matrix_df_vis = matrix %>% data.frame(check.names = FALSE) %>% rownames_to_column("y") %>% as_tibble() %>% gather(x,"score", -y) %>% mutate(y = factor(y, levels = rownames(matrix), ordered = TRUE), x = factor(x, levels = colnames(matrix), ordered = TRUE)) plot_object = matrix_df_vis %>% ggplot(aes(x,y,fill = score)) + geom_tile(color = "white", size = 0.5) + scale_fill_gradient(low = "whitesmoke", high = color) + theme_minimal() @@ -424,7 +430,7 @@ make_threecolor_heatmap_ggplot = function(matrix, y_name, x_name, y_axis = TRUE, requireNamespace("dplyr") requireNamespace("ggplot2") - matrix_df_vis = matrix %>% data.frame() %>% rownames_to_column("y") %>% as_tibble() %>% gather(x,"score", -y) %>% mutate(y = factor(y, levels = rownames(matrix), ordered = TRUE), x = factor(x, levels = colnames(matrix), ordered = TRUE)) + matrix_df_vis = matrix %>% data.frame(check.names = FALSE) %>% rownames_to_column("y") %>% as_tibble() %>% gather(x,"score", -y) %>% mutate(y = factor(y, levels = rownames(matrix), ordered = TRUE), x = factor(x, levels = colnames(matrix), ordered = TRUE)) plot_object = matrix_df_vis %>% ggplot(aes(x,y,fill = score)) + geom_tile(color = "white", size = 0.5) + scale_fill_gradient2(low = low_color, mid = mid_color,high = high_color, midpoint = mid) + theme_minimal() @@ -557,6 +563,7 @@ make_heatmap_bidir_lt_ggplot = function(matrix, y_name, x_name, y_axis = TRUE, x #' @param receptor_fill_colors A vector of the low and high colors to use for the receptor semicircle fill gradient (default: c("#FEE0D2", "#A50F15")) #' @param unranked_ligand_fill_colors A vector of the low and high colors to use for the unranked ligands when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) #' @param unranked_receptor_fill_colors A vector of the low and high colors to use for the unkraed receptors when show_all_datapoints is TRUE (default: c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))) +#' @param ... Additional arguments passed to \code{\link{ggplot2::theme}}. As there are often issues with the scales legend, it is recommended to change legend sizes and positions using this argument, i.e., \code{legend.key.height}, \code{legend.key.width}, \code{legend.title}, and \code{legend.text}. #' #' @return A ggplot object #' @@ -572,24 +579,29 @@ make_heatmap_bidir_lt_ggplot = function(matrix, y_name, x_name, y_axis = TRUE, x #' prior_table <- generate_prioritization_tables(processed_expr_table, processed_DE_table, ligand_activities, processed_condition_markers, prioritizing_weights) #' make_mushroom_plot(prior_table) #' +#' #' # Show only top 20, and write rankings on the plot #' make_mushroom_plot(prior_table, top_n = 20, show_rankings = TRUE) #' #' # Show all datapoints, and use true color range #' make_mushroom_plot(prior_table, show_all_datapoints = TRUE, true_color_range = TRUE) #' +#' #' # Change the size and color columns #' make_mushroom_plot(prior_table, size = "pct_expressed", color = "scaled_avg_exprs") #' } +#' +#' #' @export #' -make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = FALSE, +make_mushroom_plot <- function(prioritization_table, top_n = 30, show_rankings = FALSE, show_all_datapoints = FALSE, true_color_range = FALSE, size = "scaled_avg_exprs", color = "scaled_lfc", ligand_fill_colors = c("#DEEBF7", "#08306B"), receptor_fill_colors = c("#FEE0D2", "#A50F15"), unranked_ligand_fill_colors = c(alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)), - unranked_receptor_fill_colors = c( alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2))){ + unranked_receptor_fill_colors = c( alpha("#FFFFFF", alpha=0.2), alpha("#252525", alpha=0.2)), + ...){ size_ext <- c("ligand", "receptor"); color_ext <- c("ligand", "receptor") if (size == "pct_expressed") size_ext <- c("sender", "receiver") if (color == "pct_expressed") color_ext <- c("sender", "receiver") @@ -600,7 +612,7 @@ make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = stop(paste(paste0("`", cols_to_use %>% .[!. %in% colnames(prioritization_table)], "`", collapse =", "), "column not in prioritization table")) } if(!is.logical(show_rankings) | length(show_rankings) != 1) - stop("show_rankings should be a TRUE or FALSE") + stop("show_rankings should be a TRUE or FALSE") if(!is.logical(show_all_datapoints) | length(show_all_datapoints) != 1) stop("show_all_datapoints should be a TRUE or FALSE") if(!is.logical(true_color_range) | length(true_color_range) != 1) @@ -645,14 +657,46 @@ make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = # Rename size and color columns to be more human-readable keywords_adj <- c("LFC", "p-val", "product", "mean", "adjusted", "expression") %>% setNames(c("lfc", "pval", "prod", "avg", "adj", "exprs")) size_title <- sapply(stringr::str_split(size, "_")[[1]], function(k) ifelse(is.na(keywords_adj[k]), k, keywords_adj[k])) %>% - paste0(., collapse = " ") %>% R.utils::capitalize() + paste0(., collapse = " ") %>% stringr::str_replace("^\\w{1}", toupper) color_title <- sapply(stringr::str_split(color, "_")[[1]], function(k) ifelse(is.na(keywords_adj[k]), k, keywords_adj[k])) %>% - paste0(., collapse = " ") %>% R.utils::capitalize() + paste0(., collapse = " ") %>% stringr::str_replace("^\\w{1}", toupper) color_lims <- c(0,1) if (true_color_range) color_lims <- NULL scale <- 0.5 + + ncelltypes <- length(celltypes_vec) + n_interactions <- length(lr_interaction_vec) + legend2_df <- data.frame(values = c(0.25, 0.5, 0.75, 1), x=(ncelltypes+2.5):(ncelltypes+5.5), y=rep(floor(n_interactions/3), 4), start=-pi) + axis_rect <- data.frame(xmin=0, xmax=ncelltypes+1, ymin=0, ymax=n_interactions+1) + panel_grid_y <- data.frame(x = rep(seq(from = 0.5, to = ncelltypes+0.5, by = 1), each=2), + y = c(n_interactions+1, 0), group = rep(1:(ncelltypes+1), each=2)) + panel_grid_x <- data.frame(y = rep(seq(from = 0.5, to = n_interactions+0.5, by = 1), each=2), + x = c(ncelltypes+1, 0), group = rep(1:(n_interactions+1), each=2)) + + theme_args <- list(panel.grid.major = element_blank(), + legend.box = "horizontal", + panel.background = element_blank()) + + theme_args[names(list(...))] <- list(...) + + # Check if legend.title is in the extra arguments + # Multiply by ratio of 5/14: see https://stackoverflow.com/questions/25061822/ggplot-geom-text-font-size-control + scale_legend_title_size <- ifelse("legend.title" %in% names(theme_args), theme_args$legend.title$size*(5/14), GeomLabel$default_aes$size) + # Check if legend.text is in the extra arguments + scale_legend_text_size <- ifelse("legend.text" %in% names(theme_args), theme_args$legend.text$size*(5/14), GeomLabel$default_aes$size) + + # Check if legend.justification is in the extra arguments + if (!"legend.justification" %in% names(theme_args)) { + theme_args$legend.justification <- c(1, 0.7) + } + + # Check if legend.position is in the extra arguments + if (!"legend.position" %in% names(theme_args)){ + theme_args$legend.position <- c(1, 0.7) + } + p1 <- ggplot() + # Draw ligand semicircle geom_arc_bar(data = filtered_table %>% filter(type=="ligand", prioritization_rank <= top_n), @@ -661,6 +705,8 @@ make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = color = "white") + scale_fill_gradient(low = ligand_fill_colors[1] , high=ligand_fill_colors[2] , limits=color_lims, oob=scales::squish, + n.breaks = 3, + guide = guide_colorbar(order = 1), name=paste0(color_title, " (", color_ext[1], ")") %>% str_wrap(width=15)) + # Create new fill scale for receptor semicircles new_scale_fill() + @@ -668,34 +714,57 @@ make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, start = start, end = start + pi, fill=color), color = "white") + - scale_fill_gradient(low = receptor_fill_colors[1], high=receptor_fill_colors[2] , limits=color_lims, oob=scales::squish, - name=paste0(color_title, " (", color_ext[2], ")") %>% str_wrap(width=15)) + + # Size legend + geom_arc_bar(data = legend2_df, aes(x0=x, y0=y, r0=0, r=sqrt(values)*scale, start=start, end=start+pi), fill="black") + + geom_rect(data = legend2_df, aes(xmin=x-0.5, xmax=x+0.5, ymin=y-0.5, ymax=y+0.5), color="gray90", fill=NA) + + geom_text(data = legend2_df, aes(label=values, x=x, y=y-0.6), vjust=1, size = scale_legend_text_size) + + geom_text(data = data.frame(x = (ncelltypes+4), y = floor(n_interactions/3)+1, + label = size_title %>% str_wrap(width=15)), + aes(x=x, y=y, label=label), size = scale_legend_title_size, vjust=0, lineheight = .75) + + # Panel grid + geom_line(data = panel_grid_y, aes(x=x, y=y, group=group), color = "gray90") + + geom_line(data = panel_grid_x, aes(x=x, y=y, group=group), color = "gray90") + + # Draw box to represent x and y "axis" + geom_rect(data = axis_rect, aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax), color = "black", fill = "transparent") + # Other plot information - scale_y_continuous(breaks=length(lr_interaction_vec):1, labels=names(lr_interaction_vec)) + - scale_x_continuous(breaks=1:length(celltypes_vec), labels=names(celltypes_vec), position="top") + + scale_fill_gradient(low = receptor_fill_colors[1], high=receptor_fill_colors[2] , limits=color_lims, oob=scales::squish, n.breaks = 3, + name=paste0(color_title, " (", color_ext[2], ")") %>% str_wrap(width=15), + guide = guide_colorbar(order = 2)) + + scale_y_continuous(breaks=n_interactions:1, labels=names(lr_interaction_vec), expand = expansion(add=c(0,0))) + + scale_x_continuous(breaks=1:ncelltypes, labels=names(celltypes_vec), position="top", expand = expansion(add=c(0,0))) + xlab("Sender cell types") + ylab("Ligand-receptor interaction") + coord_fixed() + - theme_bw() + - theme(panel.grid.major = element_blank(), - legend.box = "horizontal") - + do.call(theme, theme_args) # Add unranked ligand and receptor semicircles if requested if (show_all_datapoints){ + + # Limits will depend on true_color_range + unranked_ligand_lims <- c(0,1); unranked_receptor_lims <- c(0,1) + if (true_color_range){ + # Follow limits of the top_n lr pairs + unranked_ligand_lims <- filtered_table %>% filter(type=="ligand", prioritization_rank <= top_n) %>% + select(color) %>% range + unranked_receptor_lims <- filtered_table %>% filter(type=="receptor", prioritization_rank <= top_n) %>% + select(color) %>% range + } + p1 <- p1 + new_scale_fill() + geom_arc_bar(data = filtered_table %>% filter(type=="ligand", prioritization_rank > top_n), aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, start = start, end = start + pi, fill=color), color = "white") + scale_fill_gradient(low = unranked_ligand_fill_colors[1], high=unranked_ligand_fill_colors[2], - limits=color_lims, oob=scales::squish, guide = "none") + + limits=unranked_ligand_lims, oob = scales::oob_squish, + guide = "none") + new_scale_fill() + geom_arc_bar(data = filtered_table %>% filter(type=="receptor", prioritization_rank > top_n), aes(x0 = x, y0 = y, r0 = 0, r = sqrt(size)*scale, start = start, end = start + pi, fill=color), color = "white") + scale_fill_gradient(low=unranked_receptor_fill_colors[1], high=unranked_receptor_fill_colors[2], - limits=color_lims, oob=scales::squish, guide = "none") + limits=unranked_receptor_lims, oob = scales::oob_squish, + guide = "none") } # Add ranking numbers if requested @@ -704,33 +773,539 @@ make_mushroom_plot = function(prioritization_table, top_n = 30, show_rankings = aes(x=x, y=y, label=prioritization_rank)) } - legend1 <- ggpubr::as_ggplot(ggpubr::get_legend(p1)) - - # For the size legend, create a new plot - legend2 <- ggplot(data.frame(values = c(0.25, 0.5, 0.75, 1), x=1:4, y=1, start=-pi)) + - geom_rect(aes(xmin=x-0.5, xmax=x+0.5, ymin=y-0.5, ymax=y+0.5), color="gray80", fill=NA) + - geom_arc_bar(aes(x0=x, y0=y, r0=0, r=sqrt(values)*scale, start=start, end=start+pi), fill="black") + - geom_text(aes(label=values, x=x, y=y-0.6), vjust=1) + - labs(tag = size_title) + - scale_x_continuous(breaks = 1:4, labels=c(0.25, 0.5, 0.75, 1)) + - scale_y_continuous(limits=c(-0.5, 1.5)) + - labs(x="Percent expressed") + - coord_fixed() + theme_classic() + - theme(panel.background = element_blank(), - plot.background = element_blank(), - plot.margin = margin(0, 0, 10, 0), - plot.tag.position = "top", - plot.tag = element_text(margin=margin(0, 0, 5,0), size=10), - axis.text = element_blank(), - axis.line = element_blank(), - axis.ticks = element_blank(), - axis.title = element_blank()) - - # Combine the two legends - legends <- cowplot::plot_grid(NULL, legend1, legend2, NULL, nrow=4, scale=c(1,1,0.5,1), - rel_heights = c(2, 1, 2, 2), align = "v", axis="tb") - cowplot::plot_grid(p1 + theme(legend.position="none"), legends) + p1 +} + + +## Circos plot functions +#' @title Assign ligands to cell types +#' @usage assign_ligands_to_celltype(seuratObj, ligands, celltype_col, func.agg=mean, func.assign=function(x) mean(x) + sd(x), slot="data", condition_oi=NULL, condition_col=NULL) +#' @description Assign ligands to a sender cell type, based on the strongest expressing cell type of that ligand. Ligands are only assigned to a cell type if that cell type is the only one to show an expression that is higher than the average + SD. Otherwise, it is assigned to "General". +#' @param seuratObj Seurat object +#' @param ligands Vector of ligands to assign to cell types +#' @param celltype_col Metadata column name in the Seurat object that contains the cell type information +#' @param func.agg Function to use to aggregate the expression of a ligand across all cells in a cell type (default = mean) +#' @param func.assign Function to use to assign a ligand to a cell type (default = mean + SD) +#' @param slot Slot in the Seurat object to use (default = "data"). If "data", the normalized counts are first exponentiated before aggregation is performed +#' @param condition_oi Condition of interest to subset the Seurat object (default = NULL) +#' @param condition_col Metadata column name in the Seurat object that contains the condition of interest (default = NULL) +#' @return A data frame of two columns, the cell type the ligand has been assigned to (\code{ligand_type}) and the ligand name (\code{ligand}) +#' @export +#' @examples \dontrun{ +#' assign_ligands_to_celltype(seuratObj = seuratObj, ligands = best_upstream_ligands[1:20], +#' celltype_col = "celltype", func.agg = mean, func.assign = function(x) {mean(x)+sd(x)}, +#' slot = "data", condition_oi = "LCMV", condition_col = "aggregate") +#' } +#' +assign_ligands_to_celltype <- function(seuratObj, ligands, celltype_col, func.agg = mean, func.assign = function(x) {mean(x)+sd(x)}, + condition_oi = NULL, condition_col = NULL, slot = "data") { + # Check that if condition_oi is given, then so is condition_oi, and vice versa + if (any(!is.na(condition_col), !is.na(condition_oi)) & !all(!is.na(condition_col), !is.na(condition_oi))){ + stop("Please input both condition_colname and condition_oi") + } + + # Check that all ligands are in the seurat object + if (any(!ligands %in% rownames(seuratObj))){ + stop("Not all ligands are in the Seurat object") + } + + seuratObj_subset <- subset(seuratObj, features = ligands) + + # Calculate average ligand expression in sender cells + if (!is.null(condition_oi)){ + seuratObj_subset <- seuratObj_subset[, seuratObj_subset[[condition_col]] == condition_oi ] + } + + avg_expression_ligands <- lapply(unique(seuratObj_subset$celltype), function (celltype) { + if (slot == "data"){ + # Exponentiate-1 and calculate in non-log space + expm1(GetAssayData(seuratObj_subset[, seuratObj_subset[[celltype_col]] == celltype], slot = slot)) %>% + apply(1, func.agg) + + } else { + apply(GetAssayData(seuratObj_subset[, seuratObj_subset[[celltype_col]] == celltype], slot = slot), 1, func.agg) + + } + }) %>% setNames(unique(seuratObj_subset$celltype)) %>% + do.call(cbind, .) %>% + set_rownames(ligands) + + sender_ligand_assignment <- avg_expression_ligands %>% apply(1, function(ligand_expression){ + ligand_expression > func.assign(ligand_expression) + }) %>% t() + sender_ligand_assignment <- sender_ligand_assignment %>% apply(2, function(x){x[x == TRUE]}) %>% purrr::keep(function(x){length(x) > 0}) + + all_assigned_ligands = sender_ligand_assignment %>% lapply(function(x){names(x)}) %>% unlist() + unique_ligands = all_assigned_ligands %>% table() %>% .[. == 1] %>% names() + general_ligands = ligands %>% setdiff(unique_ligands) + + ligand_type_indication_df <- lapply(names(sender_ligand_assignment), function(sender) { + unique_ligands_sender <- names(sender_ligand_assignment[[sender]]) %>% setdiff(general_ligands) + data.frame(ligand_type = sender, ligand = unique_ligands_sender) + }) %>% bind_rows() + + ligand_type_indication_df <- bind_rows(ligand_type_indication_df, + data.frame(ligand_type = "General", ligand = general_ligands)) + + return(ligand_type_indication_df) +} + +#' @title Get ligand-target links of interest +#' @usage get_ligand_target_links_oi(ligand_type_indication_df, active_ligand_target_links_df, cutoff = 0.40) +#' @description Filter ligand-target links based on a cutoff +#' @param ligand_type_indication_df Dataframe with column names \code{ligand_type} and \code{ligand}, from the function \code{\link{assign_ligands_to_celltype}} +#' @param active_ligand_target_links_df Dataframe with weighted ligand-target links from the function \code{\link{get_ligand_target_links}}, and an additional column \code{target_type} that indicates the grouping of target genes +#' @param cutoff Quantile to filter ligand-target links (default = 0.40, meaning 40\% of the lowest weighted ligand-target links are removed) +#' @return A dataframe with ligand-target links with weights above a certain cutoff. This dataframe also contains the attribute \code{cutoff_include_all_ligands}, which is the cutoff value of regulatory potential used at \code{cutoff} quantile. +#' @export +#' @examples \dontrun{ +#' active_ligand_target_links_df <- lapply(best_upstream_ligands, get_weighted_ligand_target_links, +#' geneset = geneset_oi, +#' ligand_target_matrix = ligand_target_matrix, +#' n = 200) +#' active_ligand_target_links_df <- drop_na(bind_rows(active_ligand_target_links_df)) +#' ligand_type_indication_df <- assign_ligands_to_celltype(seuratObj = seuratObj, ligands = best_upstream_ligands[1:20]) +#' circos_links <- get_ligand_target_links_oi(ligand_type_indication_df, +#' active_ligand_target_links_df %>% mutate(target_type = "LCMV-DE"), +#' cutoff = 0.40) +#' attr(circos_links, "cutoff_include_all_ligands") # This is the cutoff value of regulatory potential used +#' } +#' +get_ligand_target_links_oi <- function(ligand_type_indication_df, active_ligand_target_links_df, cutoff = 0.40){ + # Check that ligand_type_indication_df has the correct colnames + if (!all(c("ligand_type", "ligand") %in% colnames(ligand_type_indication_df))){ + stop("ligand_type_indication_df must have columns ligand_type and ligand") + } + + # Check that active_ligand_target_links_df has the correct colnames + if (!all(c("ligand", "target", "weight", "target_type") %in% colnames(active_ligand_target_links_df))){ + stop("active_ligand_target_links_df must have columns ligand, target, weight, and target_type") + } + + # Check that cutoff is between 0 and 1 + if (cutoff < 0 | cutoff > 1){ + stop("cutoff must be between 0 and 1") + } + + active_ligand_target_links_df <- active_ligand_target_links_df %>% inner_join(ligand_type_indication_df) + cutoff_include_all_ligands <- active_ligand_target_links_df$weight %>% quantile(cutoff) + active_ligand_target_links_df_circos <- active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands) + ligands_to_remove <- setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique()) + targets_to_remove <- setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique()) + circos_links <- active_ligand_target_links_df %>% filter(!target %in% targets_to_remove & !ligand %in% ligands_to_remove) + + # Add this as an attribute + attr(circos_links, "cutoff_include_all_ligands") <- cutoff_include_all_ligands + return(circos_links) } +#' @title Prepare circos visualization +#' @usage prepare_circos_visualization(circos_links, ligand_colors = NULL, target_colors = NULL, widths = NULL, celltype_order = NULL) +#' @description Prepare the data for the circos visualization by incorporating the colors and order of the links, as well as gaps between different cell types +#' @param circos_links Dataframe from the function \code{\link{get_ligand_target_links_oi}} containing weighted ligand-target links, cell type expressing the ligand, and target gene goruping +#' @param ligand_colors Named vector of colors for each cell type (default: NULL, where colors follow the ggplot default color scheme) +#' @param target_colors Named vector of colors for each target gene grouping (default: NULL, where colors follow the ggplot default color scheme) +#' @param widths Named list of widths for the different types groupings, including: +#' \itemize{ +#' \item width_same_cell_same_ligand_type: Width of the links between ligands of the same cell type (default: 0.5) +#' \item width_different_cell: Width of the links between different cell types, or between different target gene groups (default: 6) +#' \item width_ligand_target: Width of the links between ligands and targets (default: 15) +#' \item width_same_cell_same_target_type: Width of the links between target genes of the same group (default: 0.5) +#' } +#' @param celltype_order Order of the cell types (default: NULL, where cell types are ordered alphabetically, followed by "General"). Cell types are then drawn counter-clockwise in the circos plot. +#' @return A list of four objects, including: +#' \itemize{ +#' \item circos_links: Dataframe of weighted ligand-target links +#' \item ligand_colors: Named vector of ligands and their colors +#' \item order: Vector of order of the ligands and target genes +#' \item gaps: Vector of gaps between the different groupings +#' } +#' @examples \dontrun{ +#' celltype_order <- c("General", "NK", "B", "DC", "Mono") +#' ligand_colors <- c("General" = "lawngreen", "NK" = "royalblue", "B" = "darkgreen", "Mono" = "violet", "DC" = "steelblue2") +#' target_colors <- c("LCMV-DE" = "tomato") +#' vis_circos_obj <- prepare_circos_visualization(circos_links, ligand_colors, target_colors, celltype_order = celltype_order) +#' } +#' +#' @export +prepare_circos_visualization <- function(circos_links, ligand_colors = NULL, target_colors = NULL, widths = NULL, celltype_order = NULL) { + # Check that circos_links has the correct colnames + if (!all(c("ligand", "target", "weight", "target_type", "ligand_type") %in% colnames(circos_links))){ + stop("circos_links must have columns ligand, target, weight, target_type, and ligand_type") + } + + # If ligand_colors and/or target_colors is NULL, set to default ggplot colors (equally spaced colors around the color wheel) + if (is.null(ligand_colors) | is.null(target_colors)){ + n_ligands <- is.null(ligand_colors)*length(unique(circos_links$ligand_type)) + n_targets <- is.null(target_colors)*length(unique(circos_links$target_type)) + n_total <- n_ligands + n_targets + hues <- seq(15, 375, length = n_total + 1) + if (is.null(target_colors)){ + target_colors <- hcl(h = hues, l = 65, c = 100)[(n_ligands+1):n_total] + names(target_colors) <- unique(circos_links$target_type) + } + + if (is.null(ligand_colors)){ + ligand_colors <- hcl(h = hues, l = 65, c = 100)[1:n_ligands] + names(ligand_colors) <- unique(circos_links$ligand_type) + } + } + + # Check that ligand colors contains all ligand types + if (!all(unique(circos_links$ligand_type) %in% names(ligand_colors))){ + stop("ligand_colors must contain all cell types in circos_links$ligand_type") + } + + # If ligand colors contain additional cell types, intersect + if (length(setdiff(names(ligand_colors), unique(circos_links$ligand_type))) > 0){ + warning("ligand_colors contains additional cell types not in circos_links$ligand_type, these will be removed") + ligand_colors <- ligand_colors %>% .[names(.) %in% unique(circos_links$ligand_type)] + } + + # Check that target colors contains all target types + if (!all(unique(circos_links$target_type) %in% names(target_colors))){ + stop("target_colors must contain all target groupings in circos_links$target_type") + } + + # If target colors contain additional target types, intersect + if (length(setdiff(names(target_colors), unique(circos_links$target_type))) > 0){ + warning("target_colors contains additional target types not in circos_links$target_type, these will be removed") + target_colors <- target_colors %>% .[names(.) %in% unique(circos_links$target_type)] + } + + # Check that celltype_order contains all cell types + if (!is.null(celltype_order) & !all(unique(circos_links$ligand_type) %in% celltype_order)){ + stop("celltype_order must contain all cell types in circos_links$ligand_type") + } + + # If celltype_order contains additional cell types, intersect + if (!is.null(celltype_order) & length(setdiff(celltype_order, unique(circos_links$ligand_type))) > 0){ + warning("celltype_order contains additional cell types not in circos_links$ligand_type, these will be removed") + celltype_order <- celltype_order %>% .[. %in% unique(circos_links$ligand_type)] + } + + # If width is null, set default widths + if (is.null(widths)){ + widths <- list(width_same_cell_same_ligand_type = 0.5, + width_different_cell = 6, + width_ligand_target = 15, + width_same_cell_same_target_type = 0.5) + } + + # Check that widths contains all widths + if (!all(c("width_same_cell_same_ligand_type", "width_different_cell", "width_ligand_target", "width_same_cell_same_target_type") %in% names(widths))){ + stop("widths must contain all four width names") + } + + # Check that all widths are numeric + if (!all(is.numeric(unlist(widths)))){ + stop("all widths must be numeric") + } + + + # give each segment of ligands and targets a specific color and order + grid_col_tbl_ligand <- tibble(ligand_type = ligand_colors %>% names(), color_ligand_type = ligand_colors) + grid_col_tbl_target <- tibble(target_type = target_colors %>% names(), color_target_type = target_colors) + + circos_links <- circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! + circos_links <- circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) + links_circle <- circos_links %>% select(ligand, target, weight) + + ligand_color <- circos_links %>% distinct(ligand,color_ligand_type) + grid_ligand_color <- ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) + target_color <- circos_links %>% distinct(target,color_target_type) + grid_target_color <- target_color$color_target_type %>% set_names(target_color$target) + + grid_col <- c(grid_ligand_color, grid_target_color) + + # Prepare the circos visualization: order ligands and targets + target_type_order <- circos_links %>% arrange(target_type) %>% pull(target_type) %>% unique() + target_order <- circos_links %>% arrange(target_type, target) %>% pull(target) %>% unique() + + if (is.null(celltype_order)){ + circos_links_arranged <- circos_links %>% mutate( + ligand_type_order = case_when( + ligand_type == "General" ~ 1, + TRUE ~ 2 + )) %>% arrange(ligand_type_order, desc(ligand_type)) %>% + select(-ligand_type_order) + ligand_type_order <- circos_links_arranged %>% pull(ligand_type) %>% unique + ligand_order <- circos_links_arranged %>% pull(ligand) %>% unique + } else { + ligand_type_order <- celltype_order + # Arrange circos_links according to celltype_order + ligand_order <- circos_links %>% arrange(factor(ligand_type, levels = celltype_order), ligand) %>% pull(ligand) %>% unique + + } + + order <- c(ligand_order,target_order) + + # Prepare the circos visualization: define the gaps between the different segments + gaps_sender_cell_types <- unlist(lapply(seq_along(ligand_type_order), function(i) { + c(rep(widths$width_same_cell_same_ligand_type, + times = (circos_links %>% filter(ligand_type == ligand_type_order[i]) %>% distinct(ligand) %>% nrow()-1)), + if (i < length(ligand_type_order)) widths$width_different_cell) + })) + + gaps_target_types <- unlist(lapply(seq_along(target_type_order), function(i) { + c(rep(widths$width_same_cell_same_target_type, + times = (circos_links %>% filter(target_type == target_type_order[i]) %>% distinct(target) %>% nrow()-1)), + if (i < length(target_type_order)) widths$width_different_cell) + })) + + gaps <- c( + gaps_sender_cell_types, + widths$width_ligand_target, + gaps_target_types, + widths$width_ligand_target + ) + + return(list(links_circle = links_circle, ligand_colors = grid_col, order=order, gaps = gaps)) + +} + +#' @title Draw a circos plot +#' @usage make_circos_plot(vis_circos_obj, transparency = FALSE, args.circos.text = list(), ...) +#' @description Draw a circos plot +#' @param vis_circos_obj Object returned by \code{\link{prepare_circos_visualization}} +#' @param transparency Logical indicating whether the transparency of the links will correspond to the ligand-target potential score (default: FALSE) +#' @param args.circos.text List of arguments to pass to \code{\link{circos.text}} (by default, the text size is set to 1) +#' @param ... Additional arguments to pass to \code{\link{chordDiagram}} +#' @return A circos plot +#' @export +#' @examples +#' \dontrun{ +#' # Default +#' make_circos_plot(vis_circos_obj, transparency = FALSE) +#' +#' # Transparency +#' make_circos_plot(vis_circos_obj, transparency = TRUE) +#' +#' # Make text smaller +#' make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) +#' +#' # Don't sort links of each ligand based on widths (not recommended) +#' make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5), link.sort = FALSE) +#' } +make_circos_plot <- function(vis_circos_obj, transparency = FALSE, args.circos.text = list(), ...){ + # Check that transparency is a logical + if (!is.logical(transparency)) stop("transparency should be a logical") + + # Check that vis_circos_obj contains the required elements + if (!all(c("links_circle", "ligand_colors", "order", "gaps") %in% names(vis_circos_obj))) stop("vis_circos_obj should contain the elements 'links_circle', 'ligand_colors', 'order' and 'gaps'") + + # Check that all elements of args.circos.text is part of the arguments of circos.text + if (!all(names(args.circos.text) %in% names(formals(circos.text)))) { + warning("args.circos.text contain element(s) that are not part of the arguments of circos.text") + } + + # Check that all elements of ... is part of the arguments of chordDiagram + if (!all(names(list(...)) %in% names(formals(chordDiagram)))) { + warning("extra arguments contain element(s) that are not part of the arguments of chordDiagram") + } + + # give the option that links in the circos plot will be transparant ~ ligand-target potential score + if (!transparency){ + transparency_val <- 0 + } else if (transparency) { + transparency_val <- vis_circos_obj$links_circle %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency + } + + default_params <- list(x = vis_circos_obj$links_circle, + order=vis_circos_obj$order, + grid.col = vis_circos_obj$ligand_colors, + transparency = transparency_val, + directional = 1, link.sort = TRUE, + link.decreasing = FALSE, + diffHeight = 0.005, + direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", + link.visible = vis_circos_obj$links_circle$weight >= attr(vis_circos_obj$links_circle, "cutoff_include_all_ligands"), + annotationTrack = "grid", + preAllocateTracks = list(track.height = 0.075) + ) + + # Replace this with user arguments + default_params[names(list(...))] = list(...) + + circos_text_default_params <- list( + facing = "clockwise", + niceFacing = TRUE, + adj = c(0, 0.55), + cex = 1 + ) + circos_text_default_params[names(args.circos.text)] <- args.circos.text + + # Only the widths of the blocks that indicate each target gene is proportional the ligand-target regulatory potential (~prior knowledge supporting the regulatory interaction). + circos.par(gap.degree = vis_circos_obj$gaps) + do.call(chordDiagram, default_params) + + # we go back to the first track and customize sector labels + circos.track(track.index = 1, panel.fun = function(x, y) { + do.call(circos.text, c(list(x=CELL_META$xcenter, y=CELL_META$ylim[1], label=CELL_META$sector.index), circos_text_default_params) + ) + }, bg.border = NA) + circos.clear() +} + +#' @title Make a line plot +#' @usage make_line_plot(ligand_activities, potential_ligands, ranking_range = c(1, 20), agnostic_color = "tomato", focused_color = "black", tied_color = "gray75", inset_scale = 1) +#' @description Make a line plot comparing the ranking of ligands based on their activities in the sender-agnostic and sender-focused approaches +#' @param ligand_activities Dataframe containing the ligand activities from the sender-agnostic approach +#' @param potential_ligands Character vector containing the ligands that are expressed in the sender cell type (i.e. the ligands that are used in the sender-focused approach) +#' @param ranking_range Numeric vector of length 2 indicating the range of the rankings to be displayed (default: c(1, 20)) +#' @param agnostic_color Color representing ligands only inthe sender-agnostic approach (default: "tomato") +#' @param focused_color Color representing expressed ligands from the sender-focused approach (default: "black") +#' @param tied_color Color to shade ligands that are tied in the same rank (default: "gray75") +#' @param inset_scale Numeric value indicating the size of the points and text in the inset (default: 1) +#' @return A ggplot object showing the distribution of sender-focused ligands, as well as a line plot inset comparing the rankings between the two approaches +#' @examples \dontrun{ +#' # Default +#' make_line_plot(ligand_activities, potential_ligands) +#' } +#' @export + +make_line_plot <- function(ligand_activities, potential_ligands, ranking_range = c(1, 20), + agnostic_color = "tomato", focused_color = "black", tied_color = "gray75", + inset_scale = 1) { + + inset_text_size <- ggplot2::GeomLabel$default_aes$size*inset_scale + axis_text_size <- ggplot2::GeomLabel$default_aes$size*0.75*inset_scale + axis_title_size <- ggplot2::GeomLabel$default_aes$size*inset_scale + point_size <- ggplot2::GeomPoint$default_aes$size*inset_scale + segment_linewidth <- ggplot2::GeomSegment$default_aes$linewidth*inset_scale + nudge_x <- 0.05/inset_scale + + # Check if all potential ligands are in ligand_activities + if (!all(potential_ligands %in% ligand_activities$test_ligand)) stop("Not all potential ligands are in ligand_activities") + + # Check if ranking_range is small -> large + if (ranking_range[1] >= ranking_range[2]) stop("Starting range should be smaller than ending range") + + # Add rank to ligand_activities if it doesn't exist + if (!"rank" %in% names(ligand_activities)) ligand_activities <- ligand_activities %>% mutate(rank = rank(desc(aupr_corrected))) + + # x position of "sender-agnostic" ligands + agnostic_x <- 3.25 + focused_x <- agnostic_x+(1.5*inset_scale) + + # Create dataframe of the two approaches + rankings_df <- bind_rows(ligand_activities %>% select(test_ligand, rank) %>% mutate(type = "agnostic"), + ligand_activities %>% filter(test_ligand %in% potential_ligands) %>% + select(test_ligand, rank) %>% mutate(type = "focused")) %>% + group_by(type) %>% mutate(new_rank = 1:n(), + x = case_when(type == "agnostic" ~ agnostic_x, + type == "focused" ~ focused_x)) %>% + # Ligands that are expressed + group_by(test_ligand) %>% mutate(expressed = (n() > 1)) %>% ungroup() + + # Define some variables + start_n <- ranking_range[1] + end_n <- ranking_range[2] + n_ligands <- max(rankings_df$new_rank) + margin <- 1/10 # Leave 1/10 of plot empty at top and bottom + by_n <- ((n_ligands*margin*9)-(n_ligands*margin))/(end_n-start_n) # Space between each rank + #cutoff <- by_n+((end_n-start_n+0.5)*by_n) # Doesn't work for all cases + + # Set index to 1 for the start_n ligand + rankings_df <- rankings_df %>% + group_by(type) %>% + mutate(index = (-start_n+2):(n()-start_n+1), + y = (n_ligands*margin)+(by_n*(index-1))) + + cutoff <- (rankings_df %>% filter(new_rank == end_n, type == "agnostic") %>% pull(y)) + (by_n*0.25) + + # Line segments that go beyond the inset + line_df <- rankings_df %>% filter(expressed) %>% select(-c(rank, new_rank, index, expressed)) %>% + pivot_wider(names_from = type, values_from = c(x, y)) %>% + rename(x1 = x_agnostic, y1 = y_agnostic, x2 = x_focused, y2 = y_focused) %>% + # Use equation of a line to find the x value at the cutoff + # Different lines for the top and bottom cutoff + mutate(m = (y2-y1)/(x2-x1), x0 = case_when(y2 > by_n ~ ((cutoff-y1)/m)+x1, + y2 <= by_n ~ (((n_ligands*margin)-y1)/m)+x1)) + + # Highlight ties + ties_df <- rankings_df %>% group_by(type, rank) %>% + mutate(ties = n() > 1) %>% filter(ties == TRUE) %>% + ungroup() %>% group_split(type) %>% lapply(., function(group) { + group %>% split(f = .$rank) %>% + sapply(., function (k) data.frame(range(k$y))) %>% bind_cols %>% t() %>% data.frame() %>% + `colnames<-`(c("ystart", "yend")) %>% `rownames<-`(NULL) %>% + mutate(xstart = case_when(unique(group$type) == "agnostic" ~ agnostic_x, + unique(group$type) == "focused" ~ focused_x), + xend = xstart) + }) %>% bind_rows() %>% + # Clip the lines to the cutoff + filter(ystart < cutoff, yend > by_n) %>% mutate(yend = case_when(yend > cutoff ~ cutoff, + TRUE ~ yend), + ystart = case_when(ystart <= (n_ligands*margin) ~ (n_ligands*margin)-(by_n*0.5), + TRUE ~ ystart)) + + # Subset the dataframe to the range of interest + rankings_df_subset <- rankings_df %>% filter(new_rank <= end_n, new_rank >= start_n) + + ggplot() + + # BAR PLOT + # Base + expressed ligands drawn as line segments + geom_rect(aes(ymin=1, ymax=n_ligands, xmin=0.5, xmax=1.5), fill = agnostic_color) + + geom_segment(data = rankings_df %>% filter(type == "agnostic", expressed), + aes(y=new_rank, yend=new_rank, x=0.5, xend=1.5), color = focused_color, linewidth = segment_linewidth) + + # y-axis + ticks + geom_segment(aes(y=0, yend=max(labeling::extended(0, n_ligands, 5)), x=0.3, xend=0.3)) + + geom_segment(data = (axis_df <- data.frame(x=0.3, xend=0.25, y=labeling::extended(0, n_ligands, 5), + yend=labeling::extended(0, n_ligands, 5))), + aes(y=y, yend=yend, x=x, xend=xend)) + + # y-axis ticklabels + title + geom_text(data=axis_df, aes(x=xend-0.05, y=y, label=y, hjust=1), size=axis_text_size) + + geom_text(aes(x=0, y=n_ligands/2, label="Ligand rankings", vjust=-1/inset_scale), angle=90, size=axis_title_size) + + # Title + #geom_text(aes(x=1, y=0, label = "Distribution of expressed ligands\nacross all sender-agnostic ligands"), nudge_y=by_n) + + ggtitle("Distribution of expressed ligands\nacross all sender-agnostic ligands") + + # ELBOW CONNECTORS + # Top, vertical line, bottom, horizontal line connecting to inset + geom_segment(data = data.frame(x = c(1.6, 1.65, 1.65, 1.65), + xend = c(1.65, 1.65, 1.60, agnostic_x-1.25), + y=c(min(rankings_df_subset$new_rank), min(rankings_df_subset$new_rank), max(rankings_df_subset$new_rank), ((end_n-start_n)/2)+start_n-1), + yend=c(min(rankings_df_subset$new_rank), max(rankings_df_subset$new_rank), max(rankings_df_subset$new_rank), ((end_n-start_n)/2)+start_n-1)), + aes(x=x, xend=xend, y=y, yend=yend)) + + # LINE PLOT + # Ties + geom_segment(data = ties_df, aes(x=xstart, y=ystart, xend=xend, yend=yend), + color = tied_color, linewidth=3, lineend="round") + + # Points + geom_point(data = rankings_df_subset, aes(x=x, y=y, color = expressed), size = point_size) + + # Line segment from focused -> agnostic + geom_segment(data = line_df %>% filter(x0 > agnostic_x, y2 < cutoff, y2 >= (n_ligands*margin)), aes(x=x2, y=y2, xend=x0, yend=cutoff), linewidth = segment_linewidth) + + # Line segment from agnostic -> focused + geom_segment(data = line_df %>% filter(x0 > agnostic_x, y1 < cutoff, y1 >= (n_ligands*margin)), aes(x=x1, y=y1, xend=x0, yend=(n_ligands*margin)), linewidth = segment_linewidth) + + # Line segment within range + geom_line(data = rankings_df_subset, aes(x=x, y=y, group = test_ligand), linewidth = segment_linewidth) + + # Ligand names + geom_text(data = rankings_df_subset %>% filter(type == "agnostic"), aes(x=x, y=y, label = test_ligand, hjust = "right", color = expressed), nudge_x = -nudge_x, size=inset_text_size) + + geom_text(data = rankings_df_subset %>% filter(type == "focused"), aes(x=x, y=y, label = test_ligand, hjust = "left", color = expressed), nudge_x = nudge_x, size=inset_text_size) + + # Ranking labels + geom_text(data = rankings_df_subset, aes(x=agnostic_x-0.75, y=y, label=new_rank), size=inset_text_size) + + # Outer rectangle + geom_rect(aes(ymin=0, ymax=n_ligands*(margin*19/2), xmin=agnostic_x-1.25, xmax = agnostic_x+2.25), fill=NA, color="black") + + # Heading + geom_text(data = data.frame(x = c(agnostic_x-0.75, agnostic_x, focused_x), + y = n_ligands*(margin/2), label = c("Rank", "Agnostic", "Focused")), + aes(x=x, y=y, label=label), size=inset_text_size) + + # PLOT SETTINGS + scale_color_manual(values = c("TRUE" = focused_color, "FALSE" = agnostic_color), breaks=c(TRUE, FALSE), labels = c("Only agnostic", "Tied")) + + scale_y_reverse() + + xlim(0, agnostic_x+2.5) + + labs(y = "Ligand rankings") + + guides(color = guide_legend(override.aes = list(shape = c(19, 15), size = c(2, 4), color = c(agnostic_color, tied_color)))) + + theme_classic() + + theme(axis.title = element_blank(), axis.text = element_blank(), + axis.line = element_blank(), axis.ticks = element_blank(), + legend.title = element_blank(), legend.direction = "horizontal", + legend.position = c(0.5, 0.05), + legend.background = element_blank(), + legend.text = element_text(size = 12), + plot.title = element_text(hjust=0.11, margin=margin(0, 0, -20, 0))) + +} diff --git a/R/differential_nichenet_plotting.R b/R/differential_nichenet_plotting.R index 47356dd..3b3bbb8 100644 --- a/R/differential_nichenet_plotting.R +++ b/R/differential_nichenet_plotting.R @@ -722,9 +722,6 @@ make_ligand_receptor_lfc_spatial_plot = function(receiver_oi, prioritized_tbl_oi #' #' @description \code{make_circos_lr} Plot the prioritized ligand-receptor pairs in a circos plot (via the circlize package) #' -#' @usage -#' make_circos_lr(prioritized_tbl_oi, colors_sender, colors_receiver, cutoff = 0, scale = FALSE, transparency = NULL, circos_type = "normal", border = TRUE) -#' #' @param prioritized_tbl_oi Dataframe with the ligand-receptor interactions that should be visualized #' @param colors_sender Named character vector giving the colors of each sender cell type #' @param colors_receiver Named character vector giving the colors of each receiver cell type @@ -733,6 +730,7 @@ make_ligand_receptor_lfc_spatial_plot = function(receiver_oi, prioritized_tbl_oi #' @param transparency Vector of transparency values of the links or NULL, in that case this will be calculated automatically. Default: NULL. #' @param circos_type "normal" or "arrow". Default: "normal". #' @param border Border to arrows or not in `chordDiagram`? (Default: TRUE) +#' @param separate_legend return plot and legend as separate objects? (Default: FALSE) #' #' @return List containing the circos plot and the legend #' @@ -747,7 +745,8 @@ make_ligand_receptor_lfc_spatial_plot = function(receiver_oi, prioritized_tbl_oi #' #' @export #' -make_circos_lr= function(prioritized_tbl_oi, colors_sender, colors_receiver, cutoff = 0, scale = FALSE, transparency = NULL, circos_type = "normal", border = TRUE){ +make_circos_lr= function(prioritized_tbl_oi, colors_sender, colors_receiver, cutoff = 0, scale = FALSE, transparency = NULL, circos_type = "normal", border = TRUE, + separate_legend = FALSE){ requireNamespace("dplyr") requireNamespace("ggplot2") @@ -773,6 +772,10 @@ make_circos_lr= function(prioritized_tbl_oi, colors_sender, colors_receiver, cut #circos_links = circos_links_oi %>% dplyr::rename(weight = prioritization_score) %>% dplyr::mutate(ligand = paste(sender, ligand, sep = "_"), receptor = paste(receptor, receiver, sep = "_")) + if (!"ligand_receptor" %in% colnames(circos_links)) { + circos_links = circos_links %>% dplyr::mutate(ligand_receptor = paste(ligand, receptor, sep = "--")) + } + df = circos_links %>% mutate(ligand_receptor_sender_receiver = paste0(sender, receiver, ligand_receptor)) ligand.uni = unique(df$ligand) @@ -955,24 +958,28 @@ make_circos_lr= function(prioritized_tbl_oi, colors_sender, colors_receiver, cut plot(NULL ,xaxt='n',yaxt='n',bty='n',ylab='',xlab='', xlim=0:1, ylim=0:1) # grid_col_all = c(grid_col_receptor, grid_col_ligand) - legend = ComplexHeatmap::Legend(at = prioritized_tbl_oi$receiver %>% unique() %>% sort(), + legend_receiver = ComplexHeatmap::Legend(at = prioritized_tbl_oi$receiver %>% unique() %>% sort(), type = "grid", legend_gp = grid::gpar(fill = grid_col_receptor[prioritized_tbl_oi$receiver %>% unique() %>% sort()]), title_position = "topleft", title = "Receiver") - ComplexHeatmap::draw(legend, just = c("left", "bottom")) - legend = ComplexHeatmap::Legend(at = prioritized_tbl_oi$sender %>% unique() %>% sort(), + legend_sender = ComplexHeatmap::Legend(at = prioritized_tbl_oi$sender %>% unique() %>% sort(), type = "grid", legend_gp = grid::gpar(fill = grid_col_ligand[prioritized_tbl_oi$sender %>% unique() %>% sort()]), title_position = "topleft", title = "Sender") - ComplexHeatmap::draw(legend, just = c("left", "top")) - p_legend = grDevices::recordPlot() + circos_legend_sender <- grid::grid.grabExpr(ComplexHeatmap::draw(legend_sender)) + circos_legend_receiver <- grid::grid.grabExpr(ComplexHeatmap::draw(legend_receiver)) + aligned_legend <- cowplot::plot_grid(NULL, circos_legend_sender, circos_legend_receiver, NULL, ncol=1, rel_heights = c(1, 1, 1, 1)) + + if (separate_legend){ + return(list(p_circos = p_circos, p_legend = aligned_legend)) + } - return(list(p_circos = p_circos, p_legend = p_legend)) + return(cowplot::plot_grid(p_circos, aligned_legend, rel_widths = c(1, 0.1))) } diff --git a/R/parameter_optimization.R b/R/parameter_optimization.R index eb1c3f5..3832338 100644 --- a/R/parameter_optimization.R +++ b/R/parameter_optimization.R @@ -1,9 +1,9 @@ -#' @title Construct and evaluate a ligand-target model given input parameters with the purpose of parameter optimization. +#' @title Construct and evaluate a ligand-target model given input parameters with the purpose of parameter optimization with mlrMBO. #' -#' @description \code{model_evaluation_optimization} will take as input a setting of parameters (data source weights and hyperparameters) and layer-specific networks to construct a ligand-target matrix and evaluate its performance on input validation settings (average performance for both target gene prediction and ligand activity prediction, as measured via the auroc and aupr). +#' @description \code{model_evaluation_optimization_mlrmbo} will take as input a setting of parameters (data source weights and hyperparameters) and layer-specific networks to construct a ligand-target matrix and evaluate its performance on input validation settings (average performance for both target gene prediction and ligand activity prediction, as measured via the auroc and aupr). #' #' @usage -#' model_evaluation_optimization(x, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...) +#' model_evaluation_optimization_mlrmbo(x, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...) #' #' @inheritParams evaluate_model #' @inheritParams construct_ligand_target_matrix @@ -20,12 +20,13 @@ #' library(dplyr) #' nr_datasources = source_weights_df$source %>% unique() %>% length() #' test_input = list("source_weights" = rep(0.5, times = nr_datasources), "lr_sig_hub" = 0.5, "gr_hub" = 0.5, "damping_factor" = 0.5) -# test_evaluation_optimization = model_evaluation_optimization(test_input, source_weights_df$source %>% unique(), "PPR", TRUE, lr_network, sig_network, gr_network, lapply(expression_settings_validation,convert_expression_settings_evaluation), secondary_targets = FALSE, remove_direct_links = "no") +#' expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) +# test_evaluation_optimization = model_evaluation_optimization_mlrmbo(test_input, source_weights_df$source %>% unique(), "PPR", TRUE, lr_network, sig_network, gr_network, lapply(expression_settings_validation,convert_expression_settings_evaluation), secondary_targets = FALSE, remove_direct_links = "no") #' } #' #' @export #' -model_evaluation_optimization = function(x, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...){ +model_evaluation_optimization_mlrmbo = function(x, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...){ requireNamespace("dplyr") if (!is.null(damping_factor) & is.null(x$damping_factor)){ # for the case damping factor is a fixed parameter @@ -116,7 +117,7 @@ model_evaluation_optimization = function(x, source_names, algorithm, correct_top return(c(mean_auroc_target_prediction, mean_aupr_target_prediction, median_auroc_ligand_prediction, median_aupr_ligand_prediction)) } -#' @title Optimization of objective functions via model-based optimization. +#' @title Optimization of objective functions via model-based optimization (mlrMBO). #' #' @description \code{mlrmbo_optimization} will execute multi-objective model-based optimization of an objective function. The defined surrogate learner here is "kriging". #' @@ -192,14 +193,14 @@ mlrmbo_optimization = function(run_id,obj_fun,niter,ncores,nstart,additional_arg parallelStop() return(res) } -#' @title Construct and evaluate a ligand-target model given input parameters with the purpose of hyperparameter optimization. +#' @title Construct and evaluate a ligand-target model given input parameters with the purpose of hyperparameter optimization using mlrMBO. #' -#' @description \code{model_evaluation_hyperparameter_optimization} will take as input a setting of parameters (hyperparameters), data source weights and layer-specific networks to construct a ligand-target matrix and evaluate its performance on input validation settings (average performance for both target gene prediction and ligand activity prediction, as measured via the auroc and aupr). +#' @description \code{model_evaluation_hyperparameter_optimization_mlrmbo} will take as input a setting of parameters (hyperparameters), data source weights and layer-specific networks to construct a ligand-target matrix and evaluate its performance on input validation settings (average performance for both target gene prediction and ligand activity prediction, as measured via the auroc and aupr). #' #' @usage -#' model_evaluation_hyperparameter_optimization(x, source_weights, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...) +#' model_evaluation_hyperparameter_optimization_mlrmbo(x, source_weights, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...) #' -#' @inheritParams model_evaluation_optimization +#' @inheritParams model_evaluation_optimization_mlrmbo #' @param x A list containing the following elements. $lr_sig_hub: hub correction factor for the ligand-signaling network; $gr_hub: hub correction factor for the gene regulatory network; $damping_factor: damping factor in the PPR algorithm if using PPR and optionally $ltf_cutoff: the cutoff on the ligand-tf matrix. For more information about these parameters: see \code{construct_ligand_target_matrix} and \code{apply_hub_correction}. #' @param source_weights A named numeric vector indicating the weight for every data source. #' @param ... Additional arguments to \code{make_discrete_ligand_target_matrix}. @@ -213,12 +214,12 @@ mlrmbo_optimization = function(run_id,obj_fun,niter,ncores,nstart,additional_arg #' test_input = list("lr_sig_hub" = 0.5, "gr_hub" = 0.5, "damping_factor" = 0.5) #' source_weights = source_weights_df$weight #' names(source_weights) = source_weights_df$source -# test_evaluation_optimization = model_evaluation_hyperparameter_optimization(test_input, source_weights, "PPR", TRUE, lr_network, sig_network, gr_network, lapply(expression_settings_validation,convert_expression_settings_evaluation), secondary_targets = FALSE, remove_direct_links = "no") +# test_evaluation_optimization = model_evaluation_hyperparameter_optimization_mlrmbo(test_input, source_weights, "PPR", TRUE, lr_network, sig_network, gr_network, lapply(expression_settings_validation,convert_expression_settings_evaluation), secondary_targets = FALSE, remove_direct_links = "no") #' } #' #' @export #' -model_evaluation_hyperparameter_optimization = function(x, source_weights, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...){ +model_evaluation_hyperparameter_optimization_mlrmbo = function(x, source_weights, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL,...){ requireNamespace("dplyr") if (!is.null(damping_factor) & is.null(x$damping_factor)){ # for the case damping factor is a fixed parameter @@ -377,6 +378,436 @@ process_mlrmbo_nichenet_optimization = function(optimization_results,source_name return(output_optimization) } + +#' @title Construct and evaluate a ligand-target model with the purpose of parameter optimization with NSGA-II. +#' @description \code{model_evaluation_optimization_nsga2} will take as input a vector of data source weights and hyperparameters to construct a ligand-target matrix and evaluate its performance on input validation settings. +#' @usage +#' model_evaluation_optimization_nsga2(y, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",damping_factor = NULL) +#' +#' @param y A numeric vector containing the data source weights as the first elements, and hyperparameters as the last elements. The order of the data source weights accords to the order of source_names. +#' @inheritParams model_evaluation_optimization_mlrmbo +#' +#' @return A numeric vector of length 4 containing the average auroc for target gene prediction, average aupr (corrected for TP fraction) for target gene prediction, average auroc for ligand activity prediction and average aupr for ligand activity prediction. +#' +#' @examples +#' \dontrun{ +#' nr_datasources = source_weights_df$source %>% unique() %>% length() +#' test_input = c(rep(0.5, times = nr_datasources), 0.5, 0.5, 0.5, 0.5) +#' expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) +#' test_evaluation_optimization = model_evaluation_optimization_nsga2(test_input, source_weights_df$source %>% unique(), "PPR", TRUE, lr_network, sig_network, gr_network, +#' lapply(expression_settings_validation, convert_expression_settings_evaluation), secondary_targets = FALSE, remove_direct_links = "no") +#' } +#' +#' @export +model_evaluation_optimization_nsga2r = function(y, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no", damping_factor = NULL) { + # change numeric vector y input to list x + library(dplyr) + library(tidyr) + source_names_zero_possible <- c(sig_network$source, gr_network$source) %>% unique() + + x = list() + x$source_weights = y[1:length(source_names)] + names(x$source_weights) = source_names + x$source_weights[ (names(x$source_weights) %in% source_names_zero_possible) & (x$source_weights <= 0.025) ] = 0 + x$source_weights[ (names(x$source_weights) %in% source_names) & (x$source_weights >= 0.975) ] = 1 + + other_params = y[length(source_names)+1 : length(y)] + x$lr_sig_hub = other_params[1] + x$gr_hub = other_params[2] + x$ltf_cutoff = other_params[3] + x$damping_factor = other_params[4] + + if (!is.null(damping_factor) & is.null(x$damping_factor)) { + x$damping_factor = damping_factor + } + if (!is.list(x)) + stop("x should be a list!") + if (!is.numeric(x$source_weights)) + stop("x$source_weights should be a numeric vector") + if (x$lr_sig_hub < 0 | x$lr_sig_hub > 1) + stop("x$lr_sig_hub must be a number between 0 and 1 (0 and 1 included)") + if (x$gr_hub < 0 | x$gr_hub > 1) + stop("x$gr_hub must be a number between 0 and 1 (0 and 1 included)") + if (is.null(x$ltf_cutoff)) { + if ((algorithm == "PPR" | algorithm == "SPL") & correct_topology == + FALSE) + warning("Did you not forget to give a value to x$ltf_cutoff?") + } + else { + if (x$ltf_cutoff < 0 | x$ltf_cutoff > 1) + stop("x$ltf_cutoff must be a number between 0 and 1 (0 and 1 included)") + } + if (algorithm == "PPR") { + if (x$damping_factor < 0 | x$damping_factor >= 1) + stop("x$damping_factor must be a number between 0 and 1 (0 included, 1 not)") + } + if (algorithm != "PPR" & algorithm != "SPL" & algorithm != + "direct") + stop("algorithm must be 'PPR' or 'SPL' or 'direct'") + if (correct_topology != TRUE & correct_topology != FALSE) + stop("correct_topology must be TRUE or FALSE") + if (!is.data.frame(lr_network)) + stop("lr_network must be a data frame or tibble object") + if (!is.data.frame(sig_network)) + stop("sig_network must be a data frame or tibble object") + if (!is.data.frame(gr_network)) + stop("gr_network must be a data frame or tibble object") + if (!is.list(settings)) + stop("settings should be a list!") + if (!is.character(settings[[1]]$from) | !is.character(settings[[1]]$name)) + stop("setting$from and setting$name should be character vectors") + if (!is.logical(settings[[1]]$response) | is.null(names(settings[[1]]$response))) + stop("setting$response should be named logical vector containing class labels of the response that needs to be predicted ") + if (secondary_targets != TRUE & secondary_targets != FALSE) + stop("secondary_targets must be TRUE or FALSE") + if (remove_direct_links != "no" & remove_direct_links != + "ligand" & remove_direct_links != "ligand-receptor") + stop("remove_direct_links must be 'no' or 'ligand' or 'ligand-receptor'") + if (!is.character(source_names)) + stop("source_names should be a character vector") + if (length(source_names) != length(x$source_weights)) + stop("Length of source_names should be the same as length of x$source_weights") + if (correct_topology == TRUE && !is.null(x$ltf_cutoff)) + warning("Because PPR-ligand-target matrix will be corrected for topology, the proposed cutoff on the ligand-tf matrix will be ignored (x$ltf_cutoff") + if (correct_topology == TRUE && algorithm != "PPR") + warning("Topology correction is PPR-specific and makes no sense when the algorithm is not PPR") + # names(x$source_weights) = source_names + + parameters_setting = list(model_name = "query_design", source_weights = x$source_weights) + if (algorithm == "PPR") { + if (correct_topology == TRUE) { + parameters_setting = add_hyperparameters_parameter_settings(parameters_setting, + lr_sig_hub = x$lr_sig_hub, gr_hub = x$gr_hub, + ltf_cutoff = 0, algorithm = algorithm, damping_factor = x$damping_factor, + correct_topology = TRUE) + } + else { + parameters_setting = add_hyperparameters_parameter_settings(parameters_setting, + lr_sig_hub = x$lr_sig_hub, gr_hub = x$gr_hub, + ltf_cutoff = x$ltf_cutoff, algorithm = algorithm, + damping_factor = x$damping_factor, correct_topology = FALSE) + } + } + if (algorithm == "SPL" | algorithm == "direct") { + parameters_setting = add_hyperparameters_parameter_settings(parameters_setting, + lr_sig_hub = x$lr_sig_hub, gr_hub = x$gr_hub, ltf_cutoff = x$ltf_cutoff, + algorithm = algorithm, damping_factor = NULL, correct_topology = FALSE) + } + output_evaluation = evaluate_model(parameters_setting, lr_network, + sig_network, gr_network, settings, calculate_popularity_bias_target_prediction = FALSE, + calculate_popularity_bias_ligand_prediction = FALSE, + ncitations = ncitations, secondary_targets = secondary_targets, + remove_direct_links = remove_direct_links, n_target_bins = 3) + + ligands_evaluation = settings %>% sapply(function(x){x$from}) %>% unlist() %>% unique() + + ligand_activity_performance_setting_summary = output_evaluation$performances_ligand_prediction_single %>% select(-setting, -ligand) %>% group_by(importance_measure) %>% summarise_all(mean) %>% group_by(importance_measure) %>% mutate(geom_average = exp(mean(log(c(auroc,aupr_corrected))))) + best_metric = ligand_activity_performance_setting_summary %>% ungroup() %>% filter(geom_average == max(geom_average)) %>% pull(importance_measure) %>% .[1] + performances_ligand_prediction_single_summary = output_evaluation$performances_ligand_prediction_single %>% filter(importance_measure == best_metric) + + performances_target_prediction_averaged = ligands_evaluation %>% lapply(function(x){x}) %>% + lapply(wrapper_average_performances, output_evaluation$performances_target_prediction,"median") %>% bind_rows() %>% drop_na() + performances_ligand_prediction_single_summary_averaged = ligands_evaluation %>% lapply(function(x){x}) %>% + lapply(wrapper_average_performances, performances_ligand_prediction_single_summary %>% select(-importance_measure),"median") %>% bind_rows() %>% drop_na() + + mean_auroc_target_prediction = performances_target_prediction_averaged$auroc %>% mean(na.rm = TRUE) %>% unique() + mean_aupr_target_prediction = performances_target_prediction_averaged$aupr_corrected %>% mean(na.rm = TRUE) %>% unique() + + # we want also to look at median ligand prediction, but also the mean: why? median focuses on improving half of the datasets, but can lead to ignorance of a few bad datasets -- try to semi-avoid this with the mean + ## combine both mean and median + median_auroc_ligand_prediction = performances_ligand_prediction_single_summary_averaged$auroc %>% median(na.rm = TRUE) %>% unique() + median_aupr_ligand_prediction = performances_ligand_prediction_single_summary_averaged$aupr_corrected %>% median(na.rm = TRUE) %>% unique() + + mean_auroc_ligand_prediction = performances_ligand_prediction_single_summary_averaged$auroc %>% mean(na.rm = TRUE) %>% unique() + mean_aupr_ligand_prediction = performances_ligand_prediction_single_summary_averaged$aupr_corrected %>% mean(na.rm = TRUE) %>% unique() + + score_auroc_ligand_prediction = (median_auroc_ligand_prediction + mean_auroc_ligand_prediction) * 0.5 + score_aupr_ligand_prediction = (median_aupr_ligand_prediction + mean_aupr_ligand_prediction) * 0.5 + + return(c(mean_auroc_target_prediction*-1, mean_aupr_target_prediction*-1, + score_auroc_ligand_prediction*-1, score_aupr_ligand_prediction*-1)) +} + +#' @title Run NSGA-II for parameter optimization. +#' @description \code{run_nsga2R_cluster} runs the NSGA-II algorithm for parameter optimization and allows for parallelization. The core of this function is adapted from \code{nsga2R::nsga2R}. +#' @usage +#' run_nsga2R_cluster(run_id, fn, varNo, objDim, lowerBounds = rep(-Inf, varNo), upperBounds = rep(Inf, varNo), popSize = 100, tourSize = 2, generations = 20, cprob = 0.7, XoverDistIdx = 5, mprob = 0.2, MuDistIdx = 10, ncores = 24) +#' +#' @param fn The function to be optimized, usually \code{model_evaluation_optimization_nsga2}. +#' @param varNo The number of variables to be optimized, usually the number of data sources + 4 hyperparameters (lr_sig_hub, gr_hub, ltf_cutoff, damping_factor). +#' @param objDim Number of objective functions +#' @param lowerBounds A numeric vector containing the lower bounds for the variables to be optimized (default: -Inf) +#' @param upperBounds A numeric vector containing the upper bounds for the variables to be optimized (default: Inf) +#' @param popSize The population size (default: 100) +#' @param tourSize The tournament size (default: 2) +#' @param generations The number of generations (default: 20) +#' @param cprob The crossover probability (default: 0.7) +#' @param XoverDistIdx The crossover distribution index, it can be any nonnegative real number (default: 5) +#' @param mprob The mutation probability (default: 0.2) +#' @param MuDistIdx The mutation distribution index, it can be any nonnegative real number (default: 10) +#' @param ncores The number of cores to be used for parallelization (default: 24) +#' @param ... Additional arguments to \code{fn}. +#' +#' @return An 'nsga2R' object containing input settings and the following elements: +#' \itemize{ +#' \item intermed_output_list_params: a list with intermediate values of parameters for each generation (each element has dimensions popSize x varNo) +#' \item intermed_output_list_obj: a list with intermediate values of objective functions for each generation (each element has dimensions popSize x objDim) +#' \item parameters: the solutions of variables that were optimized +#' \item objectives: non-dominated objective function values +#' \item paretoFrontRank: nondomination ranks (or levels) that each non-dominated solution belongs to +#' \item crowdingDistance: crowding distances of each non-dominated solution +#' } +#' +#' @examples +#' \dontrun{ +#' source_names <- c(lr_network$source, sig_network$source, gr_network$source) %>% unique() +#' n_param <- length(source_names) + 4 +#' n_obj <- 4 +#' lower_bounds <- c(rep(0,times = length(source_names)), 0, 0, 0.9, 0.01) +#' upper_bounds <- c(rep(1,times = length(source_names)), 1, 1, 0.999, 0.99) +#' results <- run_nsga2R_cluster(model_evaluation_optimization_nsga2r, varNo=n_param, objDim=n_obj, +#' lowerBounds=lower_bounds, upperBounds=upper_bounds, popSize = 360, tourSize = 2, generations = 15, ncores = 8) +#' } +#' +#' @export +run_nsga2R_cluster = function (fn, varNo, objDim, lowerBounds = rep(-Inf, varNo), + upperBounds = rep(Inf, varNo), popSize = 100, tourSize = 2, + generations = 20, ncores = 24, cprob = 0.7, XoverDistIdx = 5, mprob = 0.2, + MuDistIdx = 10, ...) { + library(nsga2R) + library(dplyr) + library(tidyr) + + intermed_output_list_params = list() + intermed_output_list_obj = list() + + doMC::registerDoMC(ncores) + + cat("********** R based Nondominated Sorting Genetic Algorithm II *********") + cat("\n") + cat("initializing the population") + cat("\n") + + print(Sys.time()) + + parent <- t(sapply(1:popSize, function(u) array(runif(length(lowerBounds), + lowerBounds, upperBounds)))) + # parent_old = parent + # parent_classic <- cbind(parent, t(apply(parent, 1, fn))) + parent <- cbind(parent, t(parallel::mclapply(split(parent, 1:nrow(parent)), fn, mc.cores = ncores, ...) %>% unlist() %>% matrix(nrow = objDim))) + + cat("ranking the initial population") + cat("\n") + ranking <- fastNonDominatedSorting(parent[, (varNo + 1):(varNo + + objDim)]) + rnkIndex <- integer(popSize) + i <- 1 + while (i <= length(ranking)) { + rnkIndex[ranking[[i]]] <- i + i <- i + 1 + } + parent <- cbind(parent, rnkIndex) + cat("crowding distance calculation") + cat("\n") + objRange <- apply(parent[, (varNo + 1):(varNo + objDim)], + 2, max) - apply(parent[, (varNo + 1):(varNo + objDim)], + 2, min) + cd <- crowdingDist4frnt(parent, ranking, objRange) + parent <- cbind(parent, apply(cd, 1, sum)) + for (iter in 1:generations) { + print(iter) + print(Sys.time()) + cat("---------------generation---------------", + iter, "starts") + cat("\n") + cat("tournament selection") + cat("\n") + matingPool <- tournamentSelection(parent, popSize, tourSize) + cat("crossover operator") + cat("\n") + childAfterX <- boundedSBXover(matingPool[, 1:varNo], + lowerBounds, upperBounds, cprob, XoverDistIdx) + cat("mutation operator") + cat("\n") + childAfterM <- boundedPolyMutation(childAfterX, lowerBounds, + upperBounds, mprob, MuDistIdx) + cat("evaluate the objective fns of childAfterM") + cat("\n") + # childAfterM_old = childAfterM + # childAfterM_classic <- cbind(childAfterM, t(apply(childAfterM, + # 1, fn))) + childAfterM <- cbind(childAfterM, t(parallel::mclapply(split(childAfterM, 1:nrow(childAfterM)), fn, mc.cores = ncores) %>% unlist() %>% matrix(nrow = objDim))) + + cat("Rt = Pt + Qt") + cat("\n") + parentNext <- rbind(parent[, 1:(varNo + objDim)], childAfterM) + cat("ranking again") + cat("\n") + ranking <- fastNonDominatedSorting(parentNext[, (varNo + + 1):(varNo + objDim)]) + i <- 1 + while (i <= length(ranking)) { + rnkIndex[ranking[[i]]] <- i + i <- i + 1 + } + parentNext <- cbind(parentNext, rnkIndex) + cat("crowded comparison again") + cat("\n") + objRange <- apply(parentNext[, (varNo + 1):(varNo + objDim)], + 2, max) - apply(parentNext[, (varNo + 1):(varNo + + objDim)], 2, min) + cd <- crowdingDist4frnt(parentNext, ranking, objRange) + parentNext <- cbind(parentNext, apply(cd, 1, sum)) + parentNext.sort <- parentNext[order(parentNext[, varNo + + objDim + 1], -parentNext[, varNo + objDim + 2]), + ] + cat("environmental selection") + cat("\n") + parent <- parentNext.sort[1:popSize, ] + cat("---------------generation---------------", + iter, "ends") + cat("\n") + if (iter != generations) { + # intermed_output_list_params[[iter]] = parent[, 1:varNo] + # intermed_output_list_obj[[iter]] = parent[, (varNo + 1):(varNo + objDim)] + cat("\n") + cat("********** new iteration *********") + cat("\n") + + } + else { + cat("********** stop the evolution *********") + cat("\n") + } + intermed_output_list_params[[iter]] = parent[, 1:varNo] + intermed_output_list_obj[[iter]] = parent[, (varNo + 1):(varNo + objDim)] + } + result = list(intermed_output_list_params = intermed_output_list_params, + intermed_output_list_obj = intermed_output_list_obj, + functions = fn, + parameterDim = varNo, + objectiveDim = objDim, + lowerBounds = lowerBounds, + upperBounds = upperBounds, + popSize = popSize, + tournamentSize = tourSize, + generations = generations, + XoverProb = cprob, + XoverDistIndex = XoverDistIdx, + mutationProb = mprob, + mutationDistIndex = MuDistIdx, + parameters = parent[,1:varNo], + objectives = parent[, (varNo + 1):(varNo + objDim)], + paretoFrontRank = parent[, varNo + objDim + 1], + crowdingDistance = parent[, varNo + objDim + 2]) + class(result) = "nsga2R" + return(result) +} + +#' @title Get optimized parameters from the output of \code{run_nsga2R_cluster}. +#' @description \code{get_optimized_parameters_nsga2} will take as input the output of \code{run_nsga2R_cluster} and extract the optimal parameter values, either from the best solution at the end of the generations or the best solution across all generations. +#' @usage +#' get_optimized_parameters_nsga2(result_nsga2r, source_names, search_all_iterations = FALSE, top_n = NULL, summarise_weights = TRUE) +#' +#' @param result_nsga2r The output of \code{run_nsga2R_cluster}. +#' @param source_names Character vector containing the names of the data sources. +#' @param search_all_iterations Logical indicating whether the best solution across all generations should be considered (TRUE) or only the best solution at the end of the generations (FALSE). +#' @param top_n If search_all_iterations=TRUE, this indicates how many of the best solutions should be considered. +#' @param summarise_weights If search_all_iterations=TRUE, a logical indicating whether the weights should be summarised by taking the mean and median. +#' +#' @return A list containing two dataframes, the optimal data source weights and the optimal hyperparameters. +#' +#' @examples +#' \dontrun{ +#' results <- run_nsga2R_cluster(model_evaluation_optimization_nsga2r, varNo=n_param, objDim=n_obj, +#' lowerBounds=lower_bounds, upperBounds=upper_bounds, popSize = 360, tourSize = 2, generations = 15, ncores = 8) +#' +#' # Get the best solution at the end of the generations +#' optimized_parameters <- get_optimized_parameters_nsga2(results, source_names, search_all_iterations = FALSE, top_n = NULL, summarise_weights = TRUE) +#' +#' # Get the best solution across all generations, consider top 25 solutions and summarise weights +#' optimized_parameters <- get_optimized_parameters_nsga2(results, source_names, search_all_iterations = TRUE, top_n = 25, summarise_weights = TRUE) +#' } +#' +#' @export +#' +get_optimized_parameters_nsga2r = function(result_nsga2r, source_names, search_all_iterations = FALSE, top_n = NULL, summarise_weights = TRUE){ + + if (!search_all_iterations & is.numeric(top_n)){ + message("search_all_iterations is FALSE, so top_n will be ignored") + } + + if (search_all_iterations & !is.numeric(top_n)){ + stop("search_all_iterations is TRUE but top_n is not a number") + } + + if (!is.logical(search_all_iterations)){ + stop("search_all_iterations must be TRUE or FALSE") + } + + if (!is.logical(summarise_weights)){ + stop("summarise_weights must be TRUE or FALSE") + } + + optimization_results = result_nsga2r[[1]] + list_element <- ifelse(search_all_iterations, "intermed_output_list_obj", "objectives") + hyperparam_names <- c("lr_sig_hub", "gr_hub", "ltf_cutoff", "damping_factor") + + processed_optimization <- optimization_results[[list_element]] %>% + { if (search_all_iterations) lapply(., as_tibble) %>% bind_rows() else . } %>% + magrittr::set_colnames(c("y_1","y_2","y_3","y_4")) %>% as_tibble() %>% + mutate_all(function(x){-x}) %>% + mutate(average = apply(.,1,function(x){exp(mean(log(x)))}), index = seq(nrow(.))) %>% + arrange(-average) + + + if (!search_all_iterations){ + # take the best parameter setting considering the geometric mean of the objective function results + parameter_set_index = processed_optimization %>% filter(average == max(average)) %>% .$index + params = optimization_results$parameters[parameter_set_index,] + + # Data source weights + source_weight_df = tibble(source = source_names, weight = params[1:length(source_names)]) + + # Hyperparameters + hyperparams_df = tibble(parameter = hyperparam_names, weight = params[(length(source_names)+1):length(params)]) + + } else { + + # Get best weights for all variables + all_weights <- processed_optimization %>% + dplyr::top_n(top_n, average) %>% pull(index) %>% + lapply(function(index) { + params = optimization_results$intermed_output_list_params %>% lapply(data.frame) %>% bind_rows() %>% .[index,] %>% as.double() + source_weights = tibble(source = source_names, weight = params[1:length(source_names)], index = index) + + other_params = params[(length(source_names)+1): length(params)] + other_params_df = tibble(parameter = hyperparam_names, weight = c(other_params[1], other_params[2], other_params[3], other_params[4]), index = index) + + list(source_weights = source_weights, + hyperparams = other_params_df) + }) + + # Extract data source and hyperparameter weights + source_weight_df <- purrr::map(all_weights, "source_weights") %>% bind_rows() %>% + mutate_cond(weight <= 0.025 & source %in% source_names_zero_possible, weight = 0) %>% + mutate_cond(weight >= 0.975 & source %in% source_names, weight = 1) %>% + # If summarise_weights is TRUE, summarise weights by taking the mean and median + {if (summarise_weights) group_by(., source) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) else . } + + hyperparams_df <- purrr::map(all_weights, "hyperparams") %>% bind_rows() %>% + # If summarise_weights is TRUE, summarise weights by taking the mean and median + {if (summarise_weights) group_by(.,parameter) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) else . } + } + + output_optimization = list() + output_optimization$source_weight_df = source_weight_df + output_optimization$hyperparams_df = hyperparams_df + + return(output_optimization) +} + + #' @title Construct and evaluate a ligand-target model given input parameters with the purpose of parameter optimization for multi-ligand application. #' #' @description \code{model_evaluation_optimization_application} will take as input a setting of parameters (data source weights and hyperparameters) and layer-specific networks to construct a ligand-target matrix and evaluate its performance on input application settings (average performance for target gene prediction, as measured via the auroc and aupr). @@ -384,7 +815,7 @@ process_mlrmbo_nichenet_optimization = function(optimization_results,source_name #' @usage #' model_evaluation_optimization_application(x, source_names, algorithm, correct_topology, lr_network, sig_network, gr_network, settings, secondary_targets = FALSE, remove_direct_links = "no",classification_algorithm = "lda",damping_factor = NULL,...) #' -#' @inheritParams model_evaluation_optimization +#' @inheritParams model_evaluation_optimization_mlrmbo #' @param classification_algorithm The name of the classification algorithm to be applied. Should be supported by the caret package. Examples of algorithms we recommend: with embedded feature selection: "rf","glm","fda","glmnet","sdwd","gam","glmboost", "pls" (load "pls" package before!); without: "lda","naive_bayes", "pcaNNet". Please notice that not all these algorithms work when the features (i.e. ligand vectors) are categorical (i.e. discrete class assignments). #' @param ... Additional arguments to \code{evaluate_multi_ligand_target_prediction}. #' @@ -503,19 +934,15 @@ model_evaluation_optimization_application = function(x, source_names, algorithm, #' settings = lapply(expression_settings_validation[1:4], convert_expression_settings_evaluation) #' weights_settings_loi = prepare_settings_leave_one_in_characterization(lr_network = lr_network, sig_network = sig_network, gr_network = gr_network, source_weights_df) #' weights_settings_loi = lapply(weights_settings_loi,add_hyperparameters_parameter_settings, lr_sig_hub = 0.25,gr_hub = 0.5,ltf_cutoff = 0,algorithm = "PPR", damping_factor = 0.2, correct_topology = TRUE) - #' doMC::registerDoMC(cores = 4) #' job_characterization_loi = parallel::mclapply(weights_settings_loi[1:4], evaluate_model,lr_network = lr_network, sig_network = sig_network, gr_network = gr_network, settings,calculate_popularity_bias_target_prediction = FALSE, calculate_popularity_bias_ligand_prediction = FALSE, ncitations, mc.cores = 4) #' loi_performances = process_characterization_target_prediction_average(job_characterization_loi) - # run characterization loo #' weights_settings_loo = prepare_settings_leave_one_out_characterization(lr_network = lr_network, sig_network = sig_network, gr_network = gr_network, source_weights_df) #' weights_settings_loo = lapply(weights_settings_loo,add_hyperparameters_parameter_settings, lr_sig_hub = 0.25,gr_hub = 0.5,ltf_cutoff = 0,algorithm = "PPR", damping_factor = 0.2, correct_topology = TRUE) - #' doMC::registerDoMC(cores = 4) #' job_characterization_loo = parallel::mclapply(weights_settings_loo[1:4], evaluate_model,lr_network = lr_network, sig_network = sig_network, gr_network = gr_network, settings,calculate_popularity_bias_target_prediction = FALSE, calculate_popularity_bias_ligand_prediction = FALSE,ncitations,mc.cores = 4) #' loo_performances = process_characterization_target_prediction_average(job_characterization_loo) - # run the regression #' sources_oi = c("kegg_cytokines") #' output = estimate_source_weights_characterization(loi_performances,loo_performances,source_weights_df %>% filter(source != "kegg_cytokines"), sources_oi, random_forest =FALSE) @@ -679,3 +1106,92 @@ evaluate_model_cv = function(parameters_setting, lr_network, sig_network, gr_net return(list(performances_target_prediction = performances_target_prediction, importances_ligand_prediction = all_importances, performances_ligand_prediction_single = performances_ligand_prediction_single)) } + +#' @title Visualize parameter values from the output of \code{run_nsga2R_cluster}. +#' @description \code{visualize_parameter_values} will take as input the output of \code{run_nsga2R_cluster} and visualize the data source weights and hyperparameters of the best and worst solutions +#' @usage +#' visualize_parameter_values(result_nsga2r, source_names, top_ns = c(5, 10, -10, -25)) +#' +#' @param result_nsga2r The output of \code{run_nsga2R_cluster}. +#' @param source_names Character vector containing the names of the data sources. +#' @param top_ns Numeric vector indicating how many of the best and worst solutions should be considered (negative values indicate the worst solutions; default: c(5, 10, -10, -25)). +#' +#' @return A list containing two ggplot objects, one for the data source weights and one for the hyperparameters. +#' +#' @examples +#' \dontrun{ +#' results <- run_nsga2R_cluster(model_evaluation_optimization_nsga2r, varNo=n_param, objDim=n_obj, +#' lowerBounds=lower_bounds, upperBounds=upper_bounds, popSize = 360, tourSize = 2, generations = 15, ncores = 8) +#' +#' # Visualize the best and worst 5 solutions +#' visualize_parameter_values(results, source_names, top_ns = c(5, -5)) +#' } +#' +#' @export +#' +visualize_parameter_values = function(result_nsga2r, source_names, top_ns = c(5, 10, -10, -25)){ + + optimized_params <- lapply(top_ns, function(n) { + get_optimized_parameters_nsga2r(result_nsga2r, source_names, search_all_iterations = TRUE, top_n = n, summarise_weights = FALSE) %>% + lapply(mutate, n = paste0(ifelse(n < 0, "bad", "top"), abs(n))) + }) + + source_weights_boxplot = purrr::map(optimized_params, "source_weight_df") %>% bind_rows() %>% + ggplot(aes(x = n, y = weight, group = n, color = n)) + geom_boxplot() + geom_point() + facet_wrap(.~source) + theme_bw() + + hyperparameters_boxplot = purrr::map(optimized_params, "hyperparams_df") %>% bind_rows() %>% + ggplot(aes(x = n, y = weight, group = n, color = n)) + geom_boxplot() + geom_point() + facet_wrap(.~parameter) + theme_bw() + + return(list(source_weights_boxplot = source_weights_boxplot, hyperparameters_boxplot = hyperparameters_boxplot)) +} + + +#' @title Visualize parameter values from the output of \code{run_nsga2R_cluster} across cross-validation folds. +#' @description \code{visualize_parameter_values_across_folds} will take as input the output of \code{run_nsga2R_cluster} and visualize the data source weights and hyperparameters of the best solutions across all folds. +#' +#' @usage +#' visualize_parameter_values_across_folds(result_nsga2r_list, source_names, top_n = 25) +#' +#' @param result_nsga2r_list A list containing the outputs of \code{run_nsga2R_cluster} for each cross-validation fold. +#' @param source_names Character vector containing the names of the data sources. +#' @param top_n Numeric indicating how many of the best solutions should be considered. +#' +#' @return A list containing two ggplot objects, one for the data source weights and one for the hyperparameters. +#' +#' @examples +#' \dontrun{ +#' results_list <- lapply(cv_folds, function(fold){ +#' settings <- readRDS(paste0("settings_training_f", fold))$settings +#' forbidden_gr <- bind_rows( +#' gr_network %>% filter(database == "NicheNet_LT" & from %in% settings$forbidden_ligands_nichenet), +#' gr_network %>% filter(database == "CytoSig" & from %in% settings$forbidden_ligands_cytosig)) +#' gr_network_subset <- gr_network %>% setdiff(forbidden_gr) +#' run_nsga2R_cluster(model_evaluation_optimization_nsga2r, varNo=n_param, objDim=n_obj, +#' lowerBounds=lower_bounds, upperBounds=upper_bounds, popSize = 360, tourSize = 2, generations = 15, ncores = 8, +#' source_names = source_names, algorithm = "PPR", correct_topology = FALSE, lr_network = lr_network, sig_network = lr_network, gr_network = gr_network_subset, +#' settings = settings, secondary_targets = FALSE, remove_direct_links = "no", damping_factor = NULL) +#' }) +#' +#' # Visualize the best 25 solutions across all folds +#' visualize_parameter_values_across_folds(results_list, source_names, top_n = 25) +#' } +#' +#' @export +visualize_parameter_values_across_folds = function(result_nsga2r_list, source_names, top_n = 25){ + + # Get best weights for each fold + optimized_params <- lapply(1:length(result_nsga2r_list), function(i){ + get_optimized_parameters_nsga2r(result_nsga2r_list[[i]], source_names, search_all_iterations = TRUE, top_n = top_n, summarise_weights = FALSE) %>% + lapply(mutate, fold = paste0("Fold", i)) + + }) + + source_weights_boxplot = purrr::map(optimized_params, "source_weight_df") %>% bind_rows() %>% + ggplot(aes(x = fold, y = weight, group = fold, color = fold)) + geom_boxplot() + geom_point() + facet_wrap(.~source) + theme_bw() + + hyperparameters_boxplot = purrr::map(optimized_params, "hyperparams_df") %>% bind_rows() %>% + ggplot(aes(x = fold, y = weight, group = fold, color = fold)) + geom_boxplot() + geom_point() + facet_wrap(.~parameter) + theme_bw() + + + return(list(source_weights_boxplot = source_weights_boxplot, hyperparameters_boxplot = hyperparameters_boxplot)) +} \ No newline at end of file diff --git a/R/prioritization.R b/R/prioritization.R index 8bf0185..89baf42 100644 --- a/R/prioritization.R +++ b/R/prioritization.R @@ -22,7 +22,7 @@ check_names <- function(column, seurat_obj = NULL){ #' @param celltype_colname Name of the meta data column that indicates the cell type of a cell #' @param condition_oi If provided, subset seurat_obj so DE is only calculated for cells belonging to condition_oi #' @param condition_colname Name of the meta data column that indicates from which group/condition a cell comes from -#' @param assay_oi Which assay need to be used for DE calculation. Default RNA, alternatives: SCT. +#' @param assay_oi Which assay need to be used for DE calculation. If NULL, will use \code{DefaultAssay} #' @param ... Arguments passed to Seurat::FindAllMarkers(by default: features = NULL, min.pct = 0, logfc.threshold = 0, return.thresh = 1) #' #' @return A dataframe containing the DE results @@ -40,7 +40,7 @@ check_names <- function(column, seurat_obj = NULL){ #' @export #' calculate_de = function(seurat_obj, celltype_colname, - condition_oi = NA, condition_colname = NA, + condition_oi = NULL, condition_colname = NULL, assay_oi = "RNA", ...){ @@ -53,12 +53,12 @@ calculate_de = function(seurat_obj, celltype_colname, # Replace this with user arguments FindAllMarkers_args[names(list(...))] = list(...) - if (any(!is.na(condition_colname), !is.na(condition_oi)) & !all(!is.na(condition_colname), !is.na(condition_oi))){ + if (any(!is.null(condition_colname), !is.null(condition_oi)) & !all(!is.null(condition_colname), !is.null(condition_oi))){ stop("Please input both condition_colname and condition_oi") } # Subset seurat obj to condition of interest - if (!is.na(condition_oi)) { + if (!is.null(condition_oi)) { seurat_obj = seurat_obj[,seurat_obj[[condition_colname]] == condition_oi] } @@ -80,11 +80,10 @@ calculate_de = function(seurat_obj, celltype_colname, #' @title Calculate average of gene expression per cell type. #' #' @description \code{get_exprs_avg} Calculate average of gene expression per cell type. If condition_oi is provided, only consider cells from that condition. -#' @usage -#' get_exprs_avg(seurat_obj, celltype_colname, condition_oi = NA, condition_colname = NA) #' #' @inheritParams calculate_de #' @param condition_oi If provided, subset seurat_obj so average expression is only calculated for cells belonging to condition_oi +#' @param ... Arguments passed to Seurat::AverageExpression, usually for slot/layer to use (default: data) #' #' @return Data frame with average gene expression per cell type. #' @@ -105,28 +104,37 @@ calculate_de = function(seurat_obj, celltype_colname, #' @export #' get_exprs_avg = function(seurat_obj, celltype_colname, - condition_oi = NA, condition_colname = NA){ + condition_oi = NULL, condition_colname = NULL, + assay_oi = NULL, ...){ requireNamespace("dplyr") - if (any(!is.na(condition_colname), !is.na(condition_oi)) & !all(!is.na(condition_colname), !is.na(condition_oi))){ + # Check that assay_oi is in seurat_obj + if (!is.null(assay_oi)){ + if (!assay_oi %in% names(seurat_obj@assays)) { + stop(paste0("assay_oi '", assay_oi, "' does not exist in seurat_obj")) + } + # Otherwise, use DefaultAssay + } else { + assay_oi <- DefaultAssay(seurat_obj) + } + + if (any(!is.null(condition_colname), !is.null(condition_oi)) & !all(!is.null(condition_colname), !is.null(condition_oi))){ stop("Please input both condition_colname and condition_oi") } # Subset seurat object - if (!is.na(condition_oi)) { + if (!is.null(condition_oi)) { seurat_obj = seurat_obj[,seurat_obj[[condition_colname]] == condition_oi] } seurat_obj <- NormalizeData(seurat_obj, verbose = FALSE) - avg_celltype <- AverageExpression(seurat_obj, assays = "RNA", slot = "data", group.by = celltype_colname) %>% - .$RNA %>% data.frame(check.names=FALSE) %>% rownames_to_column("gene") %>% + avg_celltype <- AverageExpression(seurat_obj, group.by = celltype_colname, assays = assay_oi, ...) %>% + .[[assay_oi]] %>% data.frame(check.names=FALSE) %>% rownames_to_column("gene") %>% pivot_longer(!gene, names_to = "cluster_id", values_to = "avg_expr") return (avg_celltype) - - } #' @title Process DE or expression information into intercellular communication focused information. #' @@ -167,6 +175,12 @@ get_exprs_avg = function(seurat_obj, celltype_colname, process_table_to_ic = function(table_object, table_type = "expression", lr_network, senders_oi = NULL, receivers_oi = NULL){ + # Rename lr_network columns to "ligand", "receptor" if the columns "from","to" exist + if (all(c("from", "to") %in% colnames(lr_network)) & + !all(c("ligand", "receptor") %in% colnames(lr_network))){ + lr_network <- lr_network %>% dplyr::rename(ligand = from, receptor = to) + } + ligands = lr_network %>% dplyr::pull(ligand) %>% unique() receptors = lr_network %>% dplyr::pull(receptor) %>% unique() @@ -182,13 +196,13 @@ process_table_to_ic = function(table_object, table_type = "expression", if (is.null(senders_oi)) warning("senders_oi is NULL For DE filtering, it is best if this parameter is given.") if (is.null(receivers_oi)) warning("receivers_oi is NULL For DE filtering, it is best if this parameter is given.") - sender_table <- table_object %>% dplyr::rename(sender = cluster_id, ligand = gene, avg_ligand = avg_log2FC, p_val_ligand = p_val, p_adj_ligand = p_val_adj, pct_expressed_sender = pct.1) - receiver_table <- table_object %>% dplyr::rename(receiver = cluster_id, receptor = gene, avg_receptor = avg_log2FC, p_val_receptor = p_val, p_adj_receptor = p_val_adj, pct_expressed_receiver = pct.1) + sender_table <- table_object %>% dplyr::rename(sender = tidyr::starts_with("cluster"), ligand = gene, avg_ligand = avg_log2FC, p_val_ligand = p_val, p_adj_ligand = p_val_adj, pct_expressed_sender = pct.1) + receiver_table <- table_object %>% dplyr::rename(receiver = tidyr::starts_with("cluster"), receptor = gene, avg_receptor = avg_log2FC, p_val_receptor = p_val, p_adj_receptor = p_val_adj, pct_expressed_receiver = pct.1) columns_select <- c("sender", "receiver", "ligand", "receptor", "lfc_ligand", "lfc_receptor", "ligand_receptor_lfc_avg", "p_val_ligand", "p_adj_ligand", "p_val_receptor", "p_adj_receptor", "pct_expressed_sender", "pct_expressed_receiver") } else if (table_type == "group_DE") { - if (!is.null(senders_oi)) stop("senders_oi is given. Since we do not consider cell type specificity, please change this to NULL") - if (!is.null(receivers_oi)) stop("receivers_oi is given. Since we do not consider cell type specificity, please change this to NULL") + if (!is.null(senders_oi)) stop("senders_oi is given. Since we do not consider cell type specificity with group DE, please change this to NULL") + if (!is.null(receivers_oi)) stop("receivers_oi is given. Since we do not consider cell type specificity with group DE, please change this to NULL") sender_table = table_object %>% dplyr::rename(ligand = gene, avg_ligand = avg_log2FC, p_val_ligand = p_val, p_adj_ligand = p_val_adj) receiver_table = table_object %>% dplyr::rename(receptor = gene, avg_receptor = avg_log2FC, p_val_receptor = p_val, p_adj_receptor = p_val_adj) @@ -196,7 +210,7 @@ process_table_to_ic = function(table_object, table_type = "expression", } - # Filter senders and receivers if it is not NA + # Filter senders and receivers if it is not NULL sender_table <- sender_table %>% {if (!is.null(senders_oi)) filter(., sender %in% senders_oi) else (.)} receiver_table <- receiver_table %>% {if (!is.null(receivers_oi)) filter(., receiver %in% receivers_oi) else (.)} @@ -220,21 +234,155 @@ process_table_to_ic = function(table_object, table_type = "expression", } +#' @title Generate tables used for \code{generate_prioritization_tables} +#' @description Calculate differential expression, average expression, and condition specificity of ligands and receptors. +#' @param seuratObj Seurat object +#' @param lr_network_filtered Ligand-receptor network that has been filtered to only contain ligands and receptors that are expressed +#' @param condition_reference Reference condition for condition specificity calculation +#' @param scenario "case_control" or "one_condition": if "case_control", calculate condition specificity. If "one_condition", only calculate cell type specificity. +#' @param ... Arguments passed to \code{FindAllMarkers}, \code{FindMarkers}, and \code{AverageExpression} +#' @inheritParams calculate_de +#' @inheritParams process_table_to_ic +#' @return List of dataframes containing sender-receiver DE, sender-receiver expression, and condition DE +#' @export +generate_info_tables <- function(seuratObj, + celltype_colname, + senders_oi, + receivers_oi, + lr_network_filtered, + condition_colname = NULL, + condition_oi = NULL, + condition_reference = NULL, + scenario = "case_control", + assay_oi = NULL, + ...){ + requireNamespace("dplyr") -#' @title generate_prioritization_tables -#' -#' @description \code{generate_prioritization_tables} Perform a prioritization of cell-cell interactions (similar to MultiNicheNet). -#' User can choose the importance attached to each of the following prioritization criteria: differential expression of ligand and receptor, cell-type specificity of expression of ligand and receptor, NicheNet ligand activity -#' @usage generate_prioritization_tables(sender_receiver_info, sender_receiver_de, ligand_activities, lr_condition_de = NULL, -#' prioritizing_weights = c("de_ligand" = 1,"de_receptor" = 1,"activity_scaled" = 2, -#' "exprs_ligand" = 1,"exprs_receptor" = 1, -#' "ligand_condition_specificity" = 0, "receptor_condition_specificity"=0)) -#' -#' @param sender_receiver_info Output of \code{get_exprs_avg} -> \code{process_table_to_ic} -#' @param sender_receiver_de Output of\code{calculate_de} -> \code{process_table_to_ic} + # Check that celltype_colname exists in Seurat object + if (!celltype_colname %in% names(seuratObj@meta.data)){ + stop(paste0("celltype_colname '", celltype_colname, "' does not exist in seuratObj")) + } + + if (any(!is.null(condition_colname), !is.null(condition_oi), !is.null(condition_reference)) & + !all(!is.null(condition_colname), !is.null(condition_oi), !is.null(condition_reference))){ + stop("condition_* arguments must be either all NULL or all provided") + } + + # If condition is not provided + if (all(is.null(condition_colname), is.null(condition_oi), is.null(condition_reference)) & + scenario == "case_control"){ + stop("condition_* arguments are not provided but the 'case_control' scenario is selected. Please either provide condition_* arguments or change scenario to 'one_condition', as condition specificity cannot be calculated.") + } + + # If condition is provided but scenario is one_condition + if (!is.null(condition_colname)) + # Check that condition_colname exists in Seurat object + if (!condition_colname %in% names(seuratObj@meta.data)){ + stop(paste0("condition_colname '", condition_colname, "' does not exist in seuratObj")) + } + if(scenario == "one_condition"){ + warning("condition_* arguments are provided but the 'one_condition' scenario is selected. Only cells from the condition_oi will be used to calculate cell type specificity, condition specificity will not be calculated.") + } + + # Check that senders_oi is in Seurat object + if (!all(senders_oi %in% unique(seuratObj[[celltype_colname, drop=TRUE]]))){ + stop(paste0("Not all senders_oi exist in the seurat object")) + } + + # Check that receivers_oi is in Seurat object + if (!all(receivers_oi %in% unique(seuratObj[[celltype_colname, drop=TRUE]]))){ + stop(paste0("Not all receivers_oi exist in the seurat object")) + } + + # Check that scenario is either case_control or one_condition + if (!scenario %in% c("case_control", "one_condition")){ + stop(paste0("scenario must be either 'case_control' or 'one_condition'")) + } + + # Check that assay_oi is in seurat_obj + if (!is.null(assay_oi)){ + if (!assay_oi %in% names(seuratObj@assays)) { + stop(paste0("assay_oi '", assay_oi, "' does not exist in Seurat object")) + } + # Otherwise, use DefaultAssay + } else { + assay_oi <- DefaultAssay(seuratObj) + } + + # If condition_colname is given, give message + if (!is.null(condition_colname)) { + message("condition_* is given. Only cells from that condition will be considered in cell type specificity calculation.") + } + + # Calculate DE of ligands and receptors + DE_table <- calculate_de(seuratObj, + celltype_colname = celltype_colname, + condition_colname = condition_colname, + condition_oi = condition_oi, + features = unique(unlist(lr_network_filtered)), + ...) + + # Average expression information + expression_info <- get_exprs_avg(seuratObj, + celltype_colname = celltype_colname, + condition_colname = condition_colname, + condition_oi = condition_oi, + features = unique(unlist(lr_network_filtered)), + assay_oi = assay_oi, + ...) + + if (scenario == "case_control"){ + # Calculate condition specificity + Idents(seuratObj) <- seuratObj[[condition_colname, drop=TRUE]] + + # Default settings to return all genes with their p-val and LFC + FindMarkers_args <- list(object = seuratObj, + ident.1 = condition_oi, + ident.2 = condition_reference, + group.by = "aggregate", + assay = assay_oi, + features = unique(unlist(lr_network_filtered)), + min.pct = 0, + logfc.threshold = 0) + + # Replace this with user arguments + FindMarkers_args[names(list(...))] <- list(...) + + condition_markers <- do.call(FindMarkers, FindMarkers_args) %>% + rownames_to_column("gene") + + processed_condition_markers <- process_table_to_ic(condition_markers, + table_type = "group_DE", + lr_network_filtered) + } else { + processed_condition_markers <- NULL + } + + # Combine DE of senders and receivers -> used for prioritization + processed_DE_table <- process_table_to_ic(DE_table, + table_type = "celltype_DE", + lr_network_filtered, + senders_oi = senders_oi, + receivers_oi = receivers_oi) + + processed_expr_table <- process_table_to_ic(expression_info, + table_type = "expression", + lr_network_filtered) + + return (list(sender_receiver_de = processed_DE_table, + sender_receiver_info = processed_expr_table, + lr_condition_de = processed_condition_markers)) + +} + +#' @title Perform a prioritization of cell-cell interactions (similar to MultiNicheNet). +#' @description User can choose the importance attached to each of the following prioritization criteria: differential expression of ligand and receptor, cell-type specificity of expression of ligand and receptor, NicheNet ligand activity +#' @param sender_receiver_info Output of \code{generate_info_tables} OR \code{get_exprs_avg} -> \code{process_table_to_ic} +#' @param sender_receiver_de Output of \code{generate_info_tables} OR \code{calculate_de} -> \code{process_table_to_ic} #' @param ligand_activities Output of \code{predict_ligand_activities} -#' @param lr_condition_de Output of \code{FindMarkers} -> \code{process_table_to_ic} -#' @param prioritizing_weights Named vector indicating the relative weights of each prioritization criterion +#' @param lr_condition_de Output of \code{generate_info_tables} OR \code{FindMarkers} -> \code{process_table_to_ic} +#' @param prioritizing_weights Named vector indicating the relative weights of each prioritization criterion (default: NULL). If NULL, the weights are determined by the chosen "scenario". If provided, the vector must contain the following names: "de_ligand", "de_receptor", "activity_scaled", "exprs_ligand", "exprs_receptor", "ligand_condition_specificity", "receptor_condition_specificity" +#' @param scenario "case_control" or "one_condition": if "case_control", all weights are set to 1. If "one_condition", the weights are set to 0 for condition specificity and 1 for the remaining criteria. #' #' @return Data frames of prioritized sender-ligand-receiver-receptor interactions. #' The resulting dataframe contains columns from the input dataframes, but columns from \code{lr_condition_de} are suffixed with \code{_group} (some columns from \code{lr_condition_de} are also not present). @@ -245,39 +393,24 @@ process_table_to_ic = function(table_object, table_type = "expression", #' \item \code{activity_zscore}: z-score of the ligand activity #' \item \code{prioritization_score}: The prioritization score for each interaction, calculated as a weighted sum of the prioritization criteria. #' } -#' Moreover, \code{scaled_*} columns are scaled using the corresponding column's ranking or the \code{scale_quantile_adapted} function. +#' Moreover, \code{scaled_*} columns are scaled using the corresponding column's ranking or the \code{scale_quantile_adapted} function. The columns used for prioritization are scaled_p_val_ligand_adapted, scaled_p_val_receptor_adapted, scaled_activity, scaled_avg_exprs_ligand, scaled_avg_exprs_receptor, scaled_p_val_ligand_adapted_group, scaled_p_val_receptor_adapted_group #' #' @import dplyr #' #' @examples #' \dontrun{ -#' library(dplyr) -#' lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds")) -#' lr_network = lr_network %>% dplyr::rename(ligand = from, receptor = to) %>% dplyr::distinct(ligand, receptor) -#' ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds")) -#' seurat_obj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) -#' seurat_obj$celltype <- make.names(seuratObj$celltype) -#' sender_celltypes = c("CD4.T","Treg", "Mono", "NK", "B", "DC") -#' receiver = "CD8.T" #' -#' # Convert lr_network from mouse to human -#' lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na() -#' colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols() -#' rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols() -#' ligand_target_matrix = ligand_target_matrix %>% .[!is.na(rownames(ligand_target_matrix)), !is.na(colnames(ligand_target_matrix))] +#' # Calculate tables with generate_info_tables +#' info_tables <- generate_info_tables(seurat_obj, "celltype", sender_celltypes, receiver, expressed_ligands, expressed_receptors, lr_network, "aggregate", "LCMV", "SS") #' -#' # Ligand activity analysis -#' seurat_obj_receiver = subset(seurat_obj, idents = receiver) %>% SetIdent(value = .[["aggregate", drop = TRUE]]) -#' geneset_oi = FindMarkers(object = seurat_obj_receiver, ident.1 = "LCMV, ident.2 = "SS, min.pct = 0.10) %>% rownames_to_column("gene") %>% -#' filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] -#' expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seurat_obj, 0.10) %>% unlist() %>% unique() -#' expressed_genes_receiver = get_expressed_genes(receiver, seurat_obj, pct = 0.10) -#' expressed_ligands = intersect(lr_network %>% pull(ligand) %>% unique(), expressed_genes_sender) -#' expressed_receptors = intersect(lr_network %>% pull(receiver) %>% unique(), expressed_genes_receiver) -#' potential_ligands = lr_network %>% filter(ligand %in% expressed_ligands & receptor %in% expressed_receptors) %>% pull(from) %>% unique() -#' ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)], -#' ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +#' # Generate prioritization tables +#' generate_prioritization_tables(info_tables$sender_receiver_info, +#' info_tables$sender_receiver_de, +#' ligand_activities, +#' info_tables$lr_condition_de, +#' scenario = "case_control") #' +#' # Alternatively, these tables can also be calculated manually: #' # Calculate LCMV-specific average expression #' expression_info = get_exprs_avg(seurat_obj, "celltype", condition_oi = "LCMV", condition_colname = "aggregate") #' @@ -294,28 +427,79 @@ process_table_to_ic = function(table_object, table_type = "expression", #' senders_oi = sender_celltypes, receivers_oi = receiver) #' processed_condition_DE_table <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network) #' -#' # Generate prioritization tables -#' prioritizing_weights = c("de_ligand" = 1, "de_receptor" = 1, "activity_scaled" = 2, "exprs_ligand" = 1, "exprs_receptor" = 1, "ligand_condition_specificity" = 0, "receptor_condition_specificity" = 0) -#' generate_prioritization_tables(processed_expr_info, -#' processed_DE_table, -#' ligand_activities, -#' processed_condition_DE_table, -#' prioritizing_weights) -#'} -#' @export +#' # Custom weights +#' prioritizing_weights = c("de_ligand" = 1, "de_receptor" = 1, "activity_scaled" = 2, "exprs_ligand" = 1, "exprs_receptor" = 1, "ligand_condition_specificity" = 0.5, "receptor_condition_specificity" = 0.5) #' +#' generate_prioritization_tables(processed_expr_info,processed_DE_table, ligand_activities, processed_condition_DE_table, prioritizing_weights) #' +#'} +#' @export generate_prioritization_tables = function(sender_receiver_info, sender_receiver_de, ligand_activities, lr_condition_de = NULL, - prioritizing_weights = c("de_ligand" = 1,"de_receptor" = 1,"activity_scaled" = 2, "exprs_ligand" = 1,"exprs_receptor" = 1, - "ligand_condition_specificity" = 0, "receptor_condition_specificity"=0)){ - + prioritizing_weights = NULL, + scenario = "case_control"){ requireNamespace("dplyr") + + # If prioritizing weights is NULL, check that scenario is either case_control or one_condition + if (is.null(prioritizing_weights)){ + if (!scenario %in% c("case_control", "one_condition")){ + stop(paste0("scenario must be either 'case_control' or 'one_condition'")) + } + if(scenario == "case_control"){ + # Check if lr_condition_de is NULL + if (is.null(lr_condition_de)){ + stop("lr_condition_de is NULL. Please provide lr_condition_de or change scenario to 'one_condition'") + } + + prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 1, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 1, + "receptor_condition_specificity" = 1) + } + else if(scenario == "one_condition"){ + if (!is.null(lr_condition_de)){ + warning("lr_condition_de is provided but will not be used for prioritization. Change scenario to 'case_control' if condition specificity should also be considered.") + } + + prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 1, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 0, + "receptor_condition_specificity" = 0) + } + + } else { + # Check that prioritizing weights has correct names + weight_names <- c("de_ligand", "de_receptor", "activity_scaled", "exprs_ligand", "exprs_receptor", "ligand_condition_specificity", "receptor_condition_specificity") + if (!all(weight_names %in% names(prioritizing_weights))){ + stop(paste0("prioritizing_weights must have the following names:", weight_names)) + } + # Check that prioritizing_weights is a named vector + if (!is.vector(prioritizing_weights) | !is.numeric(prioritizing_weights)){ + stop("prioritizing_weights must be a named numeric vector") + } + + # message using custom weights + message("Using custom weights for prioritization") + scenario <- NULL + } + + # If "rank" column doesn't exist for ligand_activities, calculate rank + if (!"rank" %in% colnames(ligand_activities)){ + ligand_activities <- ligand_activities %>% dplyr::mutate(rank = rank(desc(aupr_corrected))) + } + sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver) # Ligand DE prioritization - sender_ligand_prioritization = sender_receiver_de %>% dplyr::ungroup() %>% dplyr::select(sender, ligand, lfc_ligand, p_val_ligand) %>% dplyr::distinct() %>% - dplyr::mutate(lfc_pval_ligand = -log10(p_val_ligand)*lfc_ligand, - p_val_ligand_adapted = -log10(p_val_ligand)*sign(lfc_ligand)) + sender_ligand_prioritization = sender_receiver_de %>% dplyr::ungroup() %>% + dplyr::select(sender, ligand, lfc_ligand, p_val_ligand) %>% dplyr::distinct() %>% + dplyr::mutate(lfc_pval_ligand = -log10(p_val_ligand)*lfc_ligand, + p_val_ligand_adapted = -log10(p_val_ligand)*sign(lfc_ligand)) sender_ligand_prioritization = sender_ligand_prioritization %>% dplyr::mutate(scaled_lfc_ligand = rank(lfc_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_ligand, ties.method = "average", na.last = FALSE)), scaled_p_val_ligand = rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)/max(rank(desc(p_val_ligand), ties.method = "average", na.last = FALSE)), scaled_lfc_pval_ligand = rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)/max(rank(lfc_pval_ligand, ties.method = "average", na.last = FALSE)), @@ -332,7 +516,9 @@ generate_prioritization_tables = function(sender_receiver_info, sender_receiver_ scaled_p_val_receptor_adapted = rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE)/max(rank(p_val_receptor_adapted, ties.method = "average", na.last = FALSE))) %>% dplyr::arrange(-lfc_pval_receptor) # Ligand activity prioritization - ligand_activity_prioritization = ligand_activities %>% select(test_ligand, aupr_corrected, rank) %>% rename(activity=aupr_corrected, ligand=test_ligand) %>% + ligand_activity_prioritization = ligand_activities %>% + {if ('receiver' %in% colnames(.)) select(., test_ligand, aupr_corrected, rank, receiver) else select(., test_ligand, aupr_corrected, rank)} %>% + rename(activity=aupr_corrected, ligand=test_ligand) %>% dplyr::mutate(activity_zscore = nichenetr::scaling_zscore(activity), scaled_activity = scale_quantile_adapted(activity, outlier_cutoff = 0.01)) %>% dplyr::arrange(-activity_zscore) @@ -386,19 +572,17 @@ generate_prioritization_tables = function(sender_receiver_info, sender_receiver_ # have a weighted average the final score (no product!!) - sum_prioritization_weights = 2*weights["de_ligand"] + 2*weights["de_receptor"] + weights["activity_scaled"] + weights["exprs_ligand"] + weights["exprs_receptor"] + weights["ligand_condition_specificity"] + weights["receptor_condition_specificity"] + sum_prioritization_weights = weights["de_ligand"] + weights["de_receptor"] + weights["activity_scaled"] + weights["exprs_ligand"] + weights["exprs_receptor"] + weights["ligand_condition_specificity"] + weights["receptor_condition_specificity"] group_prioritization_tbl = group_prioritization_tbl %>% rowwise() %>% dplyr::mutate(prioritization_score = ( - (prioritizing_weights["de_ligand"] * ifelse("scaled_lfc_ligand" %in% names(group_prioritization_tbl), scaled_lfc_ligand, 0)) + - (prioritizing_weights["de_receptor"] * ifelse("scaled_lfc_receptor" %in% names(group_prioritization_tbl), scaled_lfc_receptor, 0)) + (prioritizing_weights["de_ligand"] * ifelse("scaled_p_val_ligand_adapted" %in% names(group_prioritization_tbl), scaled_p_val_ligand_adapted, 0)) + (prioritizing_weights["de_receptor"] * ifelse("scaled_p_val_receptor_adapted" %in% names(group_prioritization_tbl), scaled_p_val_receptor_adapted, 0)) + (prioritizing_weights["activity_scaled"] * ifelse("scaled_activity" %in% names(group_prioritization_tbl), scaled_activity, 0)) + (prioritizing_weights["exprs_ligand"] * ifelse("scaled_avg_exprs_ligand" %in% names(group_prioritization_tbl), scaled_avg_exprs_ligand, 0)) + (prioritizing_weights["exprs_receptor"] * ifelse("scaled_avg_exprs_receptor" %in% names(group_prioritization_tbl), scaled_avg_exprs_receptor, 0)) + - (prioritizing_weights["ligand_condition_specificity"] * ifelse("scaled_lfc_ligand_group" %in% names(group_prioritization_tbl), scaled_lfc_ligand_group, 0)) + - (prioritizing_weights["receptor_condition_specificity"] * ifelse("scaled_lfc_receptor_group" %in% names(group_prioritization_tbl), scaled_lfc_receptor_group, 0)) + (prioritizing_weights["ligand_condition_specificity"] * ifelse("scaled_p_val_ligand_adapted_group" %in% names(group_prioritization_tbl), scaled_p_val_ligand_adapted_group, 0)) + + (prioritizing_weights["receptor_condition_specificity"] * ifelse("scaled_p_val_receptor_adapted_group" %in% names(group_prioritization_tbl), scaled_p_val_receptor_adapted_group, 0)) )* (1/sum_prioritization_weights)) %>% dplyr::arrange(-prioritization_score) %>% ungroup() diff --git a/README.Rmd b/README.Rmd index 4f41f99..4e22ea2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -25,93 +25,109 @@ rmarkdown::render("README.Rmd",output_format = "md_document") We describe the NicheNet algorithm in the following paper: [NicheNet: modeling intercellular communication by linking ligands to target genes](https://www.nature.com/articles/s41592-019-0667-5). -### Major updates (20-06-2023)! - -* MultiNicheNet - a multi-sample, multi-condition extension of NicheNet - is now available on [biorxiv](https://www.biorxiv.org/content/10.1101/2023.06.13.544751v1) and [Github](https://github.com/saeyslab/multinichenetr). -* MultiNicheNet uses an [updated prior model (v2)](https://zenodo.org/record/7074291/) consisting of additional ligand-receptor interactions from the [Omnipath database](https://omnipathdb.org/) and from [Verschueren et al. (2020)](https://www.sciencedirect.com/science/article/pii/S0092867420306942?via%3Dihub). We have now also updated the vignettes of NicheNet to use the new model instead. -* **New functionality:** we have included additional functions to prioritize ligands not only based on the ligand activity, but also on the ligand and receptor expression, cell type specificity, and condition specificity. This is similar to the criteria used in Differential NicheNet and MultiNicheNet. See the [Prioritizing ligands based on expression values](vignettes/seurat_steps_prioritization.md) vignette for more information. -* Due to this more generalizable prioritization scheme, we will no longer provide support for Differential NicheNet. -* We included code for making a ligand-receptor-target circos plot in the [Circos plot visualization](vignettes/circos.md) vignette. +## Installation of nichenetr -## Introduction to NicheNet +Installation typically takes a few minutes, depending on the number of +dependencies that has already been installed on your PC. You can install +nichenetr (and required dependencies) from github with: -The figure below shows a graphical representation of the NicheNet workflow. Interactions inferred from several complementary ligand-receptor, signaling and gene regulatory data sources were aggregated in respective integrated networks from which ligand-target regulatory potential scores were calculated. This model of prior information on potential ligand-target links can then be used to infer active ligand-target links between interacting cells. NicheNet prioritizes ligands according to their activity (i.e., how well they predict observed changes in gene expression in the receiver cell) and looks for affected targets with high potential to be regulated by these prioritized ligands. +```{r gh-installation, eval = FALSE} +if(!requireNamespace("devtools", quietly = TRUE)) { + install.packages("devtools") +} -We offer the option to use the prebuilt prior model (such that the network integration steps should not be repeated), or to create and use your own prior model (see reference to detailed vignette below). +devtools::install_github("saeyslab/nichenetr") +``` -

-![](vignettes/workflow_nichenet.jpg) -

+nichenetr was tested on both Windows and Linux (most recently tested R version: R 4.3.2) -NicheNet strongly differs from most current computational approaches to study intercellular communication. Current approaches study intercellular communication from (single-cell) expression data by linking ligands expressed by sender cells to their corresponding receptors expressed by receiver cells. However, functional understanding of a cellular communication process also requires knowing how these inferred ligand-receptor interactions result in changes in the expression of downstream target genes within the receiver cells. To address this need, we developed NicheNet. Contrary to existing approaches, NicheNet looks at gene regulatory effects of ligands because the used prior knowledge goes beyond ligand-receptor interactions and incorporates intracellular signaling and transcriptional regulation as well. As a result, NicheNet allows to predict which ligands influence the expression in another cell, which target genes are affected by each ligand and which signaling mediators may be involved. By generating these novel types of hypotheses, NicheNet can drive an improved functional understanding of a cell-cell communication process of interest. The figure below summarizes the conceptual differences between most current ligand-receptor network inference approaches (top panel) and NicheNet (bottom panel) and visualizes the power of NicheNet in prioritizing ligand-receptor interactions based on gene expression effects. +## Overview of NicheNet +
+

Background

+NicheNet strongly differs from most computational approaches to study cell-cell communication (CCC), as summarized conceptually by the figure below (**top panel:** current ligand-receptor inference approaches; **bottom panel:** NicheNet). Many approaches to study CCC from expression data involve linking ligands expressed by sender cells to their corresponding receptors expressed by receiver cells. However, functional understanding of a CCC process also requires knowing how these inferred ligand-receptor interactions result in changes in the expression of downstream target genes within the receiver cells. Therefore, we developed NicheNet to consider the gene regulatory effects of ligands.

-![](vignettes/comparison_other_approaches_2.jpg){width=450} +![](vignettes/images/comparison_other_approaches_2.jpg){width=450}

-## Main functionalities of nichenetr +At the core of NicheNet is a prior knowledge model, created by integrating three types of databases—ligand-receptor interactions, signaling pathways, and transcription factor (TF) regulation—to form a complete communication network spanning from ligands to their downstream target genes (see figure below). Therefore, this model goes beyond ligand-receptor interactions and incorporates intracellular signaling and transcriptional regulation as well. As a result, NicheNet is able to predict which ligands influence the expression in another cell, which target genes are affected by each ligand, and which signaling mediators may be involved. By generating these novel types of hypotheses, NicheNet can drive an improved functional understanding of a CCC process of interest. Note that although we provide a pre-built prior model, it is also possible to construct your own model (see vignettes below). -Specific functionalities of this package include: +![](vignettes/images/nichenet_prior_model.png){width=70%} +
-* assessing how well ligands expressed by a sender cell can predict changes in gene expression in the receiver cell -* prioritizing ligands based on their effect on gene expression -* inferring putative ligand-target links active in the system under study -* inferring potential signaling paths between ligands and target genes of interest: to generate causal hypotheses and check which data sources support the predictions -* validation of the prior ligand-target model -* construction of user-defined prior ligand-target models +
+

Main functionalities of nichenetr

+ +* Assessing how well ligands expressed by a sender cell can predict changes in gene expression in the receiver cell +* Prioritizing ligands based on their effect on gene expression +* Inferring putative ligand-target links active in the system under study +* Inferring potential signaling paths between ligands and target genes of interest: to generate causal hypotheses and check which data sources support the predictions +* Validation of the prior ligand-target model +* Construction of user-defined prior ligand-target models Moreover, we provide instructions on how to make intuitive visualizations of the main predictions (e.g., via circos plots as shown here below).

-![](vignettes/circos_plot_adapted.jpg) +![](vignettes/images/circos_plot_adapted.jpg){width=600} -## Installation of nichenetr +
-Installation typically takes a few minutes, depending on the number of -dependencies that has already been installed on your pc. You can install -nichenetr (and required dependencies) from github with: +As input to NicheNet, users must provide cell type-annotated expression data that reflects a cell-cell communication (CCC) event. The input can be single-cell or sorted bulk data from human or mouse. As output, NicheNet returns the ranking of ligands that best explain the CCC event of interest, as well as candidate target genes with high potential to be regulated by these ligands. As an intermediate step, we extract the three features required for the analysis: a list of potential ligands, a gene set that captures the downstream effects of the CCC event of interest, and a background set of genes. Further explanation on each feature can be found in the introductory vignette. + +![](vignettes/images/figure1.svg) +

-```{r gh-installation, eval = FALSE} -# install.packages("devtools") -devtools::install_github("saeyslab/nichenetr") -``` -nichenetr was tested on both Windows and Linux (most recently tested R version: R 4.0.0) ## Learning to use nichenetr -To learn using nichenetr, read one of the following vignettes explaining several types of analyses: +The following vignettes contain the explanation on how to perform a basic NicheNet analysis on a Seurat object. This includes prioritizing ligands and predicting target genes of prioritized ligands. We recommend starting with the step-by-step analysis, but we also demonstrate the use of a single wrapper function. This demo analysis takes only a few minutes to run. -Following vignette contains the explanation on how to perform a basic NicheNet analysis. This includes prioritizing ligands and predicting target genes of prioritized ligands. This demo analysis takes only a few minutes to run: +* [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](vignettes/seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` +* [Perform NicheNet analysis starting from a Seurat object](vignettes/seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")` -* [NicheNet's ligand activity analysis on a gene set of interest: predict active ligands and their target genes](vignettes/ligand_activity_geneset.md): `vignette("ligand_activity_geneset", package="nichenetr")` +Case study on HNSCC tumor which demonstrates the flexibility of NicheNet. Here, the gene set of interest was determined by the original authors, and the expression data is a matrix rather than a Seurat object. -To facilitate the use of NicheNet on single-cell data, we demonstrate the use of NicheNet on a Seurat object in following vignettes. One demonstrates the use of a single wrapper function, the other demonstrates what's behind the wrapper (recommended). +* [NicheNet's ligand activity analysis on a gene set of interest](vignettes/ligand_activity_geneset.md): `vignette("ligand_activity_geneset", package="nichenetr")` -* [Perform NicheNet analysis starting from a Seurat object](vignettes/seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")` -* [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](vignettes/seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` -Following vignettes contain explanation on how to do some follow-up analyses after performing the most basic analysis: +The following vignettes contain explanation on how to do some follow-up analyses after performing the most basic analysis: * [Prioritization of ligands based on expression values](vignettes/seurat_steps_prioritization.md): `vignette("seurat_steps_prioritization", package="nichenetr")` * [Inferring ligand-to-target signaling paths](vignettes/ligand_target_signaling_path.md): `vignette("ligand_target_signaling_path", package="nichenetr")` * [Assess how well top-ranked ligands can predict a gene set of interest](vignettes/target_prediction_evaluation_geneset.md): `vignette("target_prediction_evaluation_geneset", package="nichenetr")` * [Single-cell NicheNet's ligand activity analysis](vignettes/ligand_activity_single_cell.md): `vignette("ligand_activity_single_cell", package="nichenetr")` -If you want to make a circos plot visualization of the NicheNet output, you can check following vignettes: +If you want to make a circos plot visualization of the NicheNet output to show active ligand-target links between interacting cells, you can check following vignettes: -* [Circos plot visualization to show active ligand-target links between interacting cells](vignettes/circos.md):`vignette("circos", package="nichenetr")`. -* [Seurat Wrapper + Circos visualization](vignettes/seurat_wrapper_circos.md):`vignette("seurat_wrapper_circos", package="nichenetr")`. +* [Seurat Wrapper + circos visualization](vignettes/seurat_wrapper_circos.md):`vignette("seurat_wrapper_circos", package="nichenetr")`. +* [HNSCC case study + double circos visualization](vignettes/circos.md):`vignette("circos", package="nichenetr")`. -People interested in building own models or benchmark own models against NicheNet can read one of the following vignettes: +People interested in building their own models or benchmarking their own models against NicheNet can read one of the following vignettes: * [Model construction](vignettes/model_construction.md): `vignette("model_construction", package="nichenetr")` +* [Using LIANA ligand-receptor databases to construct the ligand-target model](vignettes/model_construction_with_liana.md): `vignette("model_construction_with_liana", package="nichenetr")` * [Model evaluation: target gene and ligand activity prediction](vignettes/model_evaluation.md): `vignette("model_evaluation", package="nichenetr")` -* [Parameter optimization via mlrMBO](vignettes/parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` +* [Parameter optimization via NSGAII-R](vignettes/parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` -##### Deprecated vignettes -Differential NicheNet has been deprecated; you may want to consider using the [general prioritization scheme](vignettes/seurat_steps_prioritization.md) instead. +## FAQ + +Check the FAQ page at [FAQ NicheNet](vignettes/faq.md): `vignette("faq", package="nichenetr")` + +
+

Previous updates

+ +**20-06-2023:** + +* MultiNicheNet - a multi-sample, multi-condition extension of NicheNet - is now available on [biorxiv](https://www.biorxiv.org/content/10.1101/2023.06.13.544751v1) and [Github](https://github.com/saeyslab/multinichenetr). +* MultiNicheNet uses an [updated prior model (v2)](https://zenodo.org/record/7074291/) consisting of additional ligand-receptor interactions from the [Omnipath database](https://omnipathdb.org/) and from [Verschueren et al. (2020)](https://www.sciencedirect.com/science/article/pii/S0092867420306942?via%3Dihub). We have now also updated the vignettes of NicheNet to use the new model instead. +* **New functionality:** we have included additional functions to prioritize ligands not only based on the ligand activity, but also on the ligand and receptor expression, cell type specificity, and condition specificity. This is similar to the criteria used in Differential NicheNet and MultiNicheNet. See the [Prioritizing ligands based on expression values](vignettes/seurat_steps_prioritization.md) vignette for more information. +* Due to this more generalizable prioritization scheme, we will no longer provide support for Differential NicheNet. +* We included code for making a ligand-receptor-target circos plot in the [Circos plot visualization](vignettes/circos.md) vignette. + +
Deprecated vignettes
+Differential NicheNet has been deprecated: we will not longer provide support or code fixes on Differential NicheNet and its vignettes. You may want to consider using the [general prioritization scheme](vignettes/seurat_steps_prioritization.md) instead. * [Differential NicheNet analysis between niches of interest](vignettes/differential_nichenet.md):`vignette("differential_nichenet", package="nichenetr")` * [Differential NicheNet analysis between conditions of interest](vignettes/differential_nichenet_pEMT.md):`vignette("differential_nichenet_pEMT", package="nichenetr")` @@ -120,11 +136,6 @@ In NicheNet v2, the mouse and human ligand-target models are uploaded separately * [Converting NicheNet's model from human to mouse symbols](vignettes/symbol_conversion.md): `vignette("symbol_conversion", package="nichenetr")` -## FAQ - -Check the FAQ page at [FAQ NicheNet](vignettes/faq.md): `vignette("faq", package="nichenetr")` - -## Previous updates **12-01-2022:** In the Liver Atlas paper from Guilliams et al.: [Spatial proteogenomics reveals distinct and evolutionarily conserved hepatic macrophage niches](https://www.sciencedirect.com/science/article/pii/S0092867421014811), we used Differential NicheNet, an extension to the default NicheNet algorithm. **Differential NicheNet** can be used to compare cell-cell interactions between different niches and better predict niche-specific ligand-receptor (L-R) pairs. It was used in that paper to predict ligand-receptor pairs specific for the Kupffer cell niche in mouse and human. @@ -134,6 +145,7 @@ So if you have data of multiple conditions or niches, and you want to include di **15-10-2019:** Bonnardel, T'Jonck et al. used NicheNet to predict upstream niche signals driving Kupffer cell differentiation [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). +
## References diff --git a/README.md b/README.md index 0b5a021..9b447b2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - -This vignette shows how NicheNet can be used to predict active -ligand-target links between multiple interacting cells and how you can -make a circos plot to summarize the top-predicted links (via the -circlize package). This vignette starts in the same way as the main, -basis, NicheNet vignette [NicheNet’s ligand activity analysis on a gene -set of interest: predict active ligands and their target -genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. -Make sure you understand the different steps described in that vignette -before proceeding with this vignette. In contrast to the basic vignette, -we will look communication between multiple cell types. More -specifically, we will predict which ligands expressed by both CAFs and -endothelial cells can induce the p-EMT program in neighboring malignant -cells (See Puram et al. 2017). - -### Load packages required for this vignette +This vignette shows how to visualize the output of a NicheNet analysis +in a circos plot. This is an additional demonstration to [Seurat +Wrapper + circos visualization](seurat_wrapper_circos.md). Here, we will +use a normal matrix as input instead of a Seurat object. + +We will use the same dataset as from [NicheNet’s ligand activity +analysis on a gene set of interest](ligand_activity_geneset.md) Puram et al. (2017). In +contrast to the basic vignette, we will look at communication between +multiple cell types. More specifically, we will predict which ligands +expressed by both CAFs and endothelial cells can induce the p-EMT +program in neighboring malignant cells. + +### Load packages ``` r library(nichenetr) @@ -30,187 +27,213 @@ library(tidyverse) library(circlize) ``` -### Read in expression data of interacting cells - -First, we will read in the publicly available single-cell data from -CAFs, endothelial cells and malignant cells from HNSCC tumors. +### Read in NicheNet’s networks ``` r -hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) -expression = hnscc_expression$expression -sample_info = hnscc_expression$sample_info # contains meta-information about the cells +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) + +lr_network <- lr_network %>% distinct(from, to) ``` -Secondly, we will determine which genes are expressed in CAFs, -endothelial and malignant cells from high quality primary tumors. -Therefore, we wil not consider cells from tumor samples of less quality -or from lymph node metastases. To determine expressed genes, we use the -definition used by of Puram et al. +### Read in the expression data of interacting cells -``` r -tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") +This is publicly available single-cell data from CAF and malignant cells +from HNSCC tumors. -CAF_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "CAF") %>% pull(cell) -endothelial_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "Endothelial") %>% pull(cell) -malignant_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `classified as cancer cell` == 1) %>% pull(cell) +``` r +hnscc_expression <- readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) +expression <- hnscc_expression$expression +sample_info <- hnscc_expression$sample_info # contains meta-information about the cells -expressed_genes_CAFs = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_endothelial = expression[endothelial_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +# Convert gene names +colnames(expression) <- convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) ``` -### Load the ligand-target model we want to use +### Define a set of potential ligands ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## A2M AANAT ABCA1 ACE ACE2 -## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 -## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 -## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 -## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 -## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 -``` +tumors_remove <- c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") -### Load the gene set of interest and background of genes +CAF_ids <- sample_info %>% + filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & + `non-cancer cell type` == "CAF") %>% pull(cell) +endothelial_ids <- sample_info %>% + filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & + `non-cancer cell type` == "Endothelial") %>% pull(cell) +malignant_ids <- sample_info %>% filter(`Lymph node` == 0 & + !(tumor %in% tumors_remove) & + `classified as cancer cell` == 1) %>% pull(cell) -As gene set of interest, we consider the genes of which the expression -is possibly affected due to communication with other cells. -Because we here want to investigate how CAFs and endothelial cells -regulate the expression of p-EMT genes in malignant cells, we will use -the p-EMT gene set defined by Puram et al. as gene set of interest and -use all genes expressed in malignant cells as background of genes. +# Define expressed genes in CAFs, endothelial cells and malignant cells +expressed_genes_CAFs <- expression[CAF_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -``` r -pemt_geneset = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. -head(pemt_geneset) -## [1] "SERPINE1" "TGFBI" "MMP10" "LAMC2" "P4HA2" "PDPN" +expressed_genes_endothelial <- expression[endothelial_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -background_expressed_genes = expressed_genes_malignant %>% .[. %in% rownames(ligand_target_matrix)] -head(background_expressed_genes) -## [1] "RPS11" "ELMO2" "PNMA1" "MMP2" "TMEM216" "ERCC5" -``` +expressed_genes_malignant <- expression[malignant_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -### Perform NicheNet’s ligand activity analysis on the gene set of interest +# Define expressed ligands and receptors +ligands <- lr_network %>% pull(from) %>% unique() +expressed_ligands_CAFs <- intersect(ligands,expressed_genes_CAFs) +expressed_ligands_endothelial <- intersect(ligands,expressed_genes_endothelial) +expressed_ligands <- union(expressed_ligands_CAFs, expressed_genes_endothelial) -In a first step, we will define a set of potentially active ligands. As -potentially active ligands, we will use ligands that are 1) expressed by -CAFs and/or endothelial cells and 2) can bind a (putative) receptor -expressed by malignant cells. Putative ligand-receptor links were -gathered from NicheNet’s ligand-receptor data sources. +receptors <- lr_network %>% pull(to) %>% unique() +expressed_receptors <- intersect(receptors,expressed_genes_malignant) -Note that we combine the ligands from CAFs and endothelial cells in one -ligand activity analysis now. Later on, we will look which of the -top-ranked ligands is mainly expressed by which of both cell types. +# Define potential ligands +potential_ligands <- lr_network %>% + filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% + pull(from) %>% unique() +``` -``` r -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +### Define the gene set of interest and background of genes -ligands = lr_network %>% pull(from) %>% unique() -expressed_ligands_CAFs = intersect(ligands,expressed_genes_CAFs) -expressed_ligands_endothelial = intersect(ligands,expressed_genes_endothelial) -expressed_ligands = union(expressed_ligands_CAFs, expressed_genes_endothelial) +We will use the p-EMT gene set defined by Puram et al. as gene set of +interest and use all genes expressed in malignant cells as background of +genes. -receptors = lr_network %>% pull(to) %>% unique() -expressed_receptors = intersect(receptors,expressed_genes_malignant) +``` r +pemt_geneset <- readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), + col_names = "gene") %>% + pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() -head(potential_ligands) -## [1] "A2M" "ACE" "ADAM10" "ADAM12" "ADAM15" "ADAM17" +background_expressed_genes <- expressed_genes_malignant %>% + .[. %in% rownames(ligand_target_matrix)] ``` -Now perform the ligand activity analysis: infer how well NicheNet’s -ligand-target potential scores can predict whether a gene belongs to the -p-EMT program or not. +### Perform NicheNet’s ligand activity analysis + +With the ligand activity analysis, we assess how well each potential +ligand can predict the p-EMT gene set compared to the background of +expressed genes. Note that we combine the ligands from CAFs and +endothelial cells in one ligand activity analysis now. Later on, we will +look which of the top-ranked ligands is mainly expressed by which of +both cell types. ``` r -ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +ligand_activities <- predict_ligand_activities(geneset = pemt_geneset, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) ``` -Now, we want to rank the ligands based on their ligand activity. In our -validation study, we showed that the AUPR between a ligand’s target -predictions and the observed transcriptional response was the most -informative measure to define ligand activity. Therefore, we will rank -the ligands based on their AUPR. We will choose the top 20 ligands -here - as opposed to the top 30 in the main vignette - to avoid -overcrowding the circos plot. +Now, we want to rank the ligands based on their ligand activity (AUPR). +We will choose the top 20 ligands here - as opposed to the top 30 in the +main vignette - to avoid overcrowding the circos plot. ``` r ligand_activities %>% arrange(-aupr_corrected) -## # A tibble: 232 × 5 +## # A tibble: 242 × 5 ## test_ligand auroc aupr aupr_corrected pearson ## -## 1 TGFB2 0.768 0.123 0.107 0.199 -## 2 CXCL12 0.708 0.0884 0.0721 0.144 -## 3 BMP8A 0.770 0.0880 0.0718 0.177 -## 4 INHBA 0.773 0.0866 0.0703 0.124 -## 5 GDF3 0.758 0.0817 0.0654 0.156 -## 6 LTBP1 0.722 0.0785 0.0622 0.163 -## 7 ACE 0.711 0.0780 0.0617 0.151 -## 8 TNXB 0.713 0.0737 0.0574 0.158 -## 9 ENG 0.759 0.0732 0.0569 0.157 -## 10 BMP5 0.745 0.0715 0.0552 0.150 -## # … with 222 more rows -best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) +## 1 TGFB2 0.772 0.120 0.105 0.195 +## 2 BMP8A 0.774 0.0852 0.0699 0.175 +## 3 INHBA 0.777 0.0837 0.0685 0.122 +## 4 CXCL12 0.714 0.0829 0.0676 0.141 +## 5 GDF3 0.763 0.0788 0.0635 0.154 +## 6 LTBP1 0.727 0.0762 0.0609 0.160 +## 7 ACE 0.717 0.0740 0.0587 0.146 +## 8 CCN2 0.736 0.0734 0.0581 0.141 +## 9 TNXB 0.719 0.0717 0.0564 0.157 +## 10 ENG 0.764 0.0703 0.0551 0.145 +## # ℹ 232 more rows +best_upstream_ligands <- ligand_activities %>% + top_n(20, aupr_corrected) %>% + arrange(-aupr_corrected) %>% + pull(test_ligand) + head(best_upstream_ligands) -## [1] "TGFB2" "CXCL12" "BMP8A" "INHBA" "GDF3" "LTBP1" +## [1] "TGFB2" "BMP8A" "INHBA" "CXCL12" "GDF3" "LTBP1" ``` -We see here that the top-ranked ligands can predict the p-EMT genes -reasonably, this implies that ranking of the ligands might be accurate -as shown in our study. However, it is possible that for some gene sets, -the target gene prediction performance of the top-ranked ligands would -not be much better than random prediction. In that case, prioritization -of ligands will be less trustworthy. - Determine now which prioritized ligands are expressed by CAFs and or -endothelial cells +endothelial cells. ``` r -best_upstream_ligands %>% intersect(expressed_ligands_CAFs) -## [1] "TGFB2" "CXCL12" "BMP8A" "INHBA" "LTBP1" "TNXB" "ENG" "BMP5" "VCAN" "HGF" "COMP" "COL3A1" "MMP14" "COL4A1" "FBN1" "TIMP2" "COL5A1" -best_upstream_ligands %>% intersect(expressed_ligands_endothelial) -## [1] "CXCL12" "GDF3" "LTBP1" "ACE" "TNXB" "ENG" "VCAN" "HGF" "COL4A1" "FBN1" "TIMP2" "EDN1" - -# lot of overlap between both cell types in terms of expressed ligands -# therefore, determine which ligands are more strongly expressed in which of the two -ligand_expression_tbl = tibble( - ligand = best_upstream_ligands, - CAF = expression[CAF_ids,best_upstream_ligands] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}), - endothelial = expression[endothelial_ids,best_upstream_ligands] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)})) - -CAF_specific_ligands = ligand_expression_tbl %>% filter(CAF > endothelial + 2) %>% pull(ligand) -endothelial_specific_ligands = ligand_expression_tbl %>% filter(endothelial > CAF + 2) %>% pull(ligand) -general_ligands = setdiff(best_upstream_ligands,c(CAF_specific_ligands,endothelial_specific_ligands)) +# There is a lot of overlap between both cell types in terms of expressed ligands +intersect(best_upstream_ligands %>% intersect(expressed_ligands_CAFs), + best_upstream_ligands %>% intersect(expressed_ligands_endothelial)) +## [1] "CXCL12" "LTBP1" "CCN2" "TNXB" "ENG" "VCAN" "CCN3" "COL4A1" "HGF" "TIMP2" "FBN1" -ligand_type_indication_df = tibble( +# Therefore, determine which ligands are more strongly expressed in which of the two +# Calculate average expression of each ligand in CAFs and endothelial cells +ligand_expression_tbl <- tibble( + ligand = best_upstream_ligands, + CAF = expression[CAF_ids,best_upstream_ligands] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}), + endothelial = expression[endothelial_ids,best_upstream_ligands] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)})) + +head(ligand_expression_tbl) +## # A tibble: 6 × 3 +## ligand CAF endothelial +## +## 1 TGFB2 4.62 0.722 +## 2 BMP8A 4.66 1.71 +## 3 INHBA 8.14 0.423 +## 4 CXCL12 11.0 6.56 +## 5 GDF3 0 4.98 +## 6 LTBP1 7.53 4.43 + +# Assign ligand to a cell type based on which cell type has much higher average expression +# If ligand is not expressed more highly enough in either cell type, assign to general category +CAF_specific_ligands <- ligand_expression_tbl %>% filter(CAF > endothelial + 2) %>% pull(ligand) +endothelial_specific_ligands <- ligand_expression_tbl %>% filter(endothelial > CAF + 2) %>% pull(ligand) +general_ligands <- setdiff(best_upstream_ligands,c(CAF_specific_ligands,endothelial_specific_ligands)) + +ligand_type_indication_df <- tibble( ligand_type = c(rep("CAF-specific", times = CAF_specific_ligands %>% length()), rep("General", times = general_ligands %>% length()), rep("Endothelial-specific", times = endothelial_specific_ligands %>% length())), ligand = c(CAF_specific_ligands, general_ligands, endothelial_specific_ligands)) + +head(ligand_type_indication_df) +## # A tibble: 6 × 2 +## ligand_type ligand +## +## 1 CAF-specific TGFB2 +## 2 CAF-specific BMP8A +## 3 CAF-specific INHBA +## 4 CAF-specific CXCL12 +## 5 CAF-specific LTBP1 +## 6 CAF-specific BMP5 ``` ### Infer target genes of top-ranked ligands and visualize in a circos plot -Now we will show how you can look at the regulatory potential scores -between ligands and target genes of interest. In this case, we will look -at links between top-ranked p-EMT-regulating ligands and p-EMT genes. In -this example, inferred target genes should belong to the p-EMT gene set -and to the 250 most strongly predicted targets of at least one of the -selected top-ranked ligands (the top 250 targets according to the -general prior model, so not the top 250 targets for this dataset). - -Get first the active ligand-target links by looking which of the p-EMT +First, get the active ligand-target links by looking which of the p-EMT genes are among the top-predicted target genes for the prioritized ligands: ``` r -active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = pemt_geneset, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = pemt_geneset, + ligand_target_matrix = ligand_target_matrix, + n = 250) %>% bind_rows() -active_ligand_target_links_df = active_ligand_target_links_df %>% mutate(target_type = "p_emt") %>% inner_join(ligand_type_indication_df) # if you want ot make circos plots for multiple gene sets, combine the different data frames and differentiate which target belongs to which gene set via the target type +active_ligand_target_links_df <- active_ligand_target_links_df %>% + mutate(target_type = "p_emt") ``` +Note that you can make a circos plot for multiple gene sets by combining +different dataframes and differentiating which target belongs to which +gene set via the “target type” column. + To avoid making a circos plots with too many ligand-target links, we will show only links with a weight higher than a predefined cutoff: links belonging to the 66% of lowest scores were removed. Not that this @@ -218,263 +241,142 @@ cutoffs and other cutoffs used for this visualization can be changed according to the user’s needs. ``` r -cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.66) - -active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands) - -ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique()) -targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique()) - -circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) +circos_links <- get_ligand_target_links_oi(ligand_type_indication_df, + active_ligand_target_links_df, + cutoff = 0.66) ``` -Prepare the circos visualization: give each segment of ligands and -targets a specific color and order +Prepare the circos visualization by giving each segment of ligands and +targets a specific color and order, as well as gaps between different +cell types. By default, cell types are ordered alphabetically, followed +by “General” (then they are drawn counter-clockwise). Users can give a +specific order to the cell types by providing a vector of cell types to +the argument `celltype_order`. The gaps between the different segments +can also be defined by providing a named list to the argument `widths`. ``` r -grid_col_ligand =c("General" = "lawngreen", - "CAF-specific" = "royalblue", - "Endothelial-specific" = "gold") -grid_col_target =c( - "p_emt" = "tomato") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) - -circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) -links_circle = circos_links %>% select(ligand,target, weight) +ligand_colors <- c("General" = "lawngreen", + "CAF-specific" = "royalblue", + "Endothelial-specific" = "gold") +target_colors <- c("p_emt" = "tomato") -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -target_color = circos_links %>% distinct(target,color_target_type) -grid_target_color = target_color$color_target_type %>% set_names(target_color$target) -grid_col =c(grid_ligand_color,grid_target_color) - -# give the option that links in the circos plot will be transparant ~ ligand-target potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency +vis_circos_obj <- prepare_circos_visualization(circos_links, + ligand_colors = ligand_colors, + target_colors = target_colors, + celltype_order = NULL) ``` -Prepare the circos visualization: order ligands and targets +Render the circos plot where all links have the same transparency. Here, +only the widths of the blocks that indicate each target gene is +proportional the ligand-target regulatory potential (~prior knowledge +supporting the regulatory interaction). ``` r -target_order = circos_links$target %>% unique() -ligand_order = c(CAF_specific_ligands,general_ligands,endothelial_specific_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,target_order) +make_circos_plot(vis_circos_obj, transparency = FALSE, args.circos.text = list(cex = 0.5)) ``` -Prepare the circos visualization: define the gaps between the different -segments +![](circos_files/figure-gfm/ligand-target-circos-1.png) + +Render the circos plot where the degree of transparency determined by +the regulatory potential value of a ligand-target interaction. ``` r -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_target = 15 -width_same_cell_same_target_type = 0.5 - -gaps = c( - # width_ligand_target, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_target, - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_type == "p_emt") %>% distinct(target) %>% nrow() -1)), - width_ligand_target - ) +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) ``` -Render the circos plot (all links same transparancy). Only the widths of -the blocks that indicate each target gene is proportional the -ligand-target regulatory potential (\~prior knowledge supporting the -regulatory interaction). +![](circos_files/figure-gfm/ligand-target-circos-transparent-1.png) + +To create a legend for the circos plot, we can use the +`ComplexHeatmap::Legend` function and creating a gTree object from it +with `grid::grid.grabExpr`. As the circos plot is drawn on base R +graphics (i.e., it is not a ggplot object), we will get the plot using +`recordPlot()`. ``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) -``` +par(bg = "transparent") -![](circos_files/figure-gfm/unnamed-chunk-94-1.png) +# Default celltype order +celltype_order <- unique(circos_links$ligand_type) %>% sort() %>% .[. != "General"] %>% c(., "General") -``` r -circos.clear() -``` +# Create legend +circos_legend <- ComplexHeatmap::Legend( + labels = celltype_order, + background = ligand_colors[celltype_order], + type = "point", + grid_height = unit(3, "mm"), + grid_width = unit(3, "mm"), + labels_gp = grid::gpar(fontsize = 8) + ) -Render the circos plot (degree of transparancy determined by the -regulatory potential value of a ligand-target interaction) +circos_legend_grob <- grid::grid.grabExpr(ComplexHeatmap::draw(circos_legend)) -``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) +p_circos_no_legend <- recordPlot() ``` -![](circos_files/figure-gfm/unnamed-chunk-95-1.png) +We can combine the circos plot and the legend using +`cowplot::plot_grid`. ``` r -circos.clear() +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) ``` -Save circos plot to an svg file +![](circos_files/figure-gfm/ligand-target-circos-with-legend-1.png) + +We can save this plot to an svg file. ``` r svg("ligand_target_circos.svg", width = 10, height = 10) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # -circos.clear() +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) dev.off() -## png -## 2 ``` ### Visualize ligand-receptor interactions of the prioritized ligands in a circos plot -``` r -# get the ligand-receptor network of the top-ranked ligands -lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) -best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - -# get the weights of the ligand-receptor interactions as used in the NicheNet model -weighted_networks = readRDS(url("https://zenodo.org/record/3260758/files/weighted_networks.rds")) -lr_network_top_df = weighted_networks$lr_sig %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) %>% rename(ligand = from, receptor = to) - -lr_network_top_df = lr_network_top_df %>% mutate(receptor_type = "p_emt_receptor") %>% inner_join(ligand_type_indication_df) -``` +To create a ligand-receptor chord diagram, we can perform similar steps +as above using the weighted ligand-receptor dataframe instead. However, +as as `prepare_circos_visualization` accesses “target” and “target_type” +columns, it is necessary to rename the columns accordingly even though +the dataframe contains receptor and not target gene information. ``` r -grid_col_ligand =c("General" = "lawngreen", - "CAF-specific" = "royalblue", - "Endothelial-specific" = "gold") -grid_col_receptor =c( - "p_emt_receptor" = "darkred") +lr_network_top_df <- get_weighted_ligand_receptor_links(best_upstream_ligands, + expressed_receptors, + lr_network, + weighted_networks$lr_sig) %>% + rename(ligand=from, target=to) %>% + mutate(target_type = "p_emt_receptor") %>% + inner_join(ligand_type_indication_df) -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_receptor = tibble(receptor_type = grid_col_receptor %>% names(), color_receptor_type = grid_col_receptor) +receptor_colors <- c("p_emt_receptor" = "darkred") -circos_links = lr_network_top_df %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as receptor! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_receptor) -links_circle = circos_links %>% select(ligand,receptor, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -receptor_color = circos_links %>% distinct(receptor,color_receptor_type) -grid_receptor_color = receptor_color$color_receptor_type %>% set_names(receptor_color$receptor) - -grid_col =c(grid_ligand_color,grid_receptor_color) - -# give the option that links in the circos plot will be transparant ~ ligand-receptor potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency +vis_circos_receptor_obj <- prepare_circos_visualization(lr_network_top_df, + ligand_colors = ligand_colors, + target_colors = receptor_colors) ``` -Prepare the circos visualization: order ligands and receptors +When drawing the plot, the argument `link.visible` = TRUE is also +necessary for making all links visible, since no cutoff is used to +filter out ligand-receptor interactions. ``` r -receptor_order = circos_links$receptor %>% unique() -ligand_order = c(CAF_specific_ligands,general_ligands,endothelial_specific_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,receptor_order) +make_circos_plot(vis_circos_receptor_obj, transparency = FALSE, + link.visible = TRUE, args.circos.text = list(cex = 0.8)) ``` -Prepare the circos visualization: define the gaps between the different -segments - -``` r -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_receptor = 15 -width_same_cell_same_receptor_type = 0.5 - -gaps = c( - # width_ligand_receptor, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_receptor, - rep(width_same_cell_same_receptor_type, times = (circos_links %>% filter(receptor_type == "p_emt_receptor") %>% distinct(receptor) %>% nrow() -1)), - width_ligand_receptor - ) -``` +![](circos_files/figure-gfm/ligand-receptor-circos-1.png) -Render the circos plot (all links same transparancy). Only the widths of -the blocks that indicate each receptor is proportional the -ligand-receptor interaction weight (\~prior knowledge supporting the -interaction). +Just as above, if `transparency = TRUE`, the degree of transparency is +determined by the prior interaction weight of the ligand-receptor +interaction. ``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1, order=order, link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # +make_circos_plot(vis_circos_receptor_obj, transparency = TRUE, + link.visible = TRUE, args.circos.text = list(cex = 0.8)) ``` -![](circos_files/figure-gfm/unnamed-chunk-101-1.png) - -``` r -circos.clear() -``` - -Render the circos plot (degree of transparancy determined by the prior -interaction weight of the ligand-receptor interaction - just as the -widths of the blocks indicating each receptor) - -``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -``` - -![](circos_files/figure-gfm/unnamed-chunk-102-1.png) - -``` r -circos.clear() -``` - -Save circos plot to an svg file - -``` r -svg("ligand_receptor_circos.svg", width = 15, height = 15) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -circos.clear() -dev.off() -## png -## 2 -``` +![](circos_files/figure-gfm/ligand-receptor-circos-transparent-1.png) ### Adding an outer track to the circos plot (ligand-receptor-target circos plot) @@ -500,38 +402,20 @@ but gives you the flexibility to draw receptor arcs of different lengths, while the `highlight.sector` function is constrained to the widths of the targets in the inner track. -First, let’s rerun a code chunk from above to redefine `circos_links` -and the color scheme. - -``` r -circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) - -grid_col_ligand =c("General" = "lawngreen", - "CAF-specific" = "royalblue", - "Endothelial-specific" = "gold") -grid_col_target =c( - "p_emt" = "tomato") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) - -circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) -links_circle = circos_links %>% select(ligand, target, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -target_color = circos_links %>% distinct(target,color_target_type) -grid_target_color = target_color$color_target_type %>% set_names(target_color$target) - -grid_col = c(grid_ligand_color,grid_target_color) - -ligand_order = c(CAF_specific_ligands,general_ligands,endothelial_specific_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -``` - #### Using the `draw.sector` function -For demonstration purposes, we will use a subset of ligands and divide them into four groups that differ in the signaling pathways they interact with. Then, we assign targets and receptors to each ligand group based on their relative rankings (and summed weights in case the rankings are the same). The way we assigned targets and receptors to ligand groups here does not always lead to the most biologically meaningful results, as you will see in the final plot. Hence, it is best if you curate this list manually, e.g., through prior knowledge and by also looking at the ligand-target and ligand-receptor heatmap. Also keep in mind that there is no real correspondence between receptors and targets, as explained more [here](https://github.com/saeyslab/nichenetr/issues/20#issuecomment-611601039). +For demonstration purposes, we will use a subset of ligands and divide +them into four groups that differ in the signaling pathways they +interact with. Then, we assign targets and receptors to each ligand +group based on their relative rankings (and summed weights in case the +rankings are the same). The way we assigned targets and receptors to +ligand groups here does not always lead to the most biologically +meaningful results, as you will see in the final plot. Hence, it is best +if you curate this list manually, e.g., through prior knowledge and by +also looking at the ligand-target and ligand-receptor heatmap. Also keep +in mind that there is no real correspondence between receptors and +targets, as explained more +[here](https://github.com/saeyslab/nichenetr/issues/20#issuecomment-611601039). ``` r groups <- list(group1 = c("TGFB2", "ENG"), @@ -574,8 +458,9 @@ targets_filtered <- targets %>% group_by(target) %>% filter(avg_rank == min(avg_ # Do the same for receptors receptor_colors <- c("#387D7A", "#9DA9A0", "#DD7373", "#725752") %>% setNames(names(groups)) + receptors <- lapply(names(groups), function(i) { - weighted_networks$lr_sig %>% filter(from %in% groups[[i]] & to %in% best_upstream_receptors) %>% + weighted_networks$lr_sig %>% filter(from %in% groups[[i]] & to %in% unique(lr_network_top_df$target)) %>% group_by(from) %>% mutate(receptor_rank = dense_rank(desc(weight))) %>% group_by(to) %>% summarise(summed_weight=sum(weight), avg_weight=mean(weight), avg_rank = mean(receptor_rank)) %>% mutate(type=i, color = receptor_colors[i]) %>% rename(receptor = to) @@ -590,77 +475,69 @@ lapply(paste0("group", 1:5), function(group_name) setdiff(receptors %>% filter(t ## character(0) ## ## [[3]] -## [1] "CD47" "ITGA2" "ITGA3" "ITGB5" "ITGB6" "ITGB8" +## [1] "ADAM9" "CD47" "DDR1" "ITGA2" "ITGA3" "ITGB5" "ITGB6" "ITGB8" "ST14" ## ## [[4]] -## [1] "BDKRB2" +## [1] "ACKR3" ## ## [[5]] ## character(0) # Assign receptor to a specific group -receptors_filtered <- receptors %>% group_by(receptor) %>% filter(avg_rank == min(avg_rank)) %>% +receptors_filtered <- receptors %>% group_by(receptor) %>% filter(avg_rank == min(avg_rank)) %>% filter(summed_weight == max(summed_weight)) %>% ungroup() ``` -We will then have to redefine some variables. - -``` r -# Filter out targets and ligands that are no longer present -links_circle_approach1 <- links_circle %>% filter(ligand %in% paste0(unlist(groups), " "), - target %in% targets_filtered$target) - -order <- c(ligand_order %>%.[. %in% paste0(unlist(groups), " ")], targets_filtered$target) - -# Redefine gaps between sectors -width_same_cell_same_ligand_type = 0.6 -width_different_cell = 4.5 -width_ligand_target = 12 -width_same_cell_same_target_type = 0.6 # Added -width_different_target = 4.5 # Added - -group_widths <- sapply(paste0("group", 1:4), function(group) { - # Gap between targets of the same group - paste0(rep(width_same_cell_same_target_type, times =(targets_filtered %>% filter(type==group) %>% nrow)-1), collapse=",")}) %>% - # Separate this with gap of different group - paste0(., collapse=paste0(",",width_different_target,",")) %>% - str_split(., ",") %>% .[[1]] %>% as.numeric - -gaps = c( - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific", - ligand %in% paste0(unlist(groups), " "), - target %in% targets_filtered$target) %>% - distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General", - ligand %in% paste0(unlist(groups), " "), - target %in% targets_filtered$target) %>% - distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific", - ligand %in% paste0(unlist(groups), " "), - target %in% targets_filtered$target) %>% - distinct(ligand) %>% nrow() -1)), - width_ligand_target, - group_widths, - width_ligand_target - ) - -gaps = gaps %>% .[!is.na(.)] -``` - -Finally, we create the plot. What’s different here is we add an extra -layer in `preAllocateTracks`, and we add a `for` loop at the end to draw -the outer layer. As mentioned previously, the resulting plot will -require some further manual tweaking (e.g., ITG receptors from group 1 -should be moved to group 3). - -``` r -circos.par(gap.degree = gaps) - -chordDiagram(links_circle_approach1, order=order, transparency=0, directional = 1, link.sort = TRUE, link.decreasing = FALSE, - grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), - link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands, annotationTrack = "grid", +Next, run the same functions as the plots before, after filtering the +ligand assignment dataframe and the ligand-target links dataframe. We +also add the group information to the targets + +``` r + +circos_links_subset <- get_ligand_target_links_oi( + ligand_type_indication_df %>% filter(ligand %in% unlist(groups)), + active_ligand_target_links_df %>% filter(ligand %in% unlist(groups)) %>% + # Add group information + inner_join(targets_filtered, by="target") %>% + select(-target_type, n, summed_weight, avg_rank) %>% + rename(target_type=type), + cutoff = 0.66) + +ligand_colors <- c("General" = "lawngreen", + "CAF-specific" = "royalblue", + "Endothelial-specific" = "gold") +target_colors <- rep("tomato", 4) %>% setNames(names(groups)) + +# Define specific gaps +vis_circos_obj_subset <- prepare_circos_visualization(circos_links_subset, + ligand_colors = ligand_colors, + target_colors = target_colors, + widths = list(width_same_cell_same_ligand_type = 0.6, + width_different_cell = 4.5, + width_ligand_target = 12, + width_same_cell_same_target_type = 0.6)) +``` + +Finally, we create the plot by adding an extra layer in +`preAllocateTracks`, and we add a `for` loop at the end to draw the +outer layer. + +``` r +circos.par(gap.degree = vis_circos_obj_subset$gaps) + +chordDiagram(vis_circos_obj_subset$links_circle, + order=vis_circos_obj_subset$order, + transparency=0, + directional = 1, + link.sort = TRUE, + link.decreasing = FALSE, + grid.col = vis_circos_obj_subset$ligand_colors, + diffHeight = 0.005, + direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", + link.visible = vis_circos_obj_subset$links_circle$weight >= + attr(vis_circos_obj_subset$links_circle, "cutoff_include_all_ligands"), + annotationTrack = "grid", # Add extra track for outer layer preAllocateTracks = list(list(track.height = 0.025), list(track.height = 0.25))) @@ -689,7 +566,8 @@ for (group in unique(targets_filtered$type)){ theta2 <- circlize:::get.sector.data((targets_subset %>% pull(target) %>% .[length(.)]))["end.degree"] # Scale the arc lengths according to the summed ligand-receptor weights - receptors_subset_scaled <- receptors_subset %>% mutate(scaled_weight = summed_weight/sum(summed_weight)*(theta1-theta2)) + receptors_subset_scaled <- receptors_subset %>% + mutate(scaled_weight = summed_weight/sum(summed_weight)*(theta1-theta2)) # For each receptor current_theta <- theta1 for (i in 1:nrow(receptors_subset_scaled)){ @@ -716,14 +594,16 @@ for (group in unique(targets_filtered$type)){ } ``` -![](circos_files/figure-gfm/unnamed-chunk-107-1.png) +![](circos_files/figure-gfm/double-circos-method-1-1.png) ``` r - circos.clear() ``` +As mentioned previously, the resulting plot will require some further +manual tweaking (e.g., ITGA5 should be in group 3). + #### Using the `highlight.sector` function With this function, it is not possible to draw receptor arcs that end at @@ -732,52 +612,40 @@ randomly assign the target genes into one of three groups (Receptors A, B, and C). ``` r -target_gene_groups <- sample(c("Receptor A", "Receptor B", "Receptor C"), length(unique(circos_links$target)), replace = TRUE) %>% - setNames(unique(circos_links$target)) -target_gene_groups -## ACTN1 C1S COL17A1 COL1A1 COL4A2 F3 FSTL3 IGFBP3 ITGA5 LAMC2 MFAP2 MMP2 MYH9 PDLIM7 -## "Receptor B" "Receptor A" "Receptor A" "Receptor B" "Receptor C" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" -## PSMD2 PTHLH SERPINE1 SERPINE2 TAGLN TGFBI TNC TPM1 MMP1 MMP10 MT2A PRSS23 SLC31A2 THBS1 -## "Receptor A" "Receptor B" "Receptor C" "Receptor A" "Receptor C" "Receptor A" "Receptor C" "Receptor B" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor B" -## TPM4 APP COL5A2 DKK3 GJA1 HTRA1 PLAU SEMA3C VIM CAV1 ITGA6 MAGED1 MAGED2 PLOD2 -## "Receptor A" "Receptor B" "Receptor B" "Receptor B" "Receptor C" "Receptor B" "Receptor C" "Receptor C" "Receptor B" "Receptor A" "Receptor B" "Receptor A" "Receptor B" "Receptor A" -## SLC39A14 FSTL1 LGALS1 P4HA2 IL32 FHL2 ITGB1 -## "Receptor B" "Receptor C" "Receptor C" "Receptor C" "Receptor C" "Receptor B" "Receptor C" - -target_gene_group_colors <- c("#387D7A", "#9DA9A0", "#704E2E") %>% setNames(unique(target_gene_groups)) +set.seed(10) +target_gene_groups <- data.frame(target_type = sample(c("Receptor A", "Receptor B", "Receptor C"), + length(unique(circos_links$target)), replace = TRUE), + target = unique(circos_links$target)) +head(target_gene_groups) +## target_type target +## 1 Receptor C ACTN1 +## 2 Receptor A C1S +## 3 Receptor B COL17A1 +## 4 Receptor C COL1A1 +## 5 Receptor B COL4A2 +## 6 Receptor C F3 ``` -Again, we will redefine some variables. +Again, define the colors of the receptors (outer layer) and targets +(inner layer). Then, run `prepare_circos_visualization` to get the order +of the targets and the gaps between them. ``` r -# Order targets according to receptor they belong to -order = c(ligand_order, target_gene_groups %>% sort %>% names) - -# Redefine gaps between sectors -width_same_cell_same_ligand_type = 0.6 -width_different_cell = 4.5 -width_ligand_target = 12 -width_same_cell_same_target_type = 0.6 # Added -width_different_target = 4.5 # Added - -# Add this to circos_links -circos_links = circos_links %>% mutate(target_receptor = target_gene_groups[target]) - -gaps = c( - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "CAF-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Endothelial-specific") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_target, - # Add code to define gaps between different target groups - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor A") %>% distinct(target) %>% nrow() -1)), - width_different_target, - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor B") %>% distinct(target) %>% nrow() -1)), - width_different_target, - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_receptor == "Receptor C") %>% distinct(target) %>% nrow() -1)), - width_ligand_target - ) + +receptor_group_colors <- c("#387D7A", "#9DA9A0", "#704E2E") %>% + setNames(unique(target_gene_groups$target_type)) + +target_colors <- rep("tomato", 3) %>% setNames(unique(target_gene_groups$target_type)) + +vis_circos_obj2 <- prepare_circos_visualization(circos_links %>% select(-target_type) %>% + # Combine target gene group information to circos_links + inner_join(target_gene_groups, by="target"), + ligand_colors = ligand_colors, + target_colors = target_colors, + widths = list(width_same_cell_same_ligand_type = 0.6, + width_different_cell = 4.5, + width_ligand_target = 12, + width_same_cell_same_target_type = 0.6)) ``` The general idea here is similar - adding an extra layer in @@ -785,10 +653,20 @@ The general idea here is similar - adding an extra layer in layer - but the function allows for much cleaner code. ``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1, transparency=0, order=order,link.sort = TRUE, link.decreasing = FALSE, - grid.col = grid_col, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"), - link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", +circos.par(gap.degree = vis_circos_obj2$gaps) +chordDiagram(vis_circos_obj2$links_circle, + directional = 1, + transparency=0, + order=vis_circos_obj2$order, + link.sort = TRUE, + link.decreasing = FALSE, + grid.col = vis_circos_obj2$ligand_colors, + diffHeight = 0.005, + direction.type = c("diffHeight", "arrows"), + link.arr.type = "big.arrow", + link.visible = vis_circos_obj2$links_circle$weight >= + attr(vis_circos_obj2$links_circle, "cutoff_include_all_ligands"), + annotationTrack = "grid", # Add extra track for outer layer preAllocateTracks = list(list(track.height = 0.025), list(track.height = 0.2))) @@ -800,16 +678,16 @@ circos.track(track.index = 2, panel.fun = function(x, y) { }, bg.border = NA) # # Add outer layer -for (target_gene_group in unique(target_gene_groups)){ - highlight.sector(target_gene_groups %>% .[. == target_gene_group] %>% names, +for (target_gene_group in unique(target_gene_groups$target_type)){ + highlight.sector(target_gene_groups %>% filter(target_type == target_gene_group) %>% pull(target), track.index = 1, - col = target_gene_group_colors[target_gene_group], + col = receptor_group_colors[target_gene_group], text = target_gene_group, cex = 0.8, facing="bending.inside", niceFacing = TRUE, text.vjust = "5mm") } ``` -![](circos_files/figure-gfm/unnamed-chunk-110-1.png) +![](circos_files/figure-gfm/double-circos-method-2-1.png) ``` r diff --git a/vignettes/circos_files/figure-gfm/double-circos-method-1-1.png b/vignettes/circos_files/figure-gfm/double-circos-method-1-1.png new file mode 100644 index 0000000..5b8763e Binary files /dev/null and b/vignettes/circos_files/figure-gfm/double-circos-method-1-1.png differ diff --git a/vignettes/circos_files/figure-gfm/double-circos-method-2-1.png b/vignettes/circos_files/figure-gfm/double-circos-method-2-1.png new file mode 100644 index 0000000..f8942c1 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/double-circos-method-2-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-receptor-circos-1.png b/vignettes/circos_files/figure-gfm/ligand-receptor-circos-1.png new file mode 100644 index 0000000..bd72d5e Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-receptor-circos-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-receptor-circos-transparent-1.png b/vignettes/circos_files/figure-gfm/ligand-receptor-circos-transparent-1.png new file mode 100644 index 0000000..55e07bf Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-receptor-circos-transparent-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-target-circos-1.png b/vignettes/circos_files/figure-gfm/ligand-target-circos-1.png new file mode 100644 index 0000000..5912ea4 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-target-circos-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-target-circos-transparent-1.png b/vignettes/circos_files/figure-gfm/ligand-target-circos-transparent-1.png new file mode 100644 index 0000000..a395e32 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-target-circos-transparent-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-target-circos-unused-1.png b/vignettes/circos_files/figure-gfm/ligand-target-circos-unused-1.png new file mode 100644 index 0000000..14fd407 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-target-circos-unused-1.png differ diff --git a/vignettes/circos_files/figure-gfm/ligand-target-circos-with-legend-1.png b/vignettes/circos_files/figure-gfm/ligand-target-circos-with-legend-1.png new file mode 100644 index 0000000..df29211 Binary files /dev/null and b/vignettes/circos_files/figure-gfm/ligand-target-circos-with-legend-1.png differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png deleted file mode 100644 index 90e6ebd..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-101-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png deleted file mode 100644 index 738b22b..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-102-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png deleted file mode 100644 index 592c468..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-107-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png deleted file mode 100644 index 1dce9c9..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-110-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png deleted file mode 100644 index 41fca5e..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-94-1.png and /dev/null differ diff --git a/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png b/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png deleted file mode 100644 index f949f34..0000000 Binary files a/vignettes/circos_files/figure-gfm/unnamed-chunk-95-1.png and /dev/null differ diff --git a/vignettes/comparison_other_approaches_2.png b/vignettes/comparison_other_approaches_2.png deleted file mode 100644 index 2130db1..0000000 Binary files a/vignettes/comparison_other_approaches_2.png and /dev/null differ diff --git a/vignettes/circos_plot_adapted.jpg b/vignettes/images/circos_plot_adapted.jpg similarity index 100% rename from vignettes/circos_plot_adapted.jpg rename to vignettes/images/circos_plot_adapted.jpg diff --git a/vignettes/circos_plot_adapted.png b/vignettes/images/circos_plot_adapted.png similarity index 100% rename from vignettes/circos_plot_adapted.png rename to vignettes/images/circos_plot_adapted.png diff --git a/vignettes/comparison_other_approaches_2.jpg b/vignettes/images/comparison_other_approaches_2.jpg similarity index 100% rename from vignettes/comparison_other_approaches_2.jpg rename to vignettes/images/comparison_other_approaches_2.jpg diff --git a/vignettes/images/figure1.svg b/vignettes/images/figure1.svg new file mode 100644 index 0000000..61c71d6 --- /dev/null +++ b/vignettes/images/figure1.svg @@ -0,0 +1,3895 @@ + +image/svg+xmlLigandactivityanalysis:prioritizeligandsaccordingtohowwellitspatterninthepriormodelcorrespondstotheobservedenrichmentofthegenesetofinterestcomparedtobackgroundTargetgeneprediction:inferwhichgenesfromthegenesetaretoptargetsoftheprioritizedligandsLigand X regulatory potential in prior modelArea under theprecision-recall curve(AUPR)Background gene setGene set of interest0101GenesCell type-annotatedgene expression matrixFeature extractionNicheNetLigandrankingsa.Potential ligandsb.Gene set of interestc.Background gene setTarget genesAUPRLigand XLigand YLigand Z...GenesRegulatory potentialInputOutputCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectCreated by Rean-mefrom the Noun ProjectSender(s)ReceiverForeachreceivercelltype,extractthefollowingthreefeatures:a.Potentialligands:ligandswhosecognatereceptorsareexpressedinthereceiver;canbefurtherfilteredbasedonexpressioninsendercelltype(s)b.Genesetofinterest:genesthatrepresenttheCCCeventofinterestinthereceiver;usuallydifferentiallyexpressedgenesbetweentwoconditionsc.Backgroundgeneset:allexpressedgenesinthereceiverReceiverObserved expression pattern diff --git a/vignettes/images/figure2.svg b/vignettes/images/figure2.svg new file mode 100644 index 0000000..e79f226 --- /dev/null +++ b/vignettes/images/figure2.svg @@ -0,0 +1 @@ +Are the sender cell population(s) and their secreted ligands captured in the dataset?Sender-agnostic approach:all ligands in LR databasewhose cognate receptors areexpressed in the receiverSender-focusedapproach:ligands expressed in sender(s)whose cognate receptorsare expressed in the receiverDoes the dataset have two or more conditions?NicheNet analysisnot suitableYesNoYesNoPotentialligandsGene setof interestBackgroundgene setDE genes betweenrelevant cell (sub)typesAll genes in the genomeDE genes incondition of interestwithin the receiverAll expressed genesin the receiverFor each receiver cell populationYesNo /I don’t knowIs the CCCeventof interest one of the following?a)Cell Differentiation:differences between aprogenitoranddifferentiatedcelltypeb)Cell Localization:differences between cell subtypes located in different niches or areas \ No newline at end of file diff --git a/vignettes/ligand_activity_prediction_workflow_new.png b/vignettes/images/ligand_activity_prediction_workflow_new.png similarity index 100% rename from vignettes/ligand_activity_prediction_workflow_new.png rename to vignettes/images/ligand_activity_prediction_workflow_new.png diff --git a/vignettes/images/nichenet_prior_model.png b/vignettes/images/nichenet_prior_model.png new file mode 100644 index 0000000..0a8e8af Binary files /dev/null and b/vignettes/images/nichenet_prior_model.png differ diff --git a/vignettes/workflow_model_construction.png b/vignettes/images/workflow_model_construction.png similarity index 100% rename from vignettes/workflow_model_construction.png rename to vignettes/images/workflow_model_construction.png diff --git a/vignettes/workflow_nichenet.jpg b/vignettes/images/workflow_nichenet.jpg similarity index 100% rename from vignettes/workflow_nichenet.jpg rename to vignettes/images/workflow_nichenet.jpg diff --git a/vignettes/workflow_nichenet.png b/vignettes/images/workflow_nichenet.png similarity index 100% rename from vignettes/workflow_nichenet.png rename to vignettes/images/workflow_nichenet.png diff --git a/vignettes/ligand_activity_geneset.Rmd b/vignettes/ligand_activity_geneset.Rmd index e9708f3..dd522b6 100644 --- a/vignettes/ligand_activity_geneset.Rmd +++ b/vignettes/ligand_activity_geneset.Rmd @@ -23,246 +23,216 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis. A NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have bulk or single-cell gene expression data. Specifically, NicheNet can predict 1) which ligands from one cell population ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. +This vignette follows the steps described in [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) with two major differences: a predefined gene set of interest is given, and a different definition of expressed genes. -Because NicheNet studies how ligands affect gene expression in neighboring cells, you need to have data about this effect in gene expression you want to study. So, you need to have a clear set of genes that are putatively affected by ligands from one of more interacting cells. - -The pipeline of a basic NicheNet analysis consist mainly of the following steps: - -* 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations -* 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) -* 3. Define a set of potential ligands: these are ligands that are expressed by the “sender/niche” cell population and bind a (putative) receptor expressed by the “receiver/target” population -* 4) Perform NicheNet ligand activity analysis: rank the potential ligands based on the presence of their target genes in the gene set of interest (compared to the background set of genes) -* 5) Infer top-predicted target genes of ligands that are top-ranked in the ligand activity analysis - -This vignette guides you in detail through all these steps. As example expression data of interacting cells, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [See @puram_single-cell_2017]. More specifically, we will look at which ligands expressed by cancer-associated fibroblasts (CAFs) can induce a specific gene program in neighboring malignant cells. This program, a partial epithelial-mesenschymal transition (p-EMT) program, could be linked to metastasis by Puram et al. +Here, we use explore intercellular communication in the tumor microenvironment of head and neck squamous cell carcinoma (HNSCC) [@puram_single-cell_2017]. More specifically, we will look at which ligands expressed by cancer-associated fibroblasts (CAFs) can induce a specific gene program in neighboring malignant cells. The original authors of the study have linked this partial epithelial-mesenschymal transition (p-EMT) program to metastasis. The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. -## Step 0: Load required packages, NicheNet's ligand-target prior model and processed expression data of interacting cells +# Prepare NicheNet analysis -Packages: +### Load packages ```{r} library(nichenetr) library(tidyverse) ``` -Ligand-target model: +### Read in NicheNet's networks -This model denotes the prior potential that a particular ligand might regulate the expression of a specific target gene. -In Nichenet v2, networks and matrices for both mouse and human are made separately [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). +The ligand-target prior model, ligand-receptor network, and weighted integrated networks are needed for this vignette. The ligand-target prior model is a matrix describing the potential that a ligand may regulate a target gene, and it is used to run the ligand activity analysis. The ligand-receptor network contains information on potential ligand-receptor bindings, and it is used to identify potential ligands. Finally, the weighted ligand-receptor network contains weights representing the potential that a ligand will bind to a receptor, and it is used for visualization. ```{r} -options(timeout = 600) -organism = "human" +organism <- "human" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) +head(lr_network) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns + +head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network +head(weighted_networks$gr) # interactions and their weights in the gene regulatory network + ``` +### Read in the expression data of interacting cells -Expression data of interacting cells: publicly available single-cell data from CAF and malignant cells from HNSCC tumors: +This is publicly available single-cell data from CAF and malignant cells from HNSCC tumors. ```{r} -hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) -expression = hnscc_expression$expression -sample_info = hnscc_expression$sample_info # contains meta-information about the cells +hnscc_expression <- readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) +expression <- hnscc_expression$expression +sample_info <- hnscc_expression$sample_info # contains meta-information about the cells ``` -Because the NicheNet 2.0. networks are in the most recent version of the official gene symbols, we will make sure that the gene symbols used in the expression data are also updated (= converted from their “aliases” to official gene symbols). +Because the NicheNet v2 networks are in the most recent version of the official gene symbols, we will make sure that the gene symbols used in the expression data are also updated (= converted from their “aliases” to official gene symbols). ```{r} # If this is not done, there will be 35 genes fewer in lr_network_expressed! -colnames(expression) = convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) +colnames(expression) <- convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) ``` -## Step 1: Define expressed genes in sender and receiver cell populations +## 1. Define a set of potential ligands -Our research question is to prioritize which ligands expressed by CAFs can induce p-EMT in neighboring malignant cells. Therefore, CAFs are the sender cells in this example and malignant cells are the receiver cells. This is an example of paracrine signaling. Note that autocrine signaling can be considered if sender and receiver cell type are the same. +Our research question is to prioritize which ligands expressed by CAFs can induce p-EMT in neighboring malignant cells. Hence, we will only use on the **sender-focused** approach, with CAFs as senders and malignant cells as receivers. -Now, we will determine which genes are expressed in the sender cells (CAFs) and receiver cells (malignant cells) from high quality primary tumors. Therefore, we wil not consider cells from tumor samples of less quality or from lymph node metastases. +The set of potential ligands is defined as ligands that are expressed in sender cells whose cognate receptors are also expressed in receiver cells. -To determine expressed genes in this case study, we use the definition used by Puram et al. (the authors of this dataset), which is: Ea, the aggregate expression of each gene i across the k cells, calculated as Ea(i) = log2(average(TPM(i)1…k)+1), should be >= 4. We recommend users to define expressed genes in the way that they consider to be most appropriate for their dataset. For single-cell data generated by the 10x platform in our lab, we don't use the definition used here, but we consider genes to be expressed in a cell type when they have non-zero values in at least 10% of the cells from that cell type. This is described as well in the other vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`. +So first, we will determine which genes are expressed in the sender cells (CAFs) and receiver cells (malignant cells). We will only consider samples from high quality primary tumors and also remove samples from lymph node metastases. We will use the definition of expressed genes by the original authors, that is, the aggregate expression of each gene $i$ across the $k$ cells, calculated as $E_a(i) = log_{2}(average(TPM(i)1…k)+1)$, should be >= 4. + +We recommend users to define expressed genes in the way that they consider to be most appropriate for their dataset. For single-cell data generated by the 10x platform in our lab, we consider genes to be expressed in a cell type when they have non-zero values in a certain fraction of the cells from that cell type (usually 10%). This is used in the vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md). ```{r} -tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") +tumors_remove <- c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") + +CAF_ids <- sample_info %>% + filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & + `non-cancer cell type` == "CAF") %>% pull(cell) +malignant_ids <- sample_info %>% filter(`Lymph node` == 0 & + !(tumor %in% tumors_remove) & + `classified as cancer cell` == 1) %>% pull(cell) -CAF_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "CAF") %>% pull(cell) -malignant_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `classified as cancer cell` == 1) %>% pull(cell) +expressed_genes_sender <- expression[CAF_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -expressed_genes_sender = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_receiver = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +expressed_genes_receiver <- expression[malignant_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -# Check the number of expressed genes: should be a 'reasonable' number of total expressed genes in a cell type, e.g. between 5000-10000 (and not 500 or 20000) length(expressed_genes_sender) length(expressed_genes_receiver) ``` -## Step 2: Define the gene set of interest and a background of genes - -As gene set of interest, we consider the genes of which the expression is possibly affected due to communication with other cells. The definition of this gene set depends on your research question and is a crucial step in the use of NicheNet. - -Because we here want to investigate how CAFs regulate the expression of p-EMT genes in malignant cells, we will use the p-EMT gene set defined by Puram et al. as gene set of interest and use all genes expressed in malignant cells as background of genes. - -```{r} -geneset_oi = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. -head(geneset_oi) - -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -head(background_expressed_genes) -``` - -## Step 3: Define a set of potential ligands - -As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet's ligand-receptor data sources. +Now, we can filter the expressed ligands and receptors to only those that putatively bind together. This information is stored in NicheNet's ligand-receptor network by gathering various data sources. ```{r} -# If wanted, users can remove ligand-receptor interactions that were predicted based on protein-protein interactions and only keep ligand-receptor interactions that are described in curated databases. To do this: uncomment following line of code: -# lr_network = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") +ligands <- lr_network %>% pull(from) %>% unique() +expressed_ligands <- intersect(ligands,expressed_genes_sender) -ligands = lr_network %>% pull(from) %>% unique() -expressed_ligands = intersect(ligands,expressed_genes_sender) +receptors <- lr_network %>% pull(to) %>% unique() +expressed_receptors <- intersect(receptors,expressed_genes_receiver) -receptors = lr_network %>% pull(to) %>% unique() -expressed_receptors = intersect(receptors,expressed_genes_receiver) +potential_ligands <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% + pull(from) %>% unique() -lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) -head(lr_network_expressed) -``` - -This ligand-receptor network contains the expressed ligand-receptor interactions. As potentially active ligands for the NicheNet analysis, we will consider the ligands from this network. - -```{r} -potential_ligands = lr_network_expressed %>% pull(from) %>% unique() head(potential_ligands) ``` +## 2. Define the gene set of interest and a background of genes -## Step 4: Perform NicheNet's ligand activity analysis on the gene set of interest +The gene set of interest consists of genes for which the expression is possibly affected due to communication with other cells. The definition of this gene set depends on your research question and is a crucial step in the use of NicheNet. -Now perform the ligand activity analysis: in this analysis, we will calculate the ligand activity of each ligand, or in other words, we will assess how well each CAF-ligand can predict the p-EMT gene set compared to the background of expressed genes (predict whether a gene belongs to the p-EMT program or not). +Here, we will use the p-EMT gene set defined by the original authors as gene set of interest to investigate how CAFs can induce p-EMT in malignant cells. ```{r} -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -``` +# Only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. +geneset_oi <- readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), + col_names = "gene") %>% + pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] -Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, we will rank the ligands based on their AUPR. This allows us to prioritize p-EMT-regulating ligands. - -```{r} -ligand_activities %>% arrange(-aupr_corrected) -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) -head(best_upstream_ligands) +length(geneset_oi) ``` -We see here that the performance metrics indicate that the 30 top-ranked ligands can predict the p-EMT genes reasonably, this implies that ranking of the ligands might be accurate as shown in our study. However, it is possible that for some gene sets, the target gene prediction performance of the top-ranked ligands would not be much better than random prediction. In that case, prioritization of ligands will be less trustworthy. +## 3. Define background genes -Additional note: we looked at the top 30 ligands here and will continue the analysis by inferring p-EMT target genes of these 30 ligands. However, the choice of looking only at the 30 top-ranked ligands for further biological interpretation is based on biological intuition and is quite arbitrary. Therefore, users can decide to continue the analysis with a different number of ligands. We recommend to check the selected cutoff by looking at the distribution of the ligand activity values. Here, we show the ligand activity histogram (the score for the 30th ligand is indicated via the dashed line). +We will all genes expressed in malignant cells as the background set. ```{r} -# show histogram of ligand activity scores -p_hist_lig_activity = ggplot(ligand_activities, aes(x=aupr_corrected)) + - geom_histogram(color="black", fill="darkorange") + - # geom_density(alpha=.1, fill="orange") + - geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), color="red", linetype="dashed", size=1) + - labs(x="ligand activity (PCC)", y = "# ligands") + - theme_classic() -p_hist_lig_activity -``` +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] +length(background_expressed_genes) +``` -## Step 5: Infer target genes of top-ranked ligands and visualize in a heatmap +## 4. Perform NicheNet ligand activity analysis -Now we will show how you can look at the regulatory potential scores between ligands and target genes of interest. In this case, we will look at links between top-ranked p-EMT regulating ligands and p-EMT genes. In the ligand-target heatmaps, we show here regulatory potential scores for interactions between the 20 top-ranked ligands and following target genes: genes that belong to the gene set of interest and to the 250 most strongly predicted targets of at least one of the 20 top-ranked ligands (the top 250 targets according to the general prior model, so not the top 250 targets for this dataset). Consequently, genes of your gene set that are not a top target gene of one of the prioritized ligands, will not be shown on the heatmap. +With the ligand activity analysis, we assess how well each CAF-ligand can predict the p-EMT gene set compared to the background of expressed genes. ```{r} -active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) -nrow(active_ligand_target_links_df) -head(active_ligand_target_links_df) ``` -For visualization purposes, we adapted the ligand-target regulatory potential matrix as follows. Regulatory potential scores were set as 0 if their score was below a predefined threshold, which was here the 0.25 quantile of scores of interactions between the 30 top-ranked ligands and each of their respective top targets (see the ligand-target network defined in the data frame). +Ligands are ranked based on the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response. Although other metrics like the AUROC and pearson correlation coefficient are also computed, we demonstrated in our validation study that the AUPR was the most informative measure to define ligand activity (this was the Pearson correlation for v1). The vignette on how we performed the validation can be found at [Evaluation of NicheNet's ligand-target predictions](model_evaluation.md). ```{r} -active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) +(ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected)))) + +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% + arrange(-aupr_corrected) %>% pull(test_ligand) -nrow(active_ligand_target_links_df) -head(active_ligand_target_links_df) +best_upstream_ligands ``` +We will use the top 30 ligands to predict active target genes and construct an active ligand-receptor network. -The putatively active ligand-target links will now be visualized in a heatmap. -The order of the ligands accord to the ranking according to the ligand activity prediction. +## 5. Infer target genes and receptors of top-ranked ligands -```{r, fig.width=8, fig.height=6} -order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() -order_targets = active_ligand_target_links_df$target %>% unique() -vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() +Active target genes are defined as genes in the gene set of interest that have the highest regulatory potential for each top-ranked ligand. These top targets of each ligand are based on the prior model. Specifically, the function get_weighted_ligand_target_links will return genes that are in the gene set of interest and are the top `n` targets of a ligand (default: `n = 200`). -p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized CAF-ligands","p-EMT genes in malignant cells", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic")) +```{r ligand-target-heatmap, fig.width=8, fig.height=6} +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 200) %>% bind_rows() -p_ligand_target_network +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.25) -``` +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) -Note that the choice of these cutoffs for visualization is quite arbitrary. We recommend users to test several cutoff values. +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) -If you would consider more than the top 250 targets based on prior information, you will infer more, but less confident, ligand-target links; by considering less than 250 targets, you will be more stringent. +p_ligand_target_network <- make_heatmap_ggplot(vis_ligand_target, "Prioritized CAF-ligands", "p-EMT genes in malignant cells", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") -If you would change the quantile cutoff that is used to set scores to 0 (for visualization purposes), lowering this cutoff will result in a more dense heatmap, whereas highering this cutoff will result in a more sparse heatmap. +p_ligand_target_network -## Follow-up analysis 1: Ligand-receptor network inference for top-ranked ligands +``` -One type of follow-up analysis is looking at which receptors of the receiver cell population (here: malignant cells) can potentially bind to the prioritized ligands from the sender cell population (here: CAFs). +We can also look at which receptors of the receiver cell population (malignant cells) can potentially bind to the prioritized ligands from the sender cell population (CAFs). -So, we will now infer the predicted ligand-receptor interactions of the top-ranked ligands and visualize these in a heatmap. +```{r ligand-receptor-heatmap, fig.width=9, fig.height=6} -```{r} -# get the ligand-receptor network of the top-ranked ligands -lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) -best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - -# get the weights of the ligand-receptor interactions as used in the NicheNet model -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) -lr_network_top_df = weighted_networks$lr_sig %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) - -# convert to a matrix -lr_network_top_df = lr_network_top_df %>% spread("from","weight",fill = 0) -lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) - -# perform hierarchical clustering to order the ligands and receptors -dist_receptors = dist(lr_network_top_matrix, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] -``` +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) -Show a heatmap of the ligand-receptor interactions +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") -```{r, fig.width=9, fig.height=6} -vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] -p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Prioritized CAF-ligands","Receptors expressed by malignant cells", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") -p_ligand_receptor_network +(make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Prioritized CAF-ligands", x_name = "Receptors expressed by malignant cells", + color = "mediumvioletred", legend_title = "Prior interaction potential")) ``` -## Follow-up analysis 2: Visualize expression of top-predicted ligands and their target genes in a combined heatmap -NicheNet only considers expressed ligands of sender cells, but does not take into account their expression for ranking the ligands. The ranking is purely based on the potential that a ligand might regulate the gene set of interest, given prior knowledge. Because it is also useful to further look into expression of ligands and their target genes, we demonstrate here how you could make a combined figure showing ligand activity, ligand expression, target gene expression and ligand-target regulatory potential. +## 6. Summary visualizations of the NicheNet analysis -#### Load additional packages required for the visualization: +### Load additional packages required for the visualization ```{r} library(RColorBrewer) @@ -270,16 +240,18 @@ library(cowplot) library(ggpubr) ``` -#### Prepare the ligand activity matrix +### Prepare the ligand activity matrix ```{r} -ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) - -vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") +vis_ligand_aupr <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) %>% as.matrix(ncol = 1) ``` -```{r, fig.width=5, fig.height=6} -p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") +```{r ligand-activity-heatmap, fig.width=5, fig.height=6} +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized CAF-ligands", "Ligand activity", + color = "darkorange", legend_title = "AUPR") + + theme(axis.text.x.top = element_blank()) p_ligand_aupr ``` @@ -288,59 +260,85 @@ p_ligand_aupr Because the single-cell data was collected from multiple tumors, we will show here the average expression of the ligands per tumor. ```{r} -expression_df_CAF = expression[CAF_ids,order_ligands] %>% data.frame() %>% rownames_to_column("cell") %>% as_tibble() %>% inner_join(sample_info %>% select(cell,tumor), by = "cell") +expression_df_CAF <- expression[CAF_ids, best_upstream_ligands] %>% data.frame() %>% + rownames_to_column("cell") %>% as_tibble() %>% + inner_join(sample_info %>% select(cell,tumor), by = "cell") + +aggregated_expression_CAF <- expression_df_CAF %>% group_by(tumor) %>% + select(-cell) %>% summarise_all(mean) -aggregated_expression_CAF = expression_df_CAF %>% group_by(tumor) %>% select(-cell) %>% summarise_all(mean) +aggregated_expression_df_CAF <- aggregated_expression_CAF %>% select(-tumor) %>% t() %>% + magrittr::set_colnames(aggregated_expression_CAF$tumor) %>% + data.frame() %>% rownames_to_column("ligand") %>% as_tibble() -aggregated_expression_df_CAF = aggregated_expression_CAF %>% select(-tumor) %>% t() %>% magrittr::set_colnames(aggregated_expression_CAF$tumor) %>% data.frame() %>% rownames_to_column("ligand") %>% as_tibble() +aggregated_expression_matrix_CAF <- aggregated_expression_df_CAF %>% select(-ligand) %>% as.matrix() %>% + magrittr::set_rownames(aggregated_expression_df_CAF$ligand) -aggregated_expression_matrix_CAF = aggregated_expression_df_CAF %>% select(-ligand) %>% as.matrix() %>% magrittr::set_rownames(aggregated_expression_df_CAF$ligand) +# This order was determined based on the paper from Puram et al. Tumors are ordered according to p-EMT score. +order_tumors <- c("HN6","HN20","HN26","HN28","HN22","HN25","HN5","HN18","HN17","HN16") +vis_ligand_tumor_expression <- aggregated_expression_matrix_CAF[rev(best_upstream_ligands), order_tumors] -order_tumors = c("HN6","HN20","HN26","HN28","HN22","HN25","HN5","HN18","HN17","HN16") # this order was determined based on the paper from Puram et al. Tumors are ordered according to p-EMT score. -vis_ligand_tumor_expression = aggregated_expression_matrix_CAF[order_ligands,order_tumors] ``` -```{r, fig.width=9, fig.height=6} -library(RColorBrewer) -color = colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100) -p_ligand_tumor_expression = vis_ligand_tumor_expression %>% make_heatmap_ggplot("Prioritized CAF-ligands","Tumor", color = color[100],legend_position = "top", x_axis_position = "top", legend_title = "Expression\n(averaged over\nsingle cells)") + theme(axis.text.y = element_text(face = "italic")) +```{r ligand-expression-heatmap, fig.width=9, fig.height=6} +color <- colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100) +p_ligand_tumor_expression <- make_heatmap_ggplot(vis_ligand_tumor_expression, + "Prioritized CAF-ligands", "Tumor", + color = color[100], + legend_title = "Expression\n(averaged over\nsingle cells)") p_ligand_tumor_expression ``` #### Prepare expression of target genes in malignant cells per tumor ```{r} -expression_df_target = expression[malignant_ids,geneset_oi] %>% data.frame() %>% rownames_to_column("cell") %>% as_tibble() %>% inner_join(sample_info %>% select(cell,tumor), by = "cell") +expression_df_target <- expression[malignant_ids,geneset_oi] %>% data.frame() %>% + rownames_to_column("cell") %>% as_tibble() %>% + inner_join(sample_info %>% select(cell,tumor), by = "cell") -aggregated_expression_target = expression_df_target %>% group_by(tumor) %>% select(-cell) %>% summarise_all(mean) +aggregated_expression_target <- expression_df_target %>% group_by(tumor) %>% + select(-cell) %>% summarise_all(mean) -aggregated_expression_df_target = aggregated_expression_target %>% select(-tumor) %>% t() %>% magrittr::set_colnames(aggregated_expression_target$tumor) %>% data.frame() %>% rownames_to_column("target") %>% as_tibble() +aggregated_expression_df_target <- aggregated_expression_target %>% select(-tumor) %>% t() %>% + magrittr::set_colnames(aggregated_expression_target$tumor) %>% + data.frame() %>% rownames_to_column("target") %>% as_tibble() -aggregated_expression_matrix_target = aggregated_expression_df_target %>% select(-target) %>% as.matrix() %>% magrittr::set_rownames(aggregated_expression_df_target$target) +aggregated_expression_matrix_target <- aggregated_expression_df_target %>% select(-target) %>%as.matrix() %>% + magrittr::set_rownames(aggregated_expression_df_target$target) -vis_target_tumor_expression_scaled = aggregated_expression_matrix_target %>% t() %>% scale_quantile() %>% .[order_tumors,order_targets] +vis_target_tumor_expression_scaled <- aggregated_expression_matrix_target %>% t() %>% scale_quantile() %>% + .[order_tumors, order_targets] ``` -```{r, fig.width=9, fig.height=6} -p_target_tumor_scaled_expression = vis_target_tumor_expression_scaled %>% make_threecolor_heatmap_ggplot("Tumor","Target", low_color = color[1],mid_color = color[50], mid = 0.5, high_color = color[100], legend_position = "top", x_axis_position = "top" , legend_title = "Scaled expression\n(averaged over\nsingle cells)") + theme(axis.text.x = element_text(face = "italic")) +```{r target-expression-heatmap, fig.width=9, fig.height=6} +p_target_tumor_scaled_expression <- make_threecolor_heatmap_ggplot(vis_target_tumor_expression_scaled, + "Tumor", "Target", + low_color = color[1], mid_color = color[50], mid = 0.5, + high_color = color[100], + legend_title = "Scaled expression\n(averaged over\nsingle cells)") p_target_tumor_scaled_expression ``` #### Combine the different heatmaps in one overview figure -```{r, fig.width=13, fig.height=7} +```{r summary-vis, fig.width=15, fig.height=9} + figures_without_legend = plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - p_ligand_tumor_expression + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + p_ligand_aupr + theme(legend.position = "none"), + p_ligand_tumor_expression + theme(legend.position = "none", + axis.title.y = element_blank()), + p_ligand_target_network + theme(legend.position = "none", + axis.ticks = element_blank(), + axis.title.y = element_blank()), NULL, NULL, - p_target_tumor_scaled_expression + theme(legend.position = "none", axis.ticks = element_blank()) + xlab(""), + p_target_tumor_scaled_expression + theme(legend.position = "none", + axis.title.x = element_blank()), align = "hv", nrow = 2, - rel_widths = c(ncol(vis_ligand_aupr)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, - rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled) + 3)) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target))-2, + rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled)+3)) legends = plot_grid( as_ggplot(get_legend(p_ligand_aupr)), @@ -357,12 +355,9 @@ plot_grid(figures_without_legend, ``` -## Other follow-up analyses: - -As another follow-up analysis, you can infer possible signaling paths between ligands and targets of interest. You can read how to do this in the following vignette [Inferring ligand-to-target signaling paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", package="nichenetr")`. - -Another follow-up analysis is getting a "tangible" measure of how well top-ranked ligands predict the gene set of interest and assess which genes of the gene set can be predicted well. You can read how to do this in the following vignette [Assess how well top-ranked ligands can predict a gene set of interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", package="nichenetr")`. +```{r} +sessionInfo() +``` -In case you want to visualize ligand-target links between multiple interacting cells, you can make an appealing circos plot as shown in vignette [Circos plot visualization to show active ligand-target links between interacting cells](circos.md):`vignette("circos", package="nichenetr")`. ## References diff --git a/vignettes/ligand_activity_geneset.md b/vignettes/ligand_activity_geneset.md index ef52c4f..d9260b4 100644 --- a/vignettes/ligand_activity_geneset.md +++ b/vignettes/ligand_activity_geneset.md @@ -8,88 +8,69 @@ Robin Browaeys rmarkdown::render("vignettes/ligand_activity_geneset.Rmd", output_format = "github_document") --> -In this vignette, you can learn how to perform a basic NicheNet -analysis. A NicheNet analysis can help you to generate hypotheses about -an intercellular communication process of interest for which you have -bulk or single-cell gene expression data. Specifically, NicheNet can -predict 1) which ligands from one cell population (“sender/niche”) are -most likely to affect target gene expression in an interacting cell -population (“receiver/target”) and 2) which specific target genes are -affected by which of these predicted ligands. - -Because NicheNet studies how ligands affect gene expression in -neighboring cells, you need to have data about this effect in gene -expression you want to study. So, you need to have a clear set of genes -that are putatively affected by ligands from one of more interacting -cells. - -The pipeline of a basic NicheNet analysis consist mainly of the -following steps: - -- 1. Define a “sender/niche” cell population and a “receiver/target” - cell population present in your expression data and determine - which genes are expressed in both populations - -- 2. Define a gene set of interest: these are the genes in the - “receiver/target” cell population that are potentially affected by - ligands expressed by interacting cells (e.g. genes differentially - expressed upon cell-cell interaction) - -- 3. Define a set of potential ligands: these are ligands that are - expressed by the “sender/niche” cell population and bind a - (putative) receptor expressed by the “receiver/target” population - -- 4) Perform NicheNet ligand activity analysis: rank the potential - ligands based on the presence of their target genes in the gene - set of interest (compared to the background set of genes) - -- 5) Infer top-predicted target genes of ligands that are top-ranked in - the ligand activity analysis - -This vignette guides you in detail through all these steps. As example -expression data of interacting cells, we will use data from Puram et -al. to explore intercellular communication in the tumor microenvironment -in head and neck squamous cell carcinoma (HNSCC) (See Puram et al. -2017). More specifically, we will look at which ligands expressed by -cancer-associated fibroblasts (CAFs) can induce a specific gene program -in neighboring malignant cells. This program, a partial -epithelial-mesenschymal transition (p-EMT) program, could be linked to -metastasis by Puram et al.  +This vignette follows the steps described in [Perform NicheNet analysis +starting from a Seurat object: step-by-step analysis](seurat_steps.md) +with two major differences: a predefined gene set of interest is given, +and a different definition of expressed genes. + +Here, we use explore intercellular communication in the tumor +microenvironment of head and neck squamous cell carcinoma (HNSCC) (Puram +et al. 2017). More specifically, we will look at which ligands expressed +by cancer-associated fibroblasts (CAFs) can induce a specific gene +program in neighboring malignant cells. The original authors of the +study have linked this partial epithelial-mesenschymal transition +(p-EMT) program to metastasis. The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. -## Step 0: Load required packages, NicheNet’s ligand-target prior model and processed expression data of interacting cells +# Prepare NicheNet analysis -Packages: +### Load packages ``` r library(nichenetr) library(tidyverse) ``` -Ligand-target model: +### Read in NicheNet’s networks -This model denotes the prior potential that a particular ligand might -regulate the expression of a specific target gene. In Nichenet v2, -networks and matrices for both mouse and human are made separately -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7074291.svg)](https://doi.org/10.5281/zenodo.7074291). +The ligand-target prior model, ligand-receptor network, and weighted +integrated networks are needed for this vignette. The ligand-target +prior model is a matrix describing the potential that a ligand may +regulate a target gene, and it is used to run the ligand activity +analysis. The ligand-receptor network contains information on potential +ligand-receptor bindings, and it is used to identify potential ligands. +Finally, the weighted ligand-receptor network contains weights +representing the potential that a ligand will bind to a receptor, and it +is used for visualization. ``` r -options(timeout = 600) -organism = "human" +organism <- "human" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) +head(lr_network) +## # A tibble: 6 × 2 +## from to +## +## 1 A2M MMP2 +## 2 A2M MMP9 +## 3 A2M LRP1 +## 4 A2M KLK3 +## 5 AANAT MTNR1A +## 6 AANAT MTNR1B ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ## A2M AANAT ABCA1 ACE ACE2 ## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 @@ -97,340 +78,265 @@ ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 ## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 ## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 + +head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network +## # A tibble: 6 × 3 +## from to weight +## +## 1 A-GAMMA3'E ACTG1P11 0.100 +## 2 A-GAMMA3'E AXIN2 0.0869 +## 3 A-GAMMA3'E BUB1B-PAK6 0.0932 +## 4 A-GAMMA3'E CEACAM7 0.0793 +## 5 A-GAMMA3'E CHRNA1 0.0901 +## 6 A-GAMMA3'E DTX2P1 0.0976 +head(weighted_networks$gr) # interactions and their weights in the gene regulatory network +## # A tibble: 6 × 3 +## from to weight +## +## 1 A1BG A2M 0.165 +## 2 AAAS GFAP 0.0906 +## 3 AADAC CTAG1B 0.104 +## 4 AADAC CYP3A4 0.177 +## 5 AADAC DIRAS3 0.0936 +## 6 AADAC IRF8 0.0892 ``` -Expression data of interacting cells: publicly available single-cell -data from CAF and malignant cells from HNSCC tumors: +### Read in the expression data of interacting cells + +This is publicly available single-cell data from CAF and malignant cells +from HNSCC tumors. ``` r -hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) -expression = hnscc_expression$expression -sample_info = hnscc_expression$sample_info # contains meta-information about the cells +hnscc_expression <- readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) +expression <- hnscc_expression$expression +sample_info <- hnscc_expression$sample_info # contains meta-information about the cells ``` -Because the NicheNet 2.0. networks are in the most recent version of the +Because the NicheNet v2 networks are in the most recent version of the official gene symbols, we will make sure that the gene symbols used in the expression data are also updated (= converted from their “aliases” to official gene symbols). ``` r # If this is not done, there will be 35 genes fewer in lr_network_expressed! -colnames(expression) = convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) +colnames(expression) <- convert_alias_to_symbols(colnames(expression), "human", verbose = FALSE) ``` -## Step 1: Define expressed genes in sender and receiver cell populations +## 1. Define a set of potential ligands Our research question is to prioritize which ligands expressed by CAFs -can induce p-EMT in neighboring malignant cells. Therefore, CAFs are the -sender cells in this example and malignant cells are the receiver cells. -This is an example of paracrine signaling. Note that autocrine signaling -can be considered if sender and receiver cell type are the same. - -Now, we will determine which genes are expressed in the sender cells -(CAFs) and receiver cells (malignant cells) from high quality primary -tumors. Therefore, we wil not consider cells from tumor samples of less -quality or from lymph node metastases. - -To determine expressed genes in this case study, we use the definition -used by Puram et al. (the authors of this dataset), which is: Ea, the -aggregate expression of each gene i across the k cells, calculated as -Ea(i) = log2(average(TPM(i)1…k)+1), should be \>= 4. We recommend users -to define expressed genes in the way that they consider to be most -appropriate for their dataset. For single-cell data generated by the 10x -platform in our lab, we don’t use the definition used here, but we -consider genes to be expressed in a cell type when they have non-zero -values in at least 10% of the cells from that cell type. This is -described as well in the other vignette [Perform NicheNet analysis -starting from a Seurat object: step-by-step -analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`. +can induce p-EMT in neighboring malignant cells. Hence, we will only use +on the **sender-focused** approach, with CAFs as senders and malignant +cells as receivers. + +The set of potential ligands is defined as ligands that are expressed in +sender cells whose cognate receptors are also expressed in receiver +cells. + +So first, we will determine which genes are expressed in the sender +cells (CAFs) and receiver cells (malignant cells). We will only consider +samples from high quality primary tumors and also remove samples from +lymph node metastases. We will use the definition of expressed genes by +the original authors, that is, the aggregate expression of each gene $i$ +across the $k$ cells, calculated as +$E_a(i) = log_{2}(average(TPM(i)1…k)+1)$, should be \>= 4. + +We recommend users to define expressed genes in the way that they +consider to be most appropriate for their dataset. For single-cell data +generated by the 10x platform in our lab, we consider genes to be +expressed in a cell type when they have non-zero values in a certain +fraction of the cells from that cell type (usually 10%). This is used in +the vignette [Perform NicheNet analysis starting from a Seurat object: +step-by-step analysis](seurat_steps.md). ``` r -tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") +tumors_remove <- c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") + +CAF_ids <- sample_info %>% + filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & + `non-cancer cell type` == "CAF") %>% pull(cell) +malignant_ids <- sample_info %>% filter(`Lymph node` == 0 & + !(tumor %in% tumors_remove) & + `classified as cancer cell` == 1) %>% pull(cell) -CAF_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "CAF") %>% pull(cell) -malignant_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `classified as cancer cell` == 1) %>% pull(cell) +expressed_genes_sender <- expression[CAF_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -expressed_genes_sender = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_receiver = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +expressed_genes_receiver <- expression[malignant_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% + names() -# Check the number of expressed genes: should be a 'reasonable' number of total expressed genes in a cell type, e.g. between 5000-10000 (and not 500 or 20000) length(expressed_genes_sender) ## [1] 6706 length(expressed_genes_receiver) ## [1] 6351 ``` -## Step 2: Define the gene set of interest and a background of genes - -As gene set of interest, we consider the genes of which the expression -is possibly affected due to communication with other cells. The -definition of this gene set depends on your research question and is a -crucial step in the use of NicheNet. - -Because we here want to investigate how CAFs regulate the expression of -p-EMT genes in malignant cells, we will use the p-EMT gene set defined -by Puram et al. as gene set of interest and use all genes expressed in -malignant cells as background of genes. +Now, we can filter the expressed ligands and receptors to only those +that putatively bind together. This information is stored in NicheNet’s +ligand-receptor network by gathering various data sources. ``` r -geneset_oi = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. -head(geneset_oi) -## [1] "SERPINE1" "TGFBI" "MMP10" "LAMC2" "P4HA2" "PDPN" +ligands <- lr_network %>% pull(from) %>% unique() +expressed_ligands <- intersect(ligands,expressed_genes_sender) -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -head(background_expressed_genes) -## [1] "RPS11" "ELMO2" "PNMA1" "MMP2" "TMEM216" "ERCC5" -``` - -## Step 3: Define a set of potential ligands +receptors <- lr_network %>% pull(to) %>% unique() +expressed_receptors <- intersect(receptors,expressed_genes_receiver) -As potentially active ligands, we will use ligands that are 1) expressed -by CAFs and 2) can bind a (putative) receptor expressed by malignant -cells. Putative ligand-receptor links were gathered from NicheNet’s -ligand-receptor data sources. - -``` r -# If wanted, users can remove ligand-receptor interactions that were predicted based on protein-protein interactions and only keep ligand-receptor interactions that are described in curated databases. To do this: uncomment following line of code: -# lr_network = lr_network %>% filter(database != "ppi_prediction_go" & database != "ppi_prediction") +potential_ligands <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% + pull(from) %>% unique() -ligands = lr_network %>% pull(from) %>% unique() -expressed_ligands = intersect(ligands,expressed_genes_sender) - -receptors = lr_network %>% pull(to) %>% unique() -expressed_receptors = intersect(receptors,expressed_genes_receiver) - -lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) -head(lr_network_expressed) -## # A tibble: 6 × 2 -## from to -## -## 1 A2M MMP2 -## 2 A2M MMP9 -## 3 ADAM10 APP -## 4 ADAM10 CD44 -## 5 ADAM10 TSPAN5 -## 6 ADAM10 TSPAN15 -``` - -This ligand-receptor network contains the expressed ligand-receptor -interactions. As potentially active ligands for the NicheNet analysis, -we will consider the ligands from this network. - -``` r -potential_ligands = lr_network_expressed %>% pull(from) %>% unique() head(potential_ligands) ## [1] "A2M" "ADAM10" "ADAM12" "ADAM15" "ADAM17" "ADAM9" ``` -## Step 4: Perform NicheNet’s ligand activity analysis on the gene set of interest +## 2. Define the gene set of interest and a background of genes -Now perform the ligand activity analysis: in this analysis, we will -calculate the ligand activity of each ligand, or in other words, we will -assess how well each CAF-ligand can predict the p-EMT gene set compared -to the background of expressed genes (predict whether a gene belongs to -the p-EMT program or not). +The gene set of interest consists of genes for which the expression is +possibly affected due to communication with other cells. The definition +of this gene set depends on your research question and is a crucial step +in the use of NicheNet. -``` r -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -``` - -Now, we want to rank the ligands based on their ligand activity. In our -validation study, we showed that the area under the precision-recall -curve (AUPR) between a ligand’s target predictions and the observed -transcriptional response was the most informative measure to define -ligand activity (this was the Pearson correlation for v1). Therefore, we -will rank the ligands based on their AUPR. This allows us to prioritize -p-EMT-regulating ligands. +Here, we will use the p-EMT gene set defined by the original authors as +gene set of interest to investigate how CAFs can induce p-EMT in +malignant cells. ``` r -ligand_activities %>% arrange(-aupr_corrected) -## # A tibble: 212 × 5 -## test_ligand auroc aupr aupr_corrected pearson -## -## 1 TGFB2 0.772 0.120 0.105 0.195 -## 2 BMP8A 0.774 0.0852 0.0699 0.175 -## 3 INHBA 0.777 0.0837 0.0685 0.122 -## 4 CXCL12 0.714 0.0829 0.0676 0.141 -## 5 LTBP1 0.727 0.0762 0.0609 0.160 -## 6 CCN2 0.736 0.0734 0.0581 0.141 -## 7 TNXB 0.719 0.0717 0.0564 0.157 -## 8 ENG 0.764 0.0703 0.0551 0.145 -## 9 BMP5 0.750 0.0691 0.0538 0.148 -## 10 VCAN 0.720 0.0687 0.0534 0.140 -## # … with 202 more rows -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) -head(best_upstream_ligands) -## [1] "TGFB2" "BMP8A" "INHBA" "CXCL12" "LTBP1" "CCN2" +# Only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. +geneset_oi <- readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), + col_names = "gene") %>% + pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] + +length(geneset_oi) +## [1] 96 ``` -We see here that the performance metrics indicate that the 30 top-ranked -ligands can predict the p-EMT genes reasonably, this implies that -ranking of the ligands might be accurate as shown in our study. However, -it is possible that for some gene sets, the target gene prediction -performance of the top-ranked ligands would not be much better than -random prediction. In that case, prioritization of ligands will be less -trustworthy. - -Additional note: we looked at the top 30 ligands here and will continue -the analysis by inferring p-EMT target genes of these 30 ligands. -However, the choice of looking only at the 30 top-ranked ligands for -further biological interpretation is based on biological intuition and -is quite arbitrary. Therefore, users can decide to continue the analysis -with a different number of ligands. We recommend to check the selected -cutoff by looking at the distribution of the ligand activity values. -Here, we show the ligand activity histogram (the score for the 30th -ligand is indicated via the dashed line). +## 3. Define background genes + +We will all genes expressed in malignant cells as the background set. ``` r -# show histogram of ligand activity scores -p_hist_lig_activity = ggplot(ligand_activities, aes(x=aupr_corrected)) + - geom_histogram(color="black", fill="darkorange") + - # geom_density(alpha=.1, fill="orange") + - geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), color="red", linetype="dashed", size=1) + - labs(x="ligand activity (PCC)", y = "# ligands") + - theme_classic() -p_hist_lig_activity -``` +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png) +length(background_expressed_genes) +## [1] 6288 +``` -## Step 5: Infer target genes of top-ranked ligands and visualize in a heatmap +## 4. Perform NicheNet ligand activity analysis -Now we will show how you can look at the regulatory potential scores -between ligands and target genes of interest. In this case, we will look -at links between top-ranked p-EMT regulating ligands and p-EMT genes. In -the ligand-target heatmaps, we show here regulatory potential scores for -interactions between the 20 top-ranked ligands and following target -genes: genes that belong to the gene set of interest and to the 250 most -strongly predicted targets of at least one of the 20 top-ranked ligands -(the top 250 targets according to the general prior model, so not the -top 250 targets for this dataset). Consequently, genes of your gene set -that are not a top target gene of one of the prioritized ligands, will -not be shown on the heatmap. +With the ligand activity analysis, we assess how well each CAF-ligand +can predict the p-EMT gene set compared to the background of expressed +genes. ``` r -active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows() - -nrow(active_ligand_target_links_df) -## [1] 460 -head(active_ligand_target_links_df) -## # A tibble: 6 × 3 -## ligand target weight -## -## 1 TGFB2 ACTN1 0.0849 -## 2 TGFB2 C1S 0.124 -## 3 TGFB2 COL17A1 0.0732 -## 4 TGFB2 COL1A1 0.243 -## 5 TGFB2 COL4A2 0.148 -## 6 TGFB2 F3 0.0747 +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) ``` -For visualization purposes, we adapted the ligand-target regulatory -potential matrix as follows. Regulatory potential scores were set as 0 -if their score was below a predefined threshold, which was here the 0.25 -quantile of scores of interactions between the 30 top-ranked ligands and -each of their respective top targets (see the ligand-target network -defined in the data frame). +Ligands are ranked based on the area under the precision-recall curve +(AUPR) between a ligand’s target predictions and the observed +transcriptional response. Although other metrics like the AUROC and +pearson correlation coefficient are also computed, we demonstrated in +our validation study that the AUPR was the most informative measure to +define ligand activity (this was the Pearson correlation for v1). The +vignette on how we performed the validation can be found at [Evaluation +of NicheNet’s ligand-target predictions](model_evaluation.md). ``` r -active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25) - -nrow(active_ligand_target_links_df) -## [1] 460 -head(active_ligand_target_links_df) -## # A tibble: 6 × 3 -## ligand target weight -## -## 1 TGFB2 ACTN1 0.0849 -## 2 TGFB2 C1S 0.124 -## 3 TGFB2 COL17A1 0.0732 -## 4 TGFB2 COL1A1 0.243 -## 5 TGFB2 COL4A2 0.148 -## 6 TGFB2 F3 0.0747 +(ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected)))) +## # A tibble: 212 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 TGFB2 0.772 0.120 0.105 0.195 1 +## 2 BMP8A 0.774 0.0852 0.0699 0.175 2 +## 3 INHBA 0.777 0.0837 0.0685 0.122 3 +## 4 CXCL12 0.714 0.0829 0.0676 0.141 4 +## 5 LTBP1 0.727 0.0762 0.0609 0.160 5 +## 6 CCN2 0.736 0.0734 0.0581 0.141 6 +## 7 TNXB 0.719 0.0717 0.0564 0.157 7 +## 8 ENG 0.764 0.0703 0.0551 0.145 8 +## 9 BMP5 0.750 0.0691 0.0538 0.148 9 +## 10 VCAN 0.720 0.0687 0.0534 0.140 10 +## # ℹ 202 more rows + +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% + arrange(-aupr_corrected) %>% pull(test_ligand) + +best_upstream_ligands +## [1] "TGFB2" "BMP8A" "INHBA" "CXCL12" "LTBP1" "CCN2" "TNXB" "ENG" "BMP5" "VCAN" "COMP" "CCN3" "COL3A1" "MMP14" +## [15] "COL4A1" "HGF" "TIMP2" "FBN1" "MMP2" "IBSP" "VCAM1" "CFH" "BMP4" "FN1" "B2M" "COL1A2" "GDF10" "COL14A1" +## [29] "CD47" "COL18A1" ``` -The putatively active ligand-target links will now be visualized in a -heatmap. The order of the ligands accord to the ranking according to the -ligand activity prediction. +We will use the top 30 ligands to predict active target genes and +construct an active ligand-receptor network. -``` r -order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() -order_targets = active_ligand_target_links_df$target %>% unique() -vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() +## 5. Infer target genes and receptors of top-ranked ligands -p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized CAF-ligands","p-EMT genes in malignant cells", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic")) +Active target genes are defined as genes in the gene set of interest +that have the highest regulatory potential for each top-ranked ligand. +These top targets of each ligand are based on the prior model. +Specifically, the function get_weighted_ligand_target_links will return +genes that are in the gene set of interest and are the top `n` targets +of a ligand (default: `n = 200`). -p_ligand_target_network -``` +``` r +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 200) %>% bind_rows() -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png) +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.25) -Note that the choice of these cutoffs for visualization is quite -arbitrary. We recommend users to test several cutoff values. +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) -If you would consider more than the top 250 targets based on prior -information, you will infer more, but less confident, ligand-target -links; by considering less than 250 targets, you will be more stringent. +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) -If you would change the quantile cutoff that is used to set scores to 0 -(for visualization purposes), lowering this cutoff will result in a more -dense heatmap, whereas highering this cutoff will result in a more -sparse heatmap. +p_ligand_target_network <- make_heatmap_ggplot(vis_ligand_target, "Prioritized CAF-ligands", "p-EMT genes in malignant cells", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") -## Follow-up analysis 1: Ligand-receptor network inference for top-ranked ligands +p_ligand_target_network +``` -One type of follow-up analysis is looking at which receptors of the -receiver cell population (here: malignant cells) can potentially bind to -the prioritized ligands from the sender cell population (here: CAFs). +![](ligand_activity_geneset_files/figure-gfm/ligand-target-heatmap-1.png) -So, we will now infer the predicted ligand-receptor interactions of the -top-ranked ligands and visualize these in a heatmap. +We can also look at which receptors of the receiver cell population +(malignant cells) can potentially bind to the prioritized ligands from +the sender cell population (CAFs). ``` r -# get the ligand-receptor network of the top-ranked ligands -lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) -best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() - -# get the weights of the ligand-receptor interactions as used in the NicheNet model -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) -lr_network_top_df = weighted_networks$lr_sig %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) - -# convert to a matrix -lr_network_top_df = lr_network_top_df %>% spread("from","weight",fill = 0) -lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) - -# perform hierarchical clustering to order the ligands and receptors -dist_receptors = dist(lr_network_top_matrix, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] -``` -Show a heatmap of the ligand-receptor interactions +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) -``` r -vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] -p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Prioritized CAF-ligands","Receptors expressed by malignant cells", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") -p_ligand_receptor_network -``` +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png) +(make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Prioritized CAF-ligands", x_name = "Receptors expressed by malignant cells", + color = "mediumvioletred", legend_title = "Prior interaction potential")) +``` -## Follow-up analysis 2: Visualize expression of top-predicted ligands and their target genes in a combined heatmap +![](ligand_activity_geneset_files/figure-gfm/ligand-receptor-heatmap-1.png) -NicheNet only considers expressed ligands of sender cells, but does not -take into account their expression for ranking the ligands. The ranking -is purely based on the potential that a ligand might regulate the gene -set of interest, given prior knowledge. Because it is also useful to -further look into expression of ligands and their target genes, we -demonstrate here how you could make a combined figure showing ligand -activity, ligand expression, target gene expression and ligand-target -regulatory potential. +## 6. Summary visualizations of the NicheNet analysis -#### Load additional packages required for the visualization: +### Load additional packages required for the visualization ``` r library(RColorBrewer) @@ -438,20 +344,22 @@ library(cowplot) library(ggpubr) ``` -#### Prepare the ligand activity matrix +### Prepare the ligand activity matrix ``` r -ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) - -vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") +vis_ligand_aupr <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) %>% as.matrix(ncol = 1) ``` ``` r -p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized CAF-ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized CAF-ligands", "Ligand activity", + color = "darkorange", legend_title = "AUPR") + + theme(axis.text.x.top = element_blank()) p_ligand_aupr ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png) +![](ligand_activity_geneset_files/figure-gfm/ligand-activity-heatmap-1.png) #### Prepare expression of ligands in fibroblast per tumor @@ -459,62 +367,87 @@ Because the single-cell data was collected from multiple tumors, we will show here the average expression of the ligands per tumor. ``` r -expression_df_CAF = expression[CAF_ids,order_ligands] %>% data.frame() %>% rownames_to_column("cell") %>% as_tibble() %>% inner_join(sample_info %>% select(cell,tumor), by = "cell") +expression_df_CAF <- expression[CAF_ids, best_upstream_ligands] %>% data.frame() %>% + rownames_to_column("cell") %>% as_tibble() %>% + inner_join(sample_info %>% select(cell,tumor), by = "cell") -aggregated_expression_CAF = expression_df_CAF %>% group_by(tumor) %>% select(-cell) %>% summarise_all(mean) +aggregated_expression_CAF <- expression_df_CAF %>% group_by(tumor) %>% + select(-cell) %>% summarise_all(mean) -aggregated_expression_df_CAF = aggregated_expression_CAF %>% select(-tumor) %>% t() %>% magrittr::set_colnames(aggregated_expression_CAF$tumor) %>% data.frame() %>% rownames_to_column("ligand") %>% as_tibble() +aggregated_expression_df_CAF <- aggregated_expression_CAF %>% select(-tumor) %>% t() %>% + magrittr::set_colnames(aggregated_expression_CAF$tumor) %>% + data.frame() %>% rownames_to_column("ligand") %>% as_tibble() -aggregated_expression_matrix_CAF = aggregated_expression_df_CAF %>% select(-ligand) %>% as.matrix() %>% magrittr::set_rownames(aggregated_expression_df_CAF$ligand) +aggregated_expression_matrix_CAF <- aggregated_expression_df_CAF %>% select(-ligand) %>% as.matrix() %>% + magrittr::set_rownames(aggregated_expression_df_CAF$ligand) -order_tumors = c("HN6","HN20","HN26","HN28","HN22","HN25","HN5","HN18","HN17","HN16") # this order was determined based on the paper from Puram et al. Tumors are ordered according to p-EMT score. -vis_ligand_tumor_expression = aggregated_expression_matrix_CAF[order_ligands,order_tumors] +# This order was determined based on the paper from Puram et al. Tumors are ordered according to p-EMT score. +order_tumors <- c("HN6","HN20","HN26","HN28","HN22","HN25","HN5","HN18","HN17","HN16") +vis_ligand_tumor_expression <- aggregated_expression_matrix_CAF[rev(best_upstream_ligands), order_tumors] ``` ``` r -library(RColorBrewer) -color = colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100) -p_ligand_tumor_expression = vis_ligand_tumor_expression %>% make_heatmap_ggplot("Prioritized CAF-ligands","Tumor", color = color[100],legend_position = "top", x_axis_position = "top", legend_title = "Expression\n(averaged over\nsingle cells)") + theme(axis.text.y = element_text(face = "italic")) +color <- colorRampPalette(rev(brewer.pal(n = 7, name ="RdYlBu")))(100) +p_ligand_tumor_expression <- make_heatmap_ggplot(vis_ligand_tumor_expression, + "Prioritized CAF-ligands", "Tumor", + color = color[100], + legend_title = "Expression\n(averaged over\nsingle cells)") p_ligand_tumor_expression ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png) +![](ligand_activity_geneset_files/figure-gfm/ligand-expression-heatmap-1.png) #### Prepare expression of target genes in malignant cells per tumor ``` r -expression_df_target = expression[malignant_ids,geneset_oi] %>% data.frame() %>% rownames_to_column("cell") %>% as_tibble() %>% inner_join(sample_info %>% select(cell,tumor), by = "cell") +expression_df_target <- expression[malignant_ids,geneset_oi] %>% data.frame() %>% + rownames_to_column("cell") %>% as_tibble() %>% + inner_join(sample_info %>% select(cell,tumor), by = "cell") -aggregated_expression_target = expression_df_target %>% group_by(tumor) %>% select(-cell) %>% summarise_all(mean) +aggregated_expression_target <- expression_df_target %>% group_by(tumor) %>% + select(-cell) %>% summarise_all(mean) -aggregated_expression_df_target = aggregated_expression_target %>% select(-tumor) %>% t() %>% magrittr::set_colnames(aggregated_expression_target$tumor) %>% data.frame() %>% rownames_to_column("target") %>% as_tibble() +aggregated_expression_df_target <- aggregated_expression_target %>% select(-tumor) %>% t() %>% + magrittr::set_colnames(aggregated_expression_target$tumor) %>% + data.frame() %>% rownames_to_column("target") %>% as_tibble() -aggregated_expression_matrix_target = aggregated_expression_df_target %>% select(-target) %>% as.matrix() %>% magrittr::set_rownames(aggregated_expression_df_target$target) +aggregated_expression_matrix_target <- aggregated_expression_df_target %>% select(-target) %>%as.matrix() %>% + magrittr::set_rownames(aggregated_expression_df_target$target) -vis_target_tumor_expression_scaled = aggregated_expression_matrix_target %>% t() %>% scale_quantile() %>% .[order_tumors,order_targets] +vis_target_tumor_expression_scaled <- aggregated_expression_matrix_target %>% t() %>% scale_quantile() %>% + .[order_tumors, order_targets] ``` ``` r -p_target_tumor_scaled_expression = vis_target_tumor_expression_scaled %>% make_threecolor_heatmap_ggplot("Tumor","Target", low_color = color[1],mid_color = color[50], mid = 0.5, high_color = color[100], legend_position = "top", x_axis_position = "top" , legend_title = "Scaled expression\n(averaged over\nsingle cells)") + theme(axis.text.x = element_text(face = "italic")) +p_target_tumor_scaled_expression <- make_threecolor_heatmap_ggplot(vis_target_tumor_expression_scaled, + "Tumor", "Target", + low_color = color[1], mid_color = color[50], mid = 0.5, + high_color = color[100], + legend_title = "Scaled expression\n(averaged over\nsingle cells)") p_target_tumor_scaled_expression ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png) +![](ligand_activity_geneset_files/figure-gfm/target-expression-heatmap-1.png) #### Combine the different heatmaps in one overview figure ``` r + figures_without_legend = plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - p_ligand_tumor_expression + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), + p_ligand_aupr + theme(legend.position = "none"), + p_ligand_tumor_expression + theme(legend.position = "none", + axis.title.y = element_blank()), + p_ligand_target_network + theme(legend.position = "none", + axis.ticks = element_blank(), + axis.title.y = element_blank()), NULL, NULL, - p_target_tumor_scaled_expression + theme(legend.position = "none", axis.ticks = element_blank()) + xlab(""), + p_target_tumor_scaled_expression + theme(legend.position = "none", + axis.title.x = element_blank()), align = "hv", nrow = 2, - rel_widths = c(ncol(vis_ligand_aupr)+ 4.5, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target)) -2, - rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled) + 3)) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_tumor_expression), ncol(vis_ligand_target))-2, + rel_heights = c(nrow(vis_ligand_aupr), nrow(vis_target_tumor_expression_scaled)+3)) legends = plot_grid( as_ggplot(get_legend(p_ligand_aupr)), @@ -529,27 +462,64 @@ plot_grid(figures_without_legend, rel_heights = c(10,2), nrow = 2, align = "hv") ``` -![](ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png) - -## Other follow-up analyses: - -As another follow-up analysis, you can infer possible signaling paths -between ligands and targets of interest. You can read how to do this in -the following vignette [Inferring ligand-to-target signaling -paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", package="nichenetr")`. +![](ligand_activity_geneset_files/figure-gfm/summary-vis-1.png) -Another follow-up analysis is getting a “tangible” measure of how well -top-ranked ligands predict the gene set of interest and assess which -genes of the gene set can be predicted well. You can read how to do this -in the following vignette [Assess how well top-ranked ligands can -predict a gene set of -interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", package="nichenetr")`. - -In case you want to visualize ligand-target links between multiple -interacting cells, you can make an appealing circos plot as shown in -vignette [Circos plot visualization to show active ligand-target links -between interacting -cells](circos.md):`vignette("circos", package="nichenetr")`. +``` r +sessionInfo() +## R version 4.3.2 (2023-10-31) +## Platform: x86_64-redhat-linux-gnu (64-bit) +## Running under: CentOS Stream 8 +## +## Matrix products: default +## BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so; LAPACK version 3.9.0 +## +## locale: +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 +## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## +## time zone: Asia/Bangkok +## tzcode source: system (glibc) +## +## attached base packages: +## [1] stats graphics grDevices utils datasets methods base +## +## other attached packages: +## [1] ggpubr_0.6.0 cowplot_1.1.2 RColorBrewer_1.1-3 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 +## [8] readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4 tidyverse_1.3.1 nichenetr_2.0.4 +## +## loaded via a namespace (and not attached): +## [1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 lubridate_1.9.3 httr_1.4.7 +## [7] doParallel_1.0.17 tools_4.3.2 sctransform_0.4.0 backports_1.4.1 utf8_1.2.4 R6_2.5.1 +## [13] lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.2 sp_2.1-2 gridExtra_2.3 +## [19] fdrtool_1.2.17 progressr_0.14.0 cli_3.6.2 spatstat.explore_3.2-1 labeling_0.4.3 Seurat_4.4.0 +## [25] spatstat.data_3.0-3 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.5 pbapply_1.7-2 foreign_0.8-85 +## [31] parallelly_1.36.0 limma_3.56.2 readxl_1.4.3 rstudioapi_0.15.0 visNetwork_2.1.2 generics_0.1.3 +## [37] shape_1.4.6 ica_1.0-3 spatstat.random_3.2-2 vroom_1.6.5 car_3.1-2 Matrix_1.6-4 +## [43] fansi_1.0.6 S4Vectors_0.38.1 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.8 carData_3.0-5 +## [49] recipes_1.0.7 Rtsne_0.17 grid_4.3.2 promises_1.2.1 crayon_1.5.2 miniUI_0.1.1.1 +## [55] lattice_0.21-9 haven_2.4.3 pillar_1.9.0 knitr_1.45 ComplexHeatmap_2.16.0 rjson_0.2.21 +## [61] future.apply_1.11.0 codetools_0.2-19 leiden_0.3.9 glue_1.6.2 data.table_1.14.10 vctrs_0.6.5 +## [67] png_0.1-8 spam_2.10-0 cellranger_1.1.0 gtable_0.3.4 assertthat_0.2.1 gower_1.0.1 +## [73] xfun_0.41 mime_0.12 prodlim_2023.08.28 survival_3.5-7 timeDate_4032.109 iterators_1.0.14 +## [79] hardhat_1.3.0 lava_1.7.3 DiagrammeR_1.0.10 ellipsis_0.3.2 fitdistrplus_1.1-11 ROCR_1.0-11 +## [85] ipred_0.9-14 nlme_3.1-163 bit64_4.0.5 RcppAnnoy_0.0.21 irlba_2.3.5.1 KernSmooth_2.23-22 +## [91] rpart_4.1.21 colorspace_2.1-0 BiocGenerics_0.46.0 DBI_1.1.3 Hmisc_5.1-0 nnet_7.3-19 +## [97] tidyselect_1.2.0 bit_4.0.5 compiler_4.3.2 rvest_1.0.2 htmlTable_2.4.1 xml2_1.3.6 +## [103] plotly_4.10.0 shadowtext_0.1.2 checkmate_2.3.1 scales_1.3.0 caTools_1.18.2 lmtest_0.9-40 +## [109] digest_0.6.33 goftest_1.2-3 spatstat.utils_3.0-4 rmarkdown_2.11 htmltools_0.5.7 pkgconfig_2.0.3 +## [115] base64enc_0.1-3 highr_0.10 dbplyr_2.1.1 fastmap_1.1.1 rlang_1.1.2 GlobalOptions_0.1.2 +## [121] htmlwidgets_1.6.2 shiny_1.7.1 farver_2.1.1 zoo_1.8-12 jsonlite_1.8.8 ModelMetrics_1.2.2.2 +## [127] magrittr_2.0.3 Formula_1.2-5 dotCall64_1.1-1 patchwork_1.1.3 munsell_0.5.0 Rcpp_1.0.11 +## [133] ggnewscale_0.4.9 reticulate_1.34.0 stringi_1.7.6 pROC_1.18.5 MASS_7.3-60 plyr_1.8.9 +## [139] parallel_4.3.2 listenv_0.9.0 ggrepel_0.9.4 deldir_2.0-2 splines_4.3.2 tensor_1.5 +## [145] hms_1.1.3 circlize_0.4.15 igraph_1.2.11 spatstat.geom_3.2-7 ggsignif_0.6.4 reshape2_1.4.4 +## [151] stats4_4.3.2 reprex_2.0.1 evaluate_0.23 SeuratObject_5.0.1 modelr_0.1.8 tzdb_0.4.0 +## [157] foreach_1.5.2 tweenr_2.0.2 httpuv_1.6.13 RANN_2.6.1 polyclip_1.10-6 future_1.33.0 +## [163] clue_0.3-64 scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 e1071_1.7-14 +## [169] rstatix_0.7.2 later_1.3.2 viridisLite_0.4.2 class_7.3-22 IRanges_2.34.1 cluster_2.1.4 +## [175] timechange_0.2.0 globals_0.16.2 caret_6.0-94 +``` ## References diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-activity-heatmap-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-activity-heatmap-1.png new file mode 100644 index 0000000..8913050 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-activity-heatmap-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-expression-heatmap-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-expression-heatmap-1.png new file mode 100644 index 0000000..bbfd682 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-expression-heatmap-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-receptor-heatmap-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-receptor-heatmap-1.png new file mode 100644 index 0000000..0d36455 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-receptor-heatmap-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-target-heatmap-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-target-heatmap-1.png new file mode 100644 index 0000000..dff8eea Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/ligand-target-heatmap-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/summary-vis-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/summary-vis-1.png new file mode 100644 index 0000000..8c2afd0 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/summary-vis-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/target-expression-heatmap-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/target-expression-heatmap-1.png new file mode 100644 index 0000000..f4c3838 Binary files /dev/null and b/vignettes/ligand_activity_geneset_files/figure-gfm/target-expression-heatmap-1.png differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png deleted file mode 100644 index f3fea91..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-11-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png deleted file mode 100644 index 066a883..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-14-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png deleted file mode 100644 index 2a5b064..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-16-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png deleted file mode 100644 index cfd4718..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-19-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png deleted file mode 100644 index 2470147..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-21-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png deleted file mode 100644 index 7b4d239..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-23-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png b/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png deleted file mode 100644 index 1fda2ea..0000000 Binary files a/vignettes/ligand_activity_geneset_files/figure-gfm/unnamed-chunk-24-1.png and /dev/null differ diff --git a/vignettes/ligand_activity_prediction_workflow_new.jpg b/vignettes/ligand_activity_prediction_workflow_new.jpg deleted file mode 100644 index 305af16..0000000 Binary files a/vignettes/ligand_activity_prediction_workflow_new.jpg and /dev/null differ diff --git a/vignettes/ligand_activity_single_cell.Rmd b/vignettes/ligand_activity_single_cell.Rmd index 39d198a..15e5451 100644 --- a/vignettes/ligand_activity_single_cell.Rmd +++ b/vignettes/ligand_activity_single_cell.Rmd @@ -51,11 +51,22 @@ Secondly, we will determine which genes are expressed in CAFs and malignant cell ```{r} tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") -CAF_ids = sample_info %>% filter(`Lymph node` == 0) %>% filter((tumor %in% tumors_remove == FALSE)) %>% filter(`non-cancer cell type` == "CAF") %>% .$cell -malignant_ids = sample_info %>% filter(`Lymph node` == 0) %>% filter(`classified as cancer cell` == 1) %>% filter((tumor %in% tumors_remove == FALSE)) %>% .$cell +CAF_ids = sample_info %>% filter(`Lymph node` == 0) %>% + filter((tumor %in% tumors_remove == FALSE)) %>% + filter(`non-cancer cell type` == "CAF") %>% .$cell -expressed_genes_CAFs = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +malignant_ids = sample_info %>% filter(`Lymph node` == 0) %>% + filter(`classified as cancer cell` == 1) %>% + filter((tumor %in% tumors_remove == FALSE)) %>% .$cell + +expressed_genes_CAFs = expression[CAF_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% + .[. >= 4] %>% names() + +expressed_genes_malignant = expression[malignant_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() ``` @@ -93,9 +104,14 @@ Now perform the ligand activity analysis: infer how well NicheNet's ligand-targe In practice, ligand activity analysis for several cells can be better run in parallel (via e.g. parallel::mclapply)! ```{r} -malignant_hn5_ids = sample_info %>% filter(tumor == "HN5") %>% filter(`Lymph node` == 0) %>% filter(`classified as cancer cell` == 1) %>% .$cell %>% head(10) - -ligand_activities = predict_single_cell_ligand_activities(cell_ids = malignant_hn5_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +malignant_hn5_ids = sample_info %>% filter(tumor == "HN5") %>% + filter(`Lymph node` == 0) %>% + filter(`classified as cancer cell` == 1) %>% + .$cell %>% head(10) + +ligand_activities = predict_single_cell_ligand_activities(cell_ids = malignant_hn5_ids, + expression_scaled = expression_scaled, + ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` ### Ligand prioritization by regression analysis @@ -124,8 +140,11 @@ output_correlation_analysis %>% arrange(-pearson_regression) %>% select(pearson_ Visualize the relation between ligand activity and the cell's property score of interest -```{r} -inner_join(cell_scores_tbl,normalized_ligand_activities) %>% ggplot(aes(score,TNC)) + geom_point() + geom_smooth(method = "lm") +```{r scatterplot } +inner_join(cell_scores_tbl,normalized_ligand_activities) %>% + ggplot(aes(score,TNC)) + + geom_point() + + geom_smooth(method = "lm") ``` ### References diff --git a/vignettes/ligand_activity_single_cell.md b/vignettes/ligand_activity_single_cell.md index 6c1a4a1..e8c2309 100644 --- a/vignettes/ligand_activity_single_cell.md +++ b/vignettes/ligand_activity_single_cell.md @@ -58,11 +58,22 @@ of Puram et al. ``` r tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") -CAF_ids = sample_info %>% filter(`Lymph node` == 0) %>% filter((tumor %in% tumors_remove == FALSE)) %>% filter(`non-cancer cell type` == "CAF") %>% .$cell -malignant_ids = sample_info %>% filter(`Lymph node` == 0) %>% filter(`classified as cancer cell` == 1) %>% filter((tumor %in% tumors_remove == FALSE)) %>% .$cell - -expressed_genes_CAFs = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +CAF_ids = sample_info %>% filter(`Lymph node` == 0) %>% + filter((tumor %in% tumors_remove == FALSE)) %>% + filter(`non-cancer cell type` == "CAF") %>% .$cell + +malignant_ids = sample_info %>% filter(`Lymph node` == 0) %>% + filter(`classified as cancer cell` == 1) %>% + filter((tumor %in% tumors_remove == FALSE)) %>% .$cell + +expressed_genes_CAFs = expression[CAF_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% + .[. >= 4] %>% names() + +expressed_genes_malignant = expression[malignant_ids,] %>% + apply(2,function(x){10*(2**x - 1)}) %>% + apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() ``` ### Load the ligand-target model we want to use @@ -117,9 +128,14 @@ In practice, ligand activity analysis for several cells can be better run in parallel (via e.g. parallel::mclapply)! ``` r -malignant_hn5_ids = sample_info %>% filter(tumor == "HN5") %>% filter(`Lymph node` == 0) %>% filter(`classified as cancer cell` == 1) %>% .$cell %>% head(10) - -ligand_activities = predict_single_cell_ligand_activities(cell_ids = malignant_hn5_ids, expression_scaled = expression_scaled, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +malignant_hn5_ids = sample_info %>% filter(tumor == "HN5") %>% + filter(`Lymph node` == 0) %>% + filter(`classified as cancer cell` == 1) %>% + .$cell %>% head(10) + +ligand_activities = predict_single_cell_ligand_activities(cell_ids = malignant_hn5_ids, + expression_scaled = expression_scaled, + ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) ``` ### Ligand prioritization by regression analysis @@ -170,17 +186,20 @@ output_correlation_analysis %>% arrange(-pearson_regression) %>% select(pearson_ ## 8 0.550 BGN ## 9 0.526 CLCF1 ## 10 0.510 TFPI -## # … with 193 more rows +## # ℹ 193 more rows ``` Visualize the relation between ligand activity and the cell’s property score of interest ``` r -inner_join(cell_scores_tbl,normalized_ligand_activities) %>% ggplot(aes(score,TNC)) + geom_point() + geom_smooth(method = "lm") +inner_join(cell_scores_tbl,normalized_ligand_activities) %>% + ggplot(aes(score,TNC)) + + geom_point() + + geom_smooth(method = "lm") ``` -![](ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png) +![](ligand_activity_single_cell_files/figure-gfm/scatterplot-1.png) ### References diff --git a/vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png b/vignettes/ligand_activity_single_cell_files/figure-gfm/scatterplot-1.png similarity index 100% rename from vignettes/ligand_activity_single_cell_files/figure-gfm/unnamed-chunk-80-1.png rename to vignettes/ligand_activity_single_cell_files/figure-gfm/scatterplot-1.png diff --git a/vignettes/ligand_receptor_circos.svg b/vignettes/ligand_receptor_circos.svg deleted file mode 100644 index f557b11..0000000 --- a/vignettes/ligand_receptor_circos.svg +++ /dev/null @@ -1,952 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vignettes/ligand_target_circos.svg b/vignettes/ligand_target_circos.svg deleted file mode 100644 index 71bd2d2..0000000 --- a/vignettes/ligand_target_circos.svg +++ /dev/null @@ -1,2364 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vignettes/ligand_target_circos_adapted.svg b/vignettes/ligand_target_circos_adapted.svg deleted file mode 100644 index 7d25f14..0000000 --- a/vignettes/ligand_target_circos_adapted.svg +++ /dev/null @@ -1,3380 +0,0 @@ - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Partial EMT target genes in malignant cells - Fibroblast-specific ligands - Fibroblast & Endothelial ligands - Endothelial-specific ligands - diff --git a/vignettes/ligand_target_circos_adapted_caf.svg b/vignettes/ligand_target_circos_adapted_caf.svg deleted file mode 100644 index 65ca1de..0000000 --- a/vignettes/ligand_target_circos_adapted_caf.svg +++ /dev/null @@ -1,3439 +0,0 @@ - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Partial EMT target genes in malignant cells - Fibroblast-specific ligands - Fibroblast & Endothelial ligands - Endothelial-specific ligands - diff --git a/vignettes/ligand_target_signaling_path.Rmd b/vignettes/ligand_target_signaling_path.Rmd index 097ed9e..0819aa5 100644 --- a/vignettes/ligand_target_signaling_path.Rmd +++ b/vignettes/ligand_target_signaling_path.Rmd @@ -40,38 +40,47 @@ First, we will load the necessary packages and networks to infer signaling paths library(nichenetr) library(tidyverse) -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) -ligand_tf_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_tf_matrix_nsga2r_final.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +ligand_tf_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_tf_matrix_nsga2r_final.rds")) -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) -sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) -gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +sig_network <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +gr_network <- readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) ``` -As example, we will infer signaling paths between the CAF-ligand TGFB3 and its top-predicted p-EMT target genes TGFBI, LAMC2 and TNC. +As an example, we will infer signaling paths between the CAF-ligand TGFB2 and its top-predicted p-EMT target genes SERPINE1 and COL1A1. +For better visualization of edge weights, we will also normalize edge weights to make them comparable between signaling and gene regulatory interactions (`minmax_scaling = TRUE`). -```{r} -ligands_all = "TGFB3" # this can be a list of multiple ligands if required -targets_all = c("TGFBI","LAMC2","TNC") +```{r signaling-graph} +ligands_oi <- "TGFB2" # this can be a list of multiple ligands if required +targets_oi <- c("SERPINE1","COL1A1") -active_signaling_network = get_ligand_signaling_path(ligand_tf_matrix = ligand_tf_matrix, ligands_all = ligands_all, targets_all = targets_all, weighted_networks = weighted_networks) +active_signaling_network <- get_ligand_signaling_path(ligands_all = ligands_oi, + targets_all = targets_oi, + weighted_networks = weighted_networks, + ligand_tf_matrix = ligand_tf_matrix, + top_n_regulators = 4, + minmax_scaling = TRUE) -# For better visualization of edge weigths: normalize edge weights to make them comparable between signaling and gene regulatory interactions -active_signaling_network_min_max = active_signaling_network -active_signaling_network_min_max$sig = active_signaling_network_min_max$sig %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) -active_signaling_network_min_max$gr = active_signaling_network_min_max$gr %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) -graph_min_max = diagrammer_format_signaling_graph(signaling_graph_list = active_signaling_network_min_max, ligands_all = ligands_all, targets_all = targets_all, sig_color = "indianred", gr_color = "steelblue") +graph_min_max <- diagrammer_format_signaling_graph(signaling_graph_list = active_signaling_network, + ligands_all = ligands_oi, targets_all = targets_oi, + sig_color = "indianred", gr_color = "steelblue") -# To render the graph: uncomment following line of code +# To render the graph in RStudio Viewer, uncomment following line of code # DiagrammeR::render_graph(graph_min_max, layout = "tree") +# To export/draw the svg, you need to install DiagrammeRsvg +graph_svg <- DiagrammeRsvg::export_svg(DiagrammeR::render_graph(graph_min_max, layout = "tree", output = "graph")) +cowplot::ggdraw() + cowplot::draw_image(charToRaw(graph_svg)) + ``` We will now look which of the collected data sources support the interactions in this network. ```{r} -data_source_network = infer_supporting_datasources(signaling_graph_list = active_signaling_network,lr_network = lr_network, sig_network = sig_network, gr_network = gr_network) +data_source_network <- infer_supporting_datasources(signaling_graph_list = active_signaling_network, + lr_network = lr_network, sig_network = sig_network, gr_network = gr_network) head(data_source_network) ``` @@ -82,30 +91,33 @@ For information of all mentioned data sources in the source column (link to the Export the following to e.g. Cytoscape for exploration of the networks ```{r} -output_path = "" -write_output = FALSE # change to TRUE for writing output +output_path <- "" +write_output <- FALSE # change to TRUE for writing output # weighted networks ('import network' in Cytoscape) if(write_output){ - bind_rows(active_signaling_network$sig %>% mutate(layer = "signaling"), active_signaling_network$gr %>% mutate(layer = "regulatory")) %>% write_tsv(paste0(output_path,"weighted_signaling_network.txt")) + bind_rows(active_signaling_network$sig %>% mutate(layer = "signaling"), + active_signaling_network$gr %>% mutate(layer = "regulatory")) %>% + write_tsv(paste0(output_path,"weighted_signaling_network.txt")) } # networks with information of supporting data sources ('import network' in Cytoscape) if(write_output){ -data_source_network %>% write_tsv(paste0(output_path,"data_source_network.txt")) + data_source_network %>% write_tsv(paste0(output_path,"data_source_network.txt")) } # Node annotation table ('import table' in Cytoscape) -specific_annotation_tbl = bind_rows( - tibble(gene = ligands_all, annotation = "ligand"), - tibble(gene = targets_all, annotation = "target"), - tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_all,ligands_all)) %>% intersect(lr_network$to %>% unique()), annotation = "receptor"), - tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_all,ligands_all)) %>% intersect(gr_network$from %>% unique()) %>% setdiff(c(data_source_network$from, data_source_network$to) %>% unique() %>% intersect(lr_network$to %>% unique())),annotation = "transcriptional regulator") +specific_annotation_tbl <- bind_rows( + tibble(gene = ligands_oi, annotation = "ligand"), + tibble(gene = targets_oi, annotation = "target"), + tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_oi,ligands_oi)) %>% intersect(lr_network$to %>% unique()), annotation = "receptor"), + tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_oi,ligands_oi)) %>% intersect(gr_network$from %>% unique()) %>% setdiff(c(data_source_network$from, data_source_network$to) %>% unique() %>% intersect(lr_network$to %>% unique())),annotation = "transcriptional regulator") ) -non_specific_annotation_tbl = tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(specific_annotation_tbl$gene), annotation = "signaling mediator") +non_specific_annotation_tbl <- tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(specific_annotation_tbl$gene), annotation = "signaling mediator") if(write_output){ -bind_rows(specific_annotation_tbl,non_specific_annotation_tbl) %>% write_tsv(paste0(output_path,"annotation_table.txt")) + bind_rows(specific_annotation_tbl, non_specific_annotation_tbl) %>% + write_tsv(paste0(output_path,"annotation_table.txt")) } ``` diff --git a/vignettes/ligand_target_signaling_path.md b/vignettes/ligand_target_signaling_path.md index 3d582de..e600f0b 100644 --- a/vignettes/ligand_target_signaling_path.md +++ b/vignettes/ligand_target_signaling_path.md @@ -40,50 +40,62 @@ signaling paths between ligand and target genes of interest. library(nichenetr) library(tidyverse) -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) -ligand_tf_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_tf_matrix_nsga2r_final.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) +ligand_tf_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_tf_matrix_nsga2r_final.rds")) -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) -sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) -gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +sig_network <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +gr_network <- readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) ``` -As example, we will infer signaling paths between the CAF-ligand TGFB3 -and its top-predicted p-EMT target genes TGFBI, LAMC2 and TNC. +As an example, we will infer signaling paths between the CAF-ligand +TGFB2 and its top-predicted p-EMT target genes SERPINE1 and COL1A1. For +better visualization of edge weights, we will also normalize edge +weights to make them comparable between signaling and gene regulatory +interactions (`minmax_scaling = TRUE`). ``` r -ligands_all = "TGFB3" # this can be a list of multiple ligands if required -targets_all = c("TGFBI","LAMC2","TNC") +ligands_oi <- "TGFB2" # this can be a list of multiple ligands if required +targets_oi <- c("SERPINE1","COL1A1") -active_signaling_network = get_ligand_signaling_path(ligand_tf_matrix = ligand_tf_matrix, ligands_all = ligands_all, targets_all = targets_all, weighted_networks = weighted_networks) +active_signaling_network <- get_ligand_signaling_path(ligands_all = ligands_oi, + targets_all = targets_oi, + weighted_networks = weighted_networks, + ligand_tf_matrix = ligand_tf_matrix, + top_n_regulators = 4, + minmax_scaling = TRUE) -# For better visualization of edge weigths: normalize edge weights to make them comparable between signaling and gene regulatory interactions -active_signaling_network_min_max = active_signaling_network -active_signaling_network_min_max$sig = active_signaling_network_min_max$sig %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) -active_signaling_network_min_max$gr = active_signaling_network_min_max$gr %>% mutate(weight = ((weight-min(weight))/(max(weight)-min(weight))) + 0.75) -graph_min_max = diagrammer_format_signaling_graph(signaling_graph_list = active_signaling_network_min_max, ligands_all = ligands_all, targets_all = targets_all, sig_color = "indianred", gr_color = "steelblue") +graph_min_max <- diagrammer_format_signaling_graph(signaling_graph_list = active_signaling_network, + ligands_all = ligands_oi, targets_all = targets_oi, + sig_color = "indianred", gr_color = "steelblue") -# To render the graph: uncomment following line of code +# To render the graph in RStudio Viewer, uncomment following line of code # DiagrammeR::render_graph(graph_min_max, layout = "tree") + +# To export/draw the svg, you need to install DiagrammeRsvg +graph_svg <- DiagrammeRsvg::export_svg(DiagrammeR::render_graph(graph_min_max, layout = "tree", output = "graph")) +cowplot::ggdraw() + cowplot::draw_image(charToRaw(graph_svg)) ``` -![](tgfb3_targets_signaling_path.png) + +![](ligand_target_signaling_path_files/figure-gfm/signaling-graph-1.png) We will now look which of the collected data sources support the interactions in this network. ``` r -data_source_network = infer_supporting_datasources(signaling_graph_list = active_signaling_network,lr_network = lr_network, sig_network = sig_network, gr_network = gr_network) +data_source_network <- infer_supporting_datasources(signaling_graph_list = active_signaling_network, + lr_network = lr_network, sig_network = sig_network, gr_network = gr_network) head(data_source_network) ## # A tibble: 6 × 5 -## from to source database layer -## -## 1 PTEN LAMC2 KnockTF KnockTF regulatory -## 2 PTEN TGFBI harmonizome_GEO_GENE harmonizome_gr regulatory -## 3 PTEN TGFBI harmonizome_MSIGDB_GENE harmonizome_gr regulatory -## 4 PTEN TGFBI KnockTF KnockTF regulatory -## 5 SMAD1 TGFBI regnetwork_source regnetwork regulatory -## 6 SMAD1 TGFBI Remap_1 Remap regulatory +## from to source database layer +## +## 1 NFKB1 COL1A1 regnetwork_source regnetwork regulatory +## 2 NFKB1 COL1A1 trrust trrust regulatory +## 3 NFKB1 COL1A1 omnipath_ABC omnipath regulatory +## 4 NFKB1 SERPINE1 trrust trrust regulatory +## 5 NFKB1 SERPINE1 omnipath_ABC omnipath regulatory +## 6 SMAD3 COL1A1 regnetwork_source regnetwork regulatory ``` For information of all mentioned data sources in the source column (link @@ -95,29 +107,32 @@ information](data_sources.xlsx) Export the following to e.g. Cytoscape for exploration of the networks ``` r -output_path = "" -write_output = FALSE # change to TRUE for writing output +output_path <- "" +write_output <- FALSE # change to TRUE for writing output # weighted networks ('import network' in Cytoscape) if(write_output){ - bind_rows(active_signaling_network$sig %>% mutate(layer = "signaling"), active_signaling_network$gr %>% mutate(layer = "regulatory")) %>% write_tsv(paste0(output_path,"weighted_signaling_network.txt")) + bind_rows(active_signaling_network$sig %>% mutate(layer = "signaling"), + active_signaling_network$gr %>% mutate(layer = "regulatory")) %>% + write_tsv(paste0(output_path,"weighted_signaling_network.txt")) } # networks with information of supporting data sources ('import network' in Cytoscape) if(write_output){ -data_source_network %>% write_tsv(paste0(output_path,"data_source_network.txt")) + data_source_network %>% write_tsv(paste0(output_path,"data_source_network.txt")) } # Node annotation table ('import table' in Cytoscape) -specific_annotation_tbl = bind_rows( - tibble(gene = ligands_all, annotation = "ligand"), - tibble(gene = targets_all, annotation = "target"), - tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_all,ligands_all)) %>% intersect(lr_network$to %>% unique()), annotation = "receptor"), - tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_all,ligands_all)) %>% intersect(gr_network$from %>% unique()) %>% setdiff(c(data_source_network$from, data_source_network$to) %>% unique() %>% intersect(lr_network$to %>% unique())),annotation = "transcriptional regulator") +specific_annotation_tbl <- bind_rows( + tibble(gene = ligands_oi, annotation = "ligand"), + tibble(gene = targets_oi, annotation = "target"), + tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_oi,ligands_oi)) %>% intersect(lr_network$to %>% unique()), annotation = "receptor"), + tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(c(targets_oi,ligands_oi)) %>% intersect(gr_network$from %>% unique()) %>% setdiff(c(data_source_network$from, data_source_network$to) %>% unique() %>% intersect(lr_network$to %>% unique())),annotation = "transcriptional regulator") ) -non_specific_annotation_tbl = tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(specific_annotation_tbl$gene), annotation = "signaling mediator") +non_specific_annotation_tbl <- tibble(gene = c(data_source_network$from, data_source_network$to) %>% unique() %>% setdiff(specific_annotation_tbl$gene), annotation = "signaling mediator") if(write_output){ -bind_rows(specific_annotation_tbl,non_specific_annotation_tbl) %>% write_tsv(paste0(output_path,"annotation_table.txt")) + bind_rows(specific_annotation_tbl, non_specific_annotation_tbl) %>% + write_tsv(paste0(output_path,"annotation_table.txt")) } ``` diff --git a/vignettes/ligand_target_signaling_path_files/figure-gfm/signaling-graph-1.png b/vignettes/ligand_target_signaling_path_files/figure-gfm/signaling-graph-1.png new file mode 100644 index 0000000..726e4b9 Binary files /dev/null and b/vignettes/ligand_target_signaling_path_files/figure-gfm/signaling-graph-1.png differ diff --git a/vignettes/model_construction.Rmd b/vignettes/model_construction.Rmd index b7f4380..9eb7287 100644 --- a/vignettes/model_construction.Rmd +++ b/vignettes/model_construction.Rmd @@ -31,13 +31,13 @@ The prior model at the basis of NicheNet denotes how strongly existing knowledge First, we collected multiple complementary data sources covering ligand-receptor, signal transduction (e.g., protein-protein and kinase-substrate interactions) and gene regulatory interactions (e.g., inferred from ChIP-seq and motifs). For information of all collected data sources (link to the website of the database, etc), see [Data source information](data_sources.xlsx) Secondly, we integrated these individual data sources into two weighted networks: 1) a ligand-signaling network, which contains protein-protein interactions covering the signaling paths from ligands to downstream transcriptional regulators; and 2) a gene regulatory network, which contains gene regulatory interactions between transcriptional regulators and target genes. -To let informative data sources contribute more to the final model, we weighted each data source during integration. These data source weights were automatically determined via model-based parameter optimization to improve the accuracy of ligand-target predictions (see the vignette [Parameter optimization via mlrMBO](parameter_optimization.md). In this vignette, we will show how to construct models with unoptimized data source weigths as well. +To let informative data sources contribute more to the final model, we weighted each data source during integration. These data source weights were automatically determined via model-based parameter optimization to improve the accuracy of ligand-target predictions (see the vignette [Parameter optimization via NSGA-II](parameter_optimization.md). In this vignette, we will show how to construct models with unoptimized data source weigths as well. Finally, we combined the ligand-signaling and gene regulatory network to calculate a regulatory potential score between all pairs of ligands and target genes. A ligand-target pair receives a high regulatory potential if the regulators of the target gene are lying downstream of the signaling network of the ligand. To calculate this, we used network propagation methods on the integrated networks to propagate the signal starting from a ligand, flowing through receptors, signaling proteins, transcriptional regulators, and ultimately ending at target genes. A graphical summary of this procedure is visualized here below: -![](workflow_model_construction.png) +![](images/workflow_model_construction.png) # Construct a ligand-target model from all collected ligand-receptor, signaling and gene regulatory network data sources @@ -57,7 +57,7 @@ gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_hum ## Construct NicheNet's ligand-target model from unoptimized data source weights -Construct the weighted integrated ligand-signaling and gene regulatory network. In this first example, we give every data source the same weight (as given by the `source_weights_df` data frame provided by default by the nichenetr package). See the vignette showing how to use mlrMBO to optimize data source weights and the hyperparameters if interested in performing parameter optimization. For the hyperparameters of the model (hub correction factors and damping factor), we will use the optimized values (as given by the `hyperparameter_list` data frame provided by default by the nichenetr package). +Construct the weighted integrated ligand-signaling and gene regulatory network. In this first example, we give every data source the same weight (as given by the `source_weights_df` data frame provided by default by the nichenetr package). See the vignette showing how to use NSGA-II to optimize data source weights and the hyperparameters if interested in performing parameter optimization. For the hyperparameters of the model (hub correction factors and damping factor), we will use the optimized values (as given by the `hyperparameter_list` data frame provided by default by the nichenetr package). The ligand-signaling network hub correction factor and gene regulatory network hub correction factor were defined as hyperparameter of the model to mitigate the potential negative influence of over-dominant hubs on the final model. The damping factor hyperparameter is the main parameter of the Personalized PageRank algorithm, which we used as network propagation algorithm to link ligands to downstream regulators. @@ -290,6 +290,6 @@ extract_top_n_targets("TNF",10,ligand_target_matrix) ## Final note Most optimally, you would like to optimize the parameters again when including own data sources. Instructions to do this are given in the following vignette: -[Parameter optimization via mlrMBO](parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` +[Parameter optimization via NSGA-II](parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` However, this optimization process takes a lot of time and requires the availability of multiple cores to perform the optimization in parallel. Because we demonstrate in the NicheNet paper that unoptimized models also perform considerably well, data source weight optmization is not necessary to have decent predictive ability. diff --git a/vignettes/model_construction.md b/vignettes/model_construction.md index 9edf73d..53719cd 100644 --- a/vignettes/model_construction.md +++ b/vignettes/model_construction.md @@ -39,7 +39,7 @@ final model, we weighted each data source during integration. These data source weights were automatically determined via model-based parameter optimization to improve the accuracy of ligand-target predictions (see the vignette [Parameter optimization via -mlrMBO](parameter_optimization.md). In this vignette, we will show how +NSGA-II](parameter_optimization.md). In this vignette, we will show how to construct models with unoptimized data source weigths as well. Finally, we combined the ligand-signaling and gene regulatory network to @@ -53,7 +53,7 @@ transcriptional regulators, and ultimately ending at target genes. A graphical summary of this procedure is visualized here below: -![](workflow_model_construction.png) +![](images/workflow_model_construction.png) # Construct a ligand-target model from all collected ligand-receptor, signaling and gene regulatory network data sources @@ -78,7 +78,7 @@ Construct the weighted integrated ligand-signaling and gene regulatory network. In this first example, we give every data source the same weight (as given by the `source_weights_df` data frame provided by default by the nichenetr package). See the vignette showing how to use -mlrMBO to optimize data source weights and the hyperparameters if +NSGA-II to optimize data source weights and the hyperparameters if interested in performing parameter optimization. For the hyperparameters of the model (hub correction factors and damping factor), we will use the optimized values (as given by the `hyperparameter_list` data frame @@ -175,7 +175,8 @@ nichenetr package) ``` r annotation_data_sources$type_db %>% unique() -## [1] "comprehensive_db" "literature" "ptm" "text_mining" "directional_ppi" "PPI" "ChIP" "motif" "prediction" "perturbation" +## [1] "comprehensive_db" "literature" "ptm" "text_mining" "directional_ppi" "PPI" "ChIP" +## [8] "motif" "prediction" "perturbation" ``` ``` r @@ -381,7 +382,7 @@ extract_top_n_targets("TNF",10,ligand_target_matrix) Most optimally, you would like to optimize the parameters again when including own data sources. Instructions to do this are given in the following vignette: [Parameter optimization via -mlrMBO](parameter_optimization.md): +NSGA-II](parameter_optimization.md): `vignette("parameter_optimization", package="nichenetr")` However, this optimization process takes a lot of time and requires the diff --git a/vignettes/model_construction_with_liana.Rmd b/vignettes/model_construction_with_liana.Rmd new file mode 100644 index 0000000..7e19a0e --- /dev/null +++ b/vignettes/model_construction_with_liana.Rmd @@ -0,0 +1,266 @@ +--- +title: "Using LIANA ligand-receptor databases to construct the ligand-target model" +author: "Chananchida Sang-aram" +date: '2023-08-23' +output: html_document +vignette: > + %\VignetteIndexEntry{Using LIANA ligand-receptor databases to construct the ligand-target model} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + # comment = "#>", + warning = FALSE, + message = FALSE +) +``` + +Following the [Construction of NicheNet's ligand-target model](model_construction.md) vignette, we will now demonstrate how to use ligand-receptor reactions from LIANA to build the ligand-target model. LIANA is a framework that combines both resources and computational tools for ligand-receptor cell-cell communication inference (Dimitrov et al., 2022). As the NicheNet prior model is built by integrating ligand-receptor, signaling, and gene regulatory databases, each part can be replaced with external data sources. We will show how the first part, the ligand-receptor database, can be replaced with those from LIANA, and how to run the model afterward. + +**Important**: Since LIANA also offers functions to calculate ligand-receptor interactions of interest, it is also possible to use them to select which ligands are of interest to do the ligand activity analysis. This is explained further in the [LIANA vignette](https://saezlab.github.io/liana/articles/liana_nichenet.html). + + +First, we will install LIANA: + +```{r message=FALSE} +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +if (!requireNamespace("remotes", quietly = TRUE)) + install.packages("remotes") + +remotes::install_github('saezlab/liana') +``` + +Load necessary packages. +```{r} +library(liana) +library(nichenetr) +library(tidyverse) +library(Seurat) +``` + +To check which resources are present in LIANA, we can use the `show_resources()` function. These are then accessed via `select_resource()`. + +```{r} +show_resources() +``` + +Next, we will calculate how much overlap there is between the ligands and receptors in the LIANA and NicheNet databases. If the overlap between LIANA receptors and NicheNet signaling network is too low, the integration will probably not work very well. Furthermore, The `decomplexify()` function of LIANA is crucial in our case, as we would like to separate receptors into their respective subunits. + +```{r} +# Load signaling networks of NicheNet +lr_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +lr_network_mouse <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + +sig_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +sig_network_mouse <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_mouse_21122021.rds")) + +overlap_df <- lapply(show_resources()[-1], function(resource) { + db <- select_resource(resource)[[1]] %>% decomplexify() + + lr_network <- paste0("lr_network_", ifelse(grepl("Mouse", resource), "mouse", "human")) + sig_network <- paste0("sig_network_", ifelse(grepl("Mouse", resource), "mouse", "human")) + + data.frame(row.names = resource, + n_ligands = length(unique(db$source_genesymbol)), + n_receptors = length(unique(db$target_genesymbol)), + n_ligands_overlap = length(intersect(db$source_genesymbol, get(lr_network)$from)), + n_receptors_overlap_lr = length(intersect(db$target_genesymbol, get(lr_network)$to)), + n_receptors_overlap_sig = length(intersect(db$target_genesymbol, get(sig_network)$from)) + ) %>% mutate(frac_ligands_overlap = n_ligands_overlap / n_ligands, + frac_receptors_overlap_lr = n_receptors_overlap_lr / n_receptors, + frac_receptors_overlap_sig = n_receptors_overlap_sig / n_receptors) + +}) %>% do.call(rbind, .) + +overlap_df + +``` + +On average, ~90% of the ligands and receptors of LIANA databases are in the NicheNet LR network (`frac_ligands_overlap`, `frac_receptors_overlap_lr`), and almost all of the receptors in LIANA databases are present in the NicheNet signaling network (`frac_receptors_overlap_sig`). When using the "Consensus" database of LIANA, there are ~100 ligands that are not present in NicheNet; in contrast, there are 303 ligands in NicheNet that are not present in the LIANA consensus database. + +To build the ligand-target model, we can use a very similar code to the [Construction of NicheNet's ligand-target model](model_construction.md) vignette. Users can choose between replacing the NicheNet LR database entirely with LIANA's (`replace_nichenet_lr = TRUE`), or just adding the LIANA database as an additional data source, which may contain a lot of redundant information. + +```{r} +# Load liana consensus LR network, and rename columns to be compatible with nichenet function +# Users can choose here whether or not to replace the NicheNet LR network entirely, or just add the LIANA db to it +replace_nichenet_lr <- TRUE +liana_db <- select_resource("Consensus")[[1]] %>% decomplexify() +liana_db <- liana_db %>% rename(from = source_genesymbol, to = target_genesymbol) %>% select(from, to) %>% mutate(source = "liana") %>% + {if (replace_nichenet_lr) (.) else (bind_rows(lr_network_human, .))} + +# Change source weights dataframe (but in this case all source weights are 1) +source_weights <- source_weights_df %>% + add_row(source = "liana", weight = 1, .before = 1) + +# Load the gene regulatory network +gr_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) + +# Aggregate the individual data sources in a weighted manner to obtain a weighted integrated signaling network +weighted_networks <- construct_weighted_networks(lr_network = liana_db, + sig_network = sig_network_human, + gr_network = gr_network_human, + source_weights_df = source_weights) + +# downweigh the importance of signaling and gene regulatory hubs - use the optimized parameters of this +weighted_networks <- apply_hub_corrections(weighted_networks = weighted_networks, + lr_sig_hub = hyperparameter_list %>% filter(parameter == "lr_sig_hub") %>% pull(avg_weight), + gr_hub = hyperparameter_list %>% filter(parameter == "gr_hub") %>% pull(avg_weight)) + +# in this example we will calculate target gene regulatory potential scores for TNF and the ligand combination TNF+IL6 +ligands <- list("TNF",c("TNF","IL6")) +ligand_target_matrix_liana <- construct_ligand_target_matrix(weighted_networks = weighted_networks, ligands = ligands, algorithm = "PPR", + damping_factor = hyperparameter_list %>% filter(parameter == "damping_factor") %>% pull(avg_weight), + ltf_cutoff = hyperparameter_list %>% filter(parameter == "ltf_cutoff") %>% pull(avg_weight)) + +``` + +### Running NicheNet on the LIANA ligand-target model + +In this section compare the results between using the LIANA LR network and NicheNet one in a typical [NicheNet analysis](seurat_steps.md). As this is mouse scRNA-seq data, we will build the model using mouse networks ("MouseConsensus" resource in LIANA). Furthermore, instead of using the same source weights for all data sources as in the previous section, we will use the optimized data source weights. Here, we will use the same data source weight for the LIANA model as the one that was computed for the NicheNet LR network. This may not be the optimum weight, but it requires a lot of runtime to optimize this parameter (see [Parameter optimization vignette](parameter_optimization.md) for more information). + +```{r} +# Typical nichenet analysis +# Load seurat object +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names + +# Load LIANA mouse consensus ligand-receptor network +lr_network_liana <- select_resource("MouseConsensus")[[1]] %>% decomplexify() +lr_network_liana <- lr_network_liana %>% rename(from = source_genesymbol, to = target_genesymbol) %>% select(from, to) %>% mutate(source = "liana") + +# Define receiver cell type +receiver = "CD8 T" +expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) + +# Define sender cell types +sender_celltypes <- c("CD4 T","Treg", "Mono", "NK", "B", "DC") + +# Get expressed genes in the sender +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() + +# Get gene set of interest through DE analysis +seurat_obj_receiver <- subset(seuratObj, idents = receiver) +condition_oi <- "LCMV"; condition_reference <- "SS" +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.10) %>% rownames_to_column("gene") +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) + +# Get potential ligands +ligands <- lr_network_liana %>% pull(from) %>% unique() +receptors <- lr_network_liana %>% pull(to) %>% unique() +expressed_ligands <- intersect(ligands,expressed_genes_sender) +expressed_receptors <- intersect(receptors,expressed_genes_receiver) +potential_ligands <- lr_network_liana %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +potential_ligands + +# Constructing the ligand-target matrix +gr_network_mouse <- readRDS(url("https://zenodo.org/records/7074291/file/gr_network_mouse_21122021.rds")) + +# Define optimum weight as the one calculated for the NicheNet LR network +optim_weight <- optimized_source_weights_df %>% filter(source == "nichenet_verschueren") %>% pull(avg_weight) # 0.2788104 +source_weights <- optimized_source_weights_df %>% + add_row(source = "liana", avg_weight = optim_weight, .before = 1) %>% rename(weight=avg_weight) + +# Aggregate the individual data sources in a weighted manner to obtain a weighted integrated signaling network +weighted_networks_liana <- construct_weighted_networks(lr_network = lr_network_liana, sig_network = sig_network_mouse, gr_network = gr_network_mouse, source_weights_df = source_weights) + +# Downweigh the importance of signaling and gene regulatory hubs - use the optimized parameters of this +weighted_networks_liana <- apply_hub_corrections(weighted_networks = weighted_networks_liana, + lr_sig_hub = hyperparameter_list %>% filter(parameter == "lr_sig_hub") %>% pull(avg_weight), + gr_hub = hyperparameter_list %>% filter(parameter == "gr_hub") %>% pull(avg_weight)) + +# Construct ligand-target matrix using only the potential ligands to save time +ligand_target_matrix_liana <- construct_ligand_target_matrix(weighted_networks = weighted_networks_liana, ligands = as.list(potential_ligands), algorithm = "PPR", + damping_factor = hyperparameter_list %>% filter(parameter == "damping_factor") %>% pull(avg_weight), + ltf_cutoff = hyperparameter_list %>% filter(parameter == "ltf_cutoff") %>% pull(avg_weight)) + +# Filter background genes and the gene set of interest to only the ones in the ligand-target matrix +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix_liana)] +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix_liana)] + +# Perform ligand activity analysis +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix_liana, potential_ligands = potential_ligands) +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) +ligand_activities +``` + +#### Compare results with NicheNet ligand-target model + +Run NicheNet using the wrapper function. + +```{r} +# Use the aggregate function +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + +nichenet_output <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, + receiver = "CD8 T", + condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network_mouse %>% distinct(from, to), + weighted_networks = weighted_networks) + +nichenet_output$ligand_activities +``` + +Compare the results between LIANA and NicheNet. Here we see that half of the top 20 ligands between the two are the same. + +```{r} +intersect(ligand_activities[1:20,]$test_ligand, nichenet_output$ligand_activities[1:20,]$test_ligand) +``` + +Below is a comparison of the rankings. Some of the NicheNet top-ranked ligands are not present in the LIANA LR network, such as Ptprc and some H2- ligands. Nonetheless, the LIANA network seems to have made some new links that results in new ligands appearing, such as Lck, Ccl5, and Crlf2. + +```{r line-plot, fig.width=9, fig.height=6} +rankings_df <- bind_rows(ligand_activities %>% select(test_ligand, rank) %>% mutate(db = "LIANA"), + nichenet_output$ligand_activities %>% select(test_ligand, rank) %>% mutate(db = "NicheNet")) +rankings_df <- rankings_df %>% group_by(db) %>% mutate(new_rank = 1:n()) %>% + group_by(db, rank) %>% mutate(ties = n() > 1, ties = factor(ties, levels = c(TRUE, FALSE))) %>% + ungroup() %>% mutate(x_label = case_when(db == "NicheNet" ~ 0.8, db == "LIANA" ~ 2.2), + db = factor(db, levels = c("NicheNet", "LIANA"))) + +# Top 20 +top_n <- 20 +p1 <- ggplot(rankings_df %>% filter(rank <= top_n), + aes(x=db, y=new_rank, label=test_ligand, group = test_ligand, color = ties)) + + geom_point() + + geom_line() + + geom_text(aes(x = x_label), show.legend = FALSE) + + scale_color_manual(values = c("tomato", "black")) + + scale_y_reverse() + theme_classic() + + labs(y = "Ligand rankings", color = "Tied rankings", title = "Top 20 ligands") + + theme(axis.title.x = element_blank(), axis.text.x = element_text(size=10), legend.position = "none") + +# All ligands +p2 <- ggplot(rankings_df, aes(x=db, y=new_rank, label=test_ligand, group = test_ligand, color = ties)) + + geom_point() + + geom_line() + + scale_color_manual(values = c("tomato", "black")) + + scale_y_reverse() + theme_classic() + + labs(y = "Ligand rankings", color = "Tied rankings", title = "All ligands") + + theme(axis.title.x = element_blank(), axis.text.x = element_text(size=10)) + + +cowplot::plot_grid(p1, p2) +``` + +## References + +Dimitrov, D., Türei, D., Garrido-Rodriguez M., Burmedi P.L., Nagai, J.S., Boys, C., Flores, R.O.R., Kim, H., Szalai, B., Costa, I.G., Valdeolivas, A., Dugourd, A. and Saez-Rodriguez, J. Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data. Nat Commun 13, 3224 (2022). https://doi.org/10.1038/s41467-022-30755-0 diff --git a/vignettes/model_construction_with_liana.md b/vignettes/model_construction_with_liana.md new file mode 100644 index 0000000..29eb802 --- /dev/null +++ b/vignettes/model_construction_with_liana.md @@ -0,0 +1,384 @@ +Using LIANA ligand-receptor databases to construct the ligand-target +model +================ +Chananchida Sang-aram +2023-08-23 + + + +Following the [Construction of NicheNet’s ligand-target +model](model_construction.md) vignette, we will now demonstrate how to +use ligand-receptor reactions from LIANA to build the ligand-target +model. LIANA is a framework that combines both resources and +computational tools for ligand-receptor cell-cell communication +inference (Dimitrov et al., 2022). As the NicheNet prior model is built +by integrating ligand-receptor, signaling, and gene regulatory +databases, each part can be replaced with external data sources. We will +show how the first part, the ligand-receptor database, can be replaced +with those from LIANA, and how to run the model afterward. + +**Important**: Since LIANA also offers functions to calculate +ligand-receptor interactions of interest, it is also possible to use +them to select which ligands are of interest to do the ligand activity +analysis. This is explained further in the [LIANA +vignette](https://saezlab.github.io/liana/articles/liana_nichenet.html). + +First, we will install LIANA: + +``` r +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +if (!requireNamespace("remotes", quietly = TRUE)) + install.packages("remotes") + +remotes::install_github('saezlab/liana') +``` + +Load necessary packages. + +``` r +library(liana) +library(nichenetr) +library(tidyverse) +library(Seurat) +``` + +To check which resources are present in LIANA, we can use the +`show_resources()` function. These are then accessed via +`select_resource()`. + +``` r +show_resources() +## [1] "Default" "Consensus" "Baccin2019" "CellCall" "CellChatDB" "Cellinker" "CellPhoneDB" +## [8] "CellTalkDB" "connectomeDB2020" "EMBRACE" "Guide2Pharma" "HPMR" "ICELLNET" "iTALK" +## [15] "Kirouac2010" "LRdb" "Ramilowski2015" "OmniPath" "MouseConsensus" +``` + +Next, we will calculate how much overlap there is between the ligands +and receptors in the LIANA and NicheNet databases. If the overlap +between LIANA receptors and NicheNet signaling network is too low, the +integration will probably not work very well. Furthermore, The +`decomplexify()` function of LIANA is crucial in our case, as we would +like to separate receptors into their respective subunits. + +``` r +# Load signaling networks of NicheNet +lr_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) +lr_network_mouse <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + +sig_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) +sig_network_mouse <- readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_mouse_21122021.rds")) + +overlap_df <- lapply(show_resources()[-1], function(resource) { + db <- select_resource(resource)[[1]] %>% decomplexify() + + lr_network <- paste0("lr_network_", ifelse(grepl("Mouse", resource), "mouse", "human")) + sig_network <- paste0("sig_network_", ifelse(grepl("Mouse", resource), "mouse", "human")) + + data.frame(row.names = resource, + n_ligands = length(unique(db$source_genesymbol)), + n_receptors = length(unique(db$target_genesymbol)), + n_ligands_overlap = length(intersect(db$source_genesymbol, get(lr_network)$from)), + n_receptors_overlap_lr = length(intersect(db$target_genesymbol, get(lr_network)$to)), + n_receptors_overlap_sig = length(intersect(db$target_genesymbol, get(sig_network)$from)) + ) %>% mutate(frac_ligands_overlap = n_ligands_overlap / n_ligands, + frac_receptors_overlap_lr = n_receptors_overlap_lr / n_receptors, + frac_receptors_overlap_sig = n_receptors_overlap_sig / n_receptors) + +}) %>% do.call(rbind, .) + +overlap_df +## n_ligands n_receptors n_ligands_overlap n_receptors_overlap_lr n_receptors_overlap_sig frac_ligands_overlap frac_receptors_overlap_lr +## Consensus 1032 934 923 794 927 0.8943798 0.8501071 +## Baccin2019 650 612 550 535 611 0.8461538 0.8741830 +## CellCall 276 195 272 193 195 0.9855072 0.9897436 +## CellChatDB 531 450 515 430 447 0.9698682 0.9555556 +## Cellinker 1216 1010 879 817 1003 0.7228618 0.8089109 +## CellPhoneDB 500 461 479 415 458 0.9580000 0.9002169 +## CellTalkDB 812 780 741 683 776 0.9125616 0.8756410 +## connectomeDB2020 813 681 768 640 678 0.9446494 0.9397944 +## EMBRACE 462 473 435 436 473 0.9415584 0.9217759 +## Guide2Pharma 293 243 289 242 243 0.9863481 0.9958848 +## HPMR 365 492 260 407 492 0.7123288 0.8272358 +## ICELLNET 318 252 313 246 252 0.9842767 0.9761905 +## iTALK 714 699 666 625 697 0.9327731 0.8941345 +## Kirouac2010 143 81 135 80 81 0.9440559 0.9876543 +## LRdb 801 745 726 662 742 0.9063670 0.8885906 +## Ramilowski2015 639 589 607 564 588 0.9499218 0.9575552 +## OmniPath 1219 907 956 773 901 0.7842494 0.8522602 +## MouseConsensus 874 796 763 685 789 0.8729977 0.8605528 +## frac_receptors_overlap_sig +## Consensus 0.9925054 +## Baccin2019 0.9983660 +## CellCall 1.0000000 +## CellChatDB 0.9933333 +## Cellinker 0.9930693 +## CellPhoneDB 0.9934924 +## CellTalkDB 0.9948718 +## connectomeDB2020 0.9955947 +## EMBRACE 1.0000000 +## Guide2Pharma 1.0000000 +## HPMR 1.0000000 +## ICELLNET 1.0000000 +## iTALK 0.9971388 +## Kirouac2010 1.0000000 +## LRdb 0.9959732 +## Ramilowski2015 0.9983022 +## OmniPath 0.9933848 +## MouseConsensus 0.9912060 +``` + +On average, ~90% of the ligands and receptors of LIANA databases are in +the NicheNet LR network (`frac_ligands_overlap`, +`frac_receptors_overlap_lr`), and almost all of the receptors in LIANA +databases are present in the NicheNet signaling network +(`frac_receptors_overlap_sig`). When using the “Consensus” database of +LIANA, there are ~100 ligands that are not present in NicheNet; in +contrast, there are 303 ligands in NicheNet that are not present in the +LIANA consensus database. + +To build the ligand-target model, we can use a very similar code to the +[Construction of NicheNet’s ligand-target model](model_construction.md) +vignette. Users can choose between replacing the NicheNet LR database +entirely with LIANA’s (`replace_nichenet_lr = TRUE`), or just adding the +LIANA database as an additional data source, which may contain a lot of +redundant information. + +``` r +# Load liana consensus LR network, and rename columns to be compatible with nichenet function +# Users can choose here whether or not to replace the NicheNet LR network entirely, or just add the LIANA db to it +replace_nichenet_lr <- TRUE +liana_db <- select_resource("Consensus")[[1]] %>% decomplexify() +liana_db <- liana_db %>% rename(from = source_genesymbol, to = target_genesymbol) %>% select(from, to) %>% mutate(source = "liana") %>% + {if (replace_nichenet_lr) (.) else (bind_rows(lr_network_human, .))} + +# Change source weights dataframe (but in this case all source weights are 1) +source_weights <- source_weights_df %>% + add_row(source = "liana", weight = 1, .before = 1) + +# Load the gene regulatory network +gr_network_human <- readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) + +# Aggregate the individual data sources in a weighted manner to obtain a weighted integrated signaling network +weighted_networks <- construct_weighted_networks(lr_network = liana_db, + sig_network = sig_network_human, + gr_network = gr_network_human, + source_weights_df = source_weights) + +# downweigh the importance of signaling and gene regulatory hubs - use the optimized parameters of this +weighted_networks <- apply_hub_corrections(weighted_networks = weighted_networks, + lr_sig_hub = hyperparameter_list %>% filter(parameter == "lr_sig_hub") %>% pull(avg_weight), + gr_hub = hyperparameter_list %>% filter(parameter == "gr_hub") %>% pull(avg_weight)) + +# in this example we will calculate target gene regulatory potential scores for TNF and the ligand combination TNF+IL6 +ligands <- list("TNF",c("TNF","IL6")) +ligand_target_matrix_liana <- construct_ligand_target_matrix(weighted_networks = weighted_networks, ligands = ligands, algorithm = "PPR", + damping_factor = hyperparameter_list %>% filter(parameter == "damping_factor") %>% pull(avg_weight), + ltf_cutoff = hyperparameter_list %>% filter(parameter == "ltf_cutoff") %>% pull(avg_weight)) +``` + +### Running NicheNet on the LIANA ligand-target model + +In this section compare the results between using the LIANA LR network +and NicheNet one in a typical [NicheNet analysis](seurat_steps.md). As +this is mouse scRNA-seq data, we will build the model using mouse +networks (“MouseConsensus” resource in LIANA). Furthermore, instead of +using the same source weights for all data sources as in the previous +section, we will use the optimized data source weights. Here, we will +use the same data source weight for the LIANA model as the one that was +computed for the NicheNet LR network. This may not be the optimum +weight, but it requires a lot of runtime to optimize this parameter (see +[Parameter optimization vignette](parameter_optimization.md) for more +information). + +``` r +# Typical nichenet analysis +# Load seurat object +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names + +# Load LIANA mouse consensus ligand-receptor network +lr_network_liana <- select_resource("MouseConsensus")[[1]] %>% decomplexify() +lr_network_liana <- lr_network_liana %>% rename(from = source_genesymbol, to = target_genesymbol) %>% select(from, to) %>% mutate(source = "liana") + +# Define receiver cell type +receiver = "CD8 T" +expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) + +# Define sender cell types +sender_celltypes <- c("CD4 T","Treg", "Mono", "NK", "B", "DC") + +# Get expressed genes in the sender +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() + +# Get gene set of interest through DE analysis +seurat_obj_receiver <- subset(seuratObj, idents = receiver) +condition_oi <- "LCMV"; condition_reference <- "SS" +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.10) %>% rownames_to_column("gene") +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) + +# Get potential ligands +ligands <- lr_network_liana %>% pull(from) %>% unique() +receptors <- lr_network_liana %>% pull(to) %>% unique() +expressed_ligands <- intersect(ligands,expressed_genes_sender) +expressed_receptors <- intersect(receptors,expressed_genes_receiver) +potential_ligands <- lr_network_liana %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +potential_ligands +## [1] "Adam10" "Adam23" "Icosl" "Thy1" "H2-K1" "App" "Itgb2" "Icam1" "Lck" "Fcer2a" "Cd48" "Icam2" "H2-M3" "Vcam1" "Cd22" "Cd86" +## [17] "B2m" "Sirpa" "Adam17" "Selplg" "Btla" "F11r" "Fn1" "Ebi3" "Hp" "Tgfb1" "Lgals1" "Vcan" "Mif" "Il15" "Copa" "Ybx1" +## [33] "Fam3c" "Ccl2" "Crlf2" "Ccl22" "Cxcl10" "Cxcl16" "Cd72" "Txlna" "Psen1" "F13a1" "C1qb" "Gnai2" "Tgm2" "Gnas" "Ccl5" "Mia" +## [49] "H2-T23" "Lyz1" "Lyz2" + +# Constructing the ligand-target matrix +gr_network_mouse <- readRDS(url("https://zenodo.org/records/7074291/file/gr_network_mouse_21122021.rds")) + +# Define optimum weight as the one calculated for the NicheNet LR network +optim_weight <- optimized_source_weights_df %>% filter(source == "nichenet_verschueren") %>% pull(avg_weight) # 0.2788104 +source_weights <- optimized_source_weights_df %>% + add_row(source = "liana", avg_weight = optim_weight, .before = 1) %>% rename(weight=avg_weight) + +# Aggregate the individual data sources in a weighted manner to obtain a weighted integrated signaling network +weighted_networks_liana <- construct_weighted_networks(lr_network = lr_network_liana, sig_network = sig_network_mouse, gr_network = gr_network_mouse, source_weights_df = source_weights) + +# Downweigh the importance of signaling and gene regulatory hubs - use the optimized parameters of this +weighted_networks_liana <- apply_hub_corrections(weighted_networks = weighted_networks_liana, + lr_sig_hub = hyperparameter_list %>% filter(parameter == "lr_sig_hub") %>% pull(avg_weight), + gr_hub = hyperparameter_list %>% filter(parameter == "gr_hub") %>% pull(avg_weight)) + +# Construct ligand-target matrix using only the potential ligands to save time +ligand_target_matrix_liana <- construct_ligand_target_matrix(weighted_networks = weighted_networks_liana, ligands = as.list(potential_ligands), algorithm = "PPR", + damping_factor = hyperparameter_list %>% filter(parameter == "damping_factor") %>% pull(avg_weight), + ltf_cutoff = hyperparameter_list %>% filter(parameter == "ltf_cutoff") %>% pull(avg_weight)) + +# Filter background genes and the gene set of interest to only the ones in the ligand-target matrix +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix_liana)] +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix_liana)] + +# Perform ligand activity analysis +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix_liana, potential_ligands = potential_ligands) +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) +ligand_activities +## # A tibble: 51 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.663 0.389 0.244 0.300 1 +## 2 H2-M3 0.608 0.292 0.147 0.179 2 +## 3 H2-T23 0.611 0.278 0.133 0.153 3 +## 4 Lck 0.621 0.268 0.122 0.103 4 +## 5 H2-K1 0.605 0.268 0.122 0.141 5 +## 6 Sirpa 0.613 0.263 0.117 0.143 6 +## 7 Cd48 0.612 0.257 0.112 0.0860 7 +## 8 Tgfb1 0.597 0.254 0.108 0.202 8 +## 9 Ccl22 0.605 0.250 0.104 0.125 9 +## 10 Ccl5 0.606 0.248 0.102 0.0343 10 +## # ℹ 41 more rows +``` + +#### Compare results with NicheNet ligand-target model + +Run NicheNet using the wrapper function. + +``` r +# Use the aggregate function +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + +nichenet_output <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, + receiver = "CD8 T", + condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network_mouse %>% distinct(from, to), + weighted_networks = weighted_networks) +## [1] "Read in and process NicheNet's networks" +## [1] "Define expressed ligands and receptors in receiver and sender cells" +## [1] "Perform DE analysis in receiver cell" +## [1] "Perform NicheNet ligand activity analysis" +## [1] "Infer active target genes of the prioritized ligands" +## [1] "Infer receptors of the prioritized ligands" +## [1] "Perform DE analysis in sender cells" + +nichenet_output$ligand_activities +## # A tibble: 73 × 6 +## test_ligand auroc aupr aupr_corrected pearson rank +## +## 1 Ebi3 0.663 0.390 0.244 0.301 1 +## 2 Ptprc 0.642 0.310 0.165 0.167 2 +## 3 H2-M3 0.608 0.292 0.146 0.179 3 +## 4 H2-M2 0.611 0.279 0.133 0.153 5 +## 5 H2-T10 0.611 0.279 0.133 0.153 5 +## 6 H2-T22 0.611 0.279 0.133 0.153 5 +## 7 H2-T23 0.611 0.278 0.132 0.153 7 +## 8 H2-K1 0.605 0.268 0.122 0.142 8 +## 9 H2-Q4 0.605 0.268 0.122 0.141 10 +## 10 H2-Q6 0.605 0.268 0.122 0.141 10 +## # ℹ 63 more rows +``` + +Compare the results between LIANA and NicheNet. Here we see that half of +the top 20 ligands between the two are the same. + +``` r +intersect(ligand_activities[1:20,]$test_ligand, nichenet_output$ligand_activities[1:20,]$test_ligand) +## [1] "Ebi3" "H2-M3" "H2-T23" "H2-K1" "Sirpa" "Cd48" "Tgfb1" "Ccl22" "App" "Selplg" "Cxcl10" "Btla" +``` + +Below is a comparison of the rankings. Some of the NicheNet top-ranked +ligands are not present in the LIANA LR network, such as Ptprc and some +H2- ligands. Nonetheless, the LIANA network seems to have made some new +links that results in new ligands appearing, such as Lck, Ccl5, and +Crlf2. + +``` r +rankings_df <- bind_rows(ligand_activities %>% select(test_ligand, rank) %>% mutate(db = "LIANA"), + nichenet_output$ligand_activities %>% select(test_ligand, rank) %>% mutate(db = "NicheNet")) +rankings_df <- rankings_df %>% group_by(db) %>% mutate(new_rank = 1:n()) %>% + group_by(db, rank) %>% mutate(ties = n() > 1, ties = factor(ties, levels = c(TRUE, FALSE))) %>% + ungroup() %>% mutate(x_label = case_when(db == "NicheNet" ~ 0.8, db == "LIANA" ~ 2.2), + db = factor(db, levels = c("NicheNet", "LIANA"))) + +# Top 20 +top_n <- 20 +p1 <- ggplot(rankings_df %>% filter(rank <= top_n), + aes(x=db, y=new_rank, label=test_ligand, group = test_ligand, color = ties)) + + geom_point() + + geom_line() + + geom_text(aes(x = x_label), show.legend = FALSE) + + scale_color_manual(values = c("tomato", "black")) + + scale_y_reverse() + theme_classic() + + labs(y = "Ligand rankings", color = "Tied rankings", title = "Top 20 ligands") + + theme(axis.title.x = element_blank(), axis.text.x = element_text(size=10), legend.position = "none") + +# All ligands +p2 <- ggplot(rankings_df, aes(x=db, y=new_rank, label=test_ligand, group = test_ligand, color = ties)) + + geom_point() + + geom_line() + + scale_color_manual(values = c("tomato", "black")) + + scale_y_reverse() + theme_classic() + + labs(y = "Ligand rankings", color = "Tied rankings", title = "All ligands") + + theme(axis.title.x = element_blank(), axis.text.x = element_text(size=10)) + + +cowplot::plot_grid(p1, p2) +``` + +![](model_construction_with_liana_files/figure-gfm/line-plot-1.png) + +## References + +Dimitrov, D., Türei, D., Garrido-Rodriguez M., Burmedi P.L., Nagai, +J.S., Boys, C., Flores, R.O.R., Kim, H., Szalai, B., Costa, I.G., +Valdeolivas, A., Dugourd, A. and Saez-Rodriguez, J. Comparison of +methods and resources for cell-cell communication inference from +single-cell RNA-Seq data. Nat Commun 13, 3224 (2022). + diff --git a/vignettes/model_construction_with_liana_files/figure-gfm/line-plot-1.png b/vignettes/model_construction_with_liana_files/figure-gfm/line-plot-1.png new file mode 100644 index 0000000..eb34452 Binary files /dev/null and b/vignettes/model_construction_with_liana_files/figure-gfm/line-plot-1.png differ diff --git a/vignettes/model_evaluation.Rmd b/vignettes/model_evaluation.Rmd index 77e2c66..7224c70 100644 --- a/vignettes/model_evaluation.Rmd +++ b/vignettes/model_evaluation.Rmd @@ -67,7 +67,7 @@ performances = settings %>% lapply(evaluate_target_prediction, ligand_target_mat Step 3: visualize the results: show here different classification evaluation metrics -```{r, fig.width=8, fig.height=8} +```{r target-prediction-v2-results, fig.width=8, fig.height=8} # Visualize some classification evaluation metrics showing the target gene prediction performance performances = performances %>% select(-aupr, -auc_iregulon,-pearson_log_pval,-spearman_log_pval ,-sensitivity_roc, -specificity_roc) %>% gather(key = scorename, value = scorevalue, auroc:spearman) @@ -88,7 +88,7 @@ performances %>% We will now compare performances between NicheNet v1 and v2 on both ligand treatment datasets. Note that although the performance of v2 is much better here, the CytoSig experiments were also included during model construction of v2. To get the results in the MultiNicheNet paper, you will have to follow the `model_construction.Rmd` vignette and filter out the CytoSig data sources during model construction. -```{r fig.width=8, fig.height=6} +```{r target-prediction-comparison, fig.width=8, fig.height=6} performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { @@ -102,7 +102,6 @@ performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { lapply(c("v1", "v2"), function(ver){ # Get the ligand_target_matrix according to the version, Evaluate transcriptional response prediction on every dataset - # performances = settings %>% lapply(evaluate_target_prediction, get(paste0("ligand_target_matrix_", ver))) %>% bind_rows() # Select some classification evaluation metrics showing the target gene prediction performance @@ -141,7 +140,7 @@ Now we will show how to assess the accuracy of the model in predicting whether c A graphical summary of this procedure is visualized here below: -![](ligand_activity_prediction_workflow_new.png) +![](images/ligand_activity_prediction_workflow_new.png) Step 1: convert expression datasets to the required format to perform ligand activity prediction @@ -175,7 +174,7 @@ evaluation_ligand_prediction = ligand_importances$setting %>% unique() %>% lappl Step 4: visualize the results: show here different classification evaluation metrics -```{r,fig.width=8, fig.height=8} +```{r ligand-activity-v2-results, fig.width=8, fig.height=8} # Visualize some classification evaluation metrics showing the ligand activity prediction performance evaluation_ligand_prediction = evaluation_ligand_prediction %>% select(-aupr, -sensitivity_roc, -specificity_roc, -pearson, -spearman, -mean_rank_GST_log_pval) %>% gather(key = scorename, value = scorevalue, auroc:aupr_corrected) scorelabels = c(auroc="AUROC", aupr_corrected="AUPR (corrected)") @@ -233,7 +232,7 @@ performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { ``` -```{r fig.width=8, fig.height=6} +```{r ligand-activity-comparison, fig.width=8, fig.height=6} scorelabels = str_wrap(c("AUROC", "AUPR", "AUPR (corrected)", "Sensitivity ROC", "Specificity ROC", "Mean-rank gene-set enrichment", "AUC-iRegulon", "AUC-iRegulon (corrected)", "Pearson log p-val", "Spearman log p-val", "Pearson correlation", "Spearman's rank correlation"), width=15) %>% setNames(performances_df$importance_measure %>% unique) diff --git a/vignettes/model_evaluation.md b/vignettes/model_evaluation.md index 69e945e..fbd4070 100644 --- a/vignettes/model_evaluation.md +++ b/vignettes/model_evaluation.md @@ -15,7 +15,15 @@ that observed gene expression changes can be directly attributed to the addition of the ligand(s). Hence, differentially expressed genes can be considered as a gold standard of target genes of a particular ligand. -You can use the procedure shown here to evaluate your own model and compare its performance to NicheNet. In NicheNet v2, we added more ligand treatment validation datasets from CytoSig. We will also demonstrate the better performance of NicheNet v2's ligand-target matrix. [Ligand treatment validation datasets, NicheNet's v1 ligand-target model](https://doi.org/10.5281/zenodo.3260758), and [NicheNet’s v2 ligand-target model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from Zenodo. +You can use the procedure shown here to evaluate your own model and +compare its performance to NicheNet. In NicheNet v2, we added more +ligand treatment validation datasets from CytoSig. We will also +demonstrate the better performance of NicheNet v2’s ligand-target +matrix. [Ligand treatment validation datasets, NicheNet’s v1 +ligand-target model](https://doi.org/10.5281/zenodo.3260758), and +[NicheNet’s v2 ligand-target +model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from +Zenodo. ### Load nichenetr, the model we want to evaluate, and the datasets on which we want to evaluate it. @@ -99,14 +107,14 @@ performances %>% theme_bw() ``` -![](model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png) +![](model_evaluation_files/figure-gfm/target-prediction-v2-results-1.png) We will now compare performances between NicheNet v1 and v2 on both ligand treatment datasets. Note that although the performance of v2 is much better here, the CytoSig experiments were also included during -model construction of v2. To get the results in the MultiNicheNet paper, you will have -to follow the `model_construction.Rmd` vignette and filter out the -CytoSig data sources during model construction. +model construction of v2. To get the results in the MultiNicheNet paper, +you will have to follow the `model_construction.Rmd` vignette and filter +out the CytoSig data sources during model construction. ``` r @@ -122,7 +130,6 @@ performances_df <- lapply(c("nichenet_gs", "cytosig_gs"), function(gs) { lapply(c("v1", "v2"), function(ver){ # Get the ligand_target_matrix according to the version, Evaluate transcriptional response prediction on every dataset - # performances = settings %>% lapply(evaluate_target_prediction, get(paste0("ligand_target_matrix_", ver))) %>% bind_rows() # Select some classification evaluation metrics showing the target gene prediction performance @@ -153,7 +160,7 @@ ggplot(performances_df, aes(y=scorevalue, x=version)) + legend.position = "none") ``` -![](model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png) +![](model_evaluation_files/figure-gfm/target-prediction-comparison-1.png) ### Example: ligand activity prediction evaluation @@ -171,7 +178,7 @@ ligand activity scores as should be for a good ligand-target model. A graphical summary of this procedure is visualized here below: -![](ligand_activity_prediction_workflow_new.png) +![](images/ligand_activity_prediction_workflow_new.png) Step 1: convert expression datasets to the required format to perform ligand activity prediction @@ -229,7 +236,7 @@ evaluation_ligand_prediction %>% theme(axis.text.x = element_text(angle = 90)) ``` -![](model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png) +![](model_evaluation_files/figure-gfm/ligand-activity-v2-results-1.png) We will again compare performances between NicheNet v1 and v2 on both ligand treatment datasets. @@ -299,4 +306,4 @@ ggplot(performances_df %>% filter(importance_measure %in% legend.position = "bottom") ``` -![](model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png) +![](model_evaluation_files/figure-gfm/ligand-activity-comparison-1.png) diff --git a/vignettes/model_evaluation_files/figure-gfm/ligand-activity-comparison-1.png b/vignettes/model_evaluation_files/figure-gfm/ligand-activity-comparison-1.png new file mode 100644 index 0000000..2338c28 Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/ligand-activity-comparison-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/ligand-activity-v2-results-1.png b/vignettes/model_evaluation_files/figure-gfm/ligand-activity-v2-results-1.png new file mode 100644 index 0000000..e7f3bdb Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/ligand-activity-v2-results-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/target-prediction-comparison-1.png b/vignettes/model_evaluation_files/figure-gfm/target-prediction-comparison-1.png new file mode 100644 index 0000000..31c3854 Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/target-prediction-comparison-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/target-prediction-v2-results-1.png b/vignettes/model_evaluation_files/figure-gfm/target-prediction-v2-results-1.png new file mode 100644 index 0000000..8aa6bdb Binary files /dev/null and b/vignettes/model_evaluation_files/figure-gfm/target-prediction-v2-results-1.png differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png deleted file mode 100644 index d59054b..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-11-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png deleted file mode 100644 index f6cbb63..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-4-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png deleted file mode 100644 index 72e5cd6..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-5-1.png and /dev/null differ diff --git a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index 0febf8b..0000000 Binary files a/vignettes/model_evaluation_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git a/vignettes/parameter_optimization.Rmd b/vignettes/parameter_optimization.Rmd index ca89c9b..e8d935c 100644 --- a/vignettes/parameter_optimization.Rmd +++ b/vignettes/parameter_optimization.Rmd @@ -1,16 +1,17 @@ --- -title: "Parameter optimization via mlrMBO" -author: "Robin Browaeys" -date: "2018-02-20" +title: "Parameter optimization via NSGA-II" +author: "Robin Browaeys & Chananchida Sang-aram" +date: "2023-01-03" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Parameter optimization via mlrMBO} + %\VignetteIndexEntry{Parameter optimization via NSGA2R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} @@ -19,111 +20,188 @@ knitr::opts_chunk$set( # comment = "#>", warning = FALSE, message = FALSE, - eval = FALSE + eval = TRUE ) + ``` -This vignette shows how we optimized both hyperparameters and data source weights via model-based optimization (see manuscript for more information). Because the optimization requires intensive parallel computation, we performed optimization in parallel on a gridengine cluster via the qsub package (https://cran.r-project.org/web/packages/qsub/qsub.pdf). This script is merely illustrative and should be adapted by the user to work on its own system. +This vignette shows how we optimized both hyperparameters and data source weights via NSGA-II, a non-dominated sorting genetic algorithm. Because the optimization requires intensive parallel computation, we performed optimization in parallel on a high performance computing (HPC) cluster. The old code for parameter optimization via mlrMBO can be accessed by browsing the repository history during NicheNet v1. + +This script is merely illustrative and should be adapted by the user to work on its own system. -The [ligand treatment validation datasets](https://doi.org/10.5281/zenodo.3260758), and [NicheNet’s v2 ligand-target model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from Zenodo. +All network and validation datasets can be downloaded from [Zenodo](https://zenodo.org/records/8016880), under "NicheNet_V2.zip". First, we will load in the required packages and networks we will use to construct the models which we will evaluate during the optimization procedure. + ```{r} library(nichenetr) library(tidyverse) -library(qsub) -library(mlrMBO) +library(nsga2R) -# in the NicheNet framework, ligand-target links are predicted based on collected biological knowledge on ligand-receptor, signaling and gene regulatory interactions -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) -sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) -gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) -``` +# Path is relative to 'optimization.Rproj' in the zip file (evaluation/optimization/) +lr_network <- readRDS("../../networks/data/ligand_receptor/lr_network_human_21122021.rds") +sig_network <- readRDS("../../networks/data/signaling/signaling_network_human_21122021.rds") +gr_network <- readRDS("../../networks/data/gene_regulatory/gr_network_human_21122021_optimization.rds") -We will load in the ligand treatment validation datasets and try to optimize the parameters to maximize both target gene and ligand activity prediction. In this vignette, we do the optimization on all datasets. Alternatively, you can select a specific subset of datasets and evaluate the final performance on the left-out datasets. +# Get names of all data sources +source_names <- c(lr_network$source, sig_network$source, gr_network$source) %>% unique() +source_names_zero_possible <- c(sig_network$source, gr_network$source) %>% unique() + +# nsga2r parameters +n_param <- length(source_names) + 4 +n_obj <- 4 +lower_bounds <- c(rep(0,times = length(source_names)), 0, 0, 0.9, 0.01) +upper_bounds <- c(rep(1,times = length(source_names)), 1, 1, 0.999, 0.99) -```{r} -# The ligand treatment expression datasets used for validation can be downloaded from Zenodo: -expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) ``` -Define the optimization wrapper function and config information for the qsub package -```{r} -mlrmbo_optimization_wrapper = function(...){ - library(nichenetr) - library(mlrMBO) - library(parallelMap) - library(dplyr) - output = mlrmbo_optimization(...) - return(output) -} +To perform optimization, we will use the ligand treatment validation datasets and try to optimize the parameters to maximize both target gene and ligand activity prediction. We used 5-fold cross validation, i.e., 4/5th of dataset was used for training the parameters, and 1/5th of the dataset was used to evaluate the final performance. + +```{r, eval=FALSE} +# Choose 1 fold +cv_names <- c("1234", "2345", "1345", "1245", "1235") +fold <- cv_names[1] -qsub_config = create_qsub_config( - remote = "myuser@mycluster.address.org:1234", - local_tmp_path = "/tmp/r2gridengine", - remote_tmp_path = "/scratch/personal/myuser/r2gridengine", - num_cores = 24, - memory = "10G", - wait = FALSE, - max_wall_time = "500:00:00" +# Load in validation datasets corresponding to the fold +settings_CV <- readRDS(paste0("settings_training_f", fold)) +settings <- settings_CV$settings + +# Remove databases from GRN if it is in the validation dataset +gr_network <- readRDS("../../networks/data/gene_regulatory/gr_network_human_21122021_optimization.rds") +forbidden_gr <- bind_rows( + gr_network %>% filter(database == "NicheNet_LT" & from %in% settings_CV$forbidden_ligands_nichenet), + gr_network %>% filter(database == "CytoSig" & from %in% settings_CV$forbidden_ligands_cytosig) ) +gr_network <- gr_network %>% setdiff(forbidden_gr) +forbidden_gr %>% filter(database %in% c("NicheNet_LT", "CytoSig")) %>% pull(source) %>% table() +gr_network %>% filter(database %in% c("NicheNet_LT", "CytoSig")) %>% pull(source) %>% table() + +# Perform optimization +set.seed(1) +results <- run_nsga2R_cluster(model_evaluation_optimization_nsga2r, + # nsga2R params + varNo=n_param, + objDim=n_obj, + lowerBounds= lower_bounds, + upperBounds=upper_bounds, + popSize = 360, + tourSize = 2, + generations = 15, + ncores = 8, + # model_evaluation_optimization_nsga2r params + source_names = source_names, + algorithm = "PPR", + correct_topology = FALSE, + lr_network = lr_network, + sig_network = lr_network, + gr_network = gr_network, + settings = settings, + secondary_targets = FALSE, + remove_direct_links = "no", + damping_factor = NULL) + +# Save results object +readr::write_rds(results, paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) + +# Save parameters +optimized_parameters <- get_optimized_parameters_nsga2r(results, source_names) +saveRDS(optimized_parameters, paste0("results/cv_", fold, "_ltfcutoff_nsga2r_optimized_parameters_final.rds")) + ``` -Perform optimization: +In the real implementation, we submitted the code chunks above as an R script to the HPC in 5 separate jobs (one per cross-validation fold). Our job submission script looks as follows: +```{bash, eval=FALSE} +#!/bin/bash -l +#PBS -l nodes=1:ppn=48 +#PBS -l mem=64gb +#PBS -l walltime=72:00:00 -```{r} -additional_arguments_topology_correction = list(source_names = source_weights_df$source %>% unique(), - algorithm = "PPR", - correct_topology = FALSE, - lr_network = lr_network, - sig_network = sig_network, - gr_network = gr_network, - settings = lapply(expression_settings_validation,convert_expression_settings_evaluation), - secondary_targets = FALSE, - remove_direct_links = "no", - cutoff_method = "quantile") -nr_datasources = additional_arguments_topology_correction$source_names %>% length() - -obj_fun_multi_topology_correction = makeMultiObjectiveFunction(name = "nichenet_optimization", - description = "data source weight and hyperparameter optimization: expensive black-box function", - fn = model_evaluation_optimization, - par.set = makeParamSet( - makeNumericVectorParam("source_weights", len = nr_datasources, lower = 0, upper = 1), - makeNumericVectorParam("lr_sig_hub", len = 1, lower = 0, upper = 1), - makeNumericVectorParam("gr_hub", len = 1, lower = 0, upper = 1), - makeNumericVectorParam("ltf_cutoff", len = 1, lower = 0.9, upper = 0.999), - makeNumericVectorParam("damping_factor", len = 1, lower = 0.01, upper = 0.99)), - has.simple.signature = FALSE, - n.objectives = 4, - noisy = FALSE, - minimize = c(FALSE,FALSE,FALSE,FALSE)) -set.seed(1) +module load R/4.2.0-foss-2021b +module load R-bundle-Bioconductor/3.15-foss-2021b-R-4.2.0 + +cd $PBS_O_WORKDIR +Rscript scripts/nsga2r_optimization.R +``` -# Run with: 50 iterations, 24 desings evaluated in parallel and 240 start designs - -job_mlrmbo = qsub_lapply(X = 1, - FUN = mlrmbo_optimization_wrapper, - object_envir = environment(mlrmbo_optimization_wrapper), - qsub_config = qsub_config, - qsub_environment = NULL, - qsub_packages = NULL, - obj_fun_multi_topology_correction, - 50, 24, 240, - additional_arguments_topology_correction) +The submission command is as follows: +```{bash, eval=FALSE} +for i in {1..5} +do +qsub qsub_script.pbs -v "CV_NO=${i}" -N nsga2r_cv${i} +done ``` -Once the job is finised (which can take a few days - for shorter running time: reduce the number of iterations), run: +This submits the job with the environment variable `CV_NO` that can be used by the script. This requires us to define `fold` differently. -```{r} -res_job_mlrmbo = qsub_retrieve(job_mlrmbo) +```{r, eval=FALSE} +task_id <- as.numeric(Sys.getenv("CV_NO")) #from 1-5 +fold <- cv_names[task_id] ``` -Get now the most optimal parameter setting as a result of this analysis -```{r} -optimized_parameters = res_job_mlrmbo %>% process_mlrmbo_nichenet_optimization(source_names = source_weights_df$source %>% unique()) +Once the job is finished (which can take a few days - for shorter running time: reduce the number of iterations), we can analyze the output of each fold and combine the results together. By default, `get_optimized_parameters_nsga2r` will retrieve the value of the parameters at the final iteration. However, in genetic algorithms it may be possible that equally good or better solution could have existed in earlier iterations or populations. Thus, we also allow users to set `search_all_iterations = TRUE`. The `top_n` value will then select the top number of populations that have the best value (based on the geometric mean of the objective functions). + +```{r weights-between-populations, fig.height=8, fig.width=10} +cv_names <- c("1234", "2345", "1345", "1245", "1235") +for (fold in cv_names){ + res_job_nsga2r_multi <- readRDS(paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) + + # Visualize source weights + print(fold) + print(visualize_parameter_values(res_job_nsga2r_multi, source_names)) + + for (n in c(25, 5, 1)){ + + optimized_df <- get_optimized_parameters_nsga2r(result_nsga2r = res_job_nsga2r_multi, + source_names = source_names, + search_all_iterations = TRUE, + top_n = n, + summarise_weights = TRUE) + + # saveRDS(optimized_df$source_weight_df, paste0("results/fold", fold, "_sourceweights_top", n, ifelse(n == 1, "", "_summarized"), ".rds")) + # saveRDS(optimized_df$hyperparams_df, paste0("results/fold", fold, "_hyperparameters_top", n, ifelse(n == 1, "", "_summarized"), ".rds")) + } +} ``` +We can also visualize differences across folds. + +```{r weights-between-folds, fig.height=8, fig.width=10} +list_optimization_results <- lapply(cv_names, function(fold){ + readRDS(paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) +}) -When you would be interested to generate a context-specific model, it could be possible that you would like to optimize the parameters specifically on your dataset of interest and not on the general ligand treatment datasets (be aware for overfitting, though!). Because for your own data, you don't know the true active ligands, you could only optimize target gene prediction performance and not ligand activity performance. In order to this, you would need to change the expression settings in the optimization functions such that they include your data, and use the function `model_evaluation_optimization_application` instead of `model_evaluation_optimization` (define this as the function parameter in `makeMultiObjectiveFunction` shown here above). +print(visualize_parameter_values_across_folds(list_optimization_results, source_names, 5)) +# The top 25 seems to have more overlap +print(visualize_parameter_values_across_folds(list_optimization_results, source_names, 25)) + +``` + +Finally, we can compute the average value of top 25 populations across all folds. + +```{r} +# Get average across all folds +for (top_n in c(5, 25)){ + all_optimized_weights <- lapply(1:length(list_optimization_results), function(i){ + get_optimized_parameters_nsga2r(list_optimization_results[[i]], + source_names, + search_all_iterations = TRUE, + top_n = top_n, + summarise_weights = FALSE) %>% + lapply(mutate, fold = paste0("Fold", i)) + }) + + all_optimized_source_weights_summarised <- purrr::map(all_optimized_weights, "source_weight_df") %>% bind_rows() %>% + group_by(source) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) + + all_optimized_hyperparams_df_summarised <- purrr::map(all_optimized_weights, "hyperparams_df") %>% bind_rows() %>% + group_by(parameter) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) + + # saveRDS(all_optimized_source_weights_summarised, paste0("results/all_sourceweights_top", top_n, "_summarized_final.rds")) + # saveRDS(all_optimized_hyperparams_df_summarised, paste0("results/all_hyperparameters_top", top_n, "_summarized_final.rds")) + + # readRDS(paste0("results/all_sourceweights_top25_summarized_final.rds")) %>% write_tsv("results/source_weights_df_final.txt") + # readRDS(paste0("results/all_hyperparameters_top25_summarized_final.rds")) %>% write_tsv("results/hyperparameters_df_final.txt") +} +``` diff --git a/vignettes/parameter_optimization.md b/vignettes/parameter_optimization.md index b7aa113..12476ed 100644 --- a/vignettes/parameter_optimization.md +++ b/vignettes/parameter_optimization.md @@ -1,22 +1,25 @@ -Parameter optimization via mlrMBO +Parameter optimization via NSGA-II ================ -Robin Browaeys -2018-02-20 +Robin Browaeys & Chananchida Sang-aram +2023-01-03 This vignette shows how we optimized both hyperparameters and data -source weights via model-based optimization (see manuscript for more -information). Because the optimization requires intensive parallel -computation, we performed optimization in parallel on a gridengine -cluster via the qsub package -(). This script -is merely illustrative and should be adapted by the user to work on its -own system. +source weights via NSGA-II, a non-dominated sorting genetic algorithm. +Because the optimization requires intensive parallel computation, we +performed optimization in parallel on a high performance computing (HPC) +cluster. The old code for parameter optimization via mlrMBO can be +accessed by browsing the repository history during NicheNet v1. -The [ligand treatment validation datasets](https://doi.org/10.5281/zenodo.3260758), and [NicheNet’s v2 ligand-target model](https://doi.org/10.5281/zenodo.7074290) can be downloaded from Zenodo. +This script is merely illustrative and should be adapted by the user to +work on its own system. + +All network and validation datasets can be downloaded from +[Zenodo](https://zenodo.org/records/8016880), under “NicheNet_V2.zip”. First, we will load in the required packages and networks we will use to construct the models which we will evaluate during the optimization @@ -25,114 +28,259 @@ procedure. ``` r library(nichenetr) library(tidyverse) -library(qsub) -library(mlrMBO) +library(nsga2R) -# in the NicheNet framework, ligand-target links are predicted based on collected biological knowledge on ligand-receptor, signaling and gene regulatory interactions -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) -sig_network = readRDS(url("https://zenodo.org/record/7074291/files/signaling_network_human_21122021.rds")) -gr_network = readRDS(url("https://zenodo.org/record/7074291/files/gr_network_human_21122021.rds")) -``` +# Path is relative to 'optimization.Rproj' in the zip file (evaluation/optimization/) +lr_network <- readRDS("../../networks/data/ligand_receptor/lr_network_human_21122021.rds") +sig_network <- readRDS("../../networks/data/signaling/signaling_network_human_21122021.rds") +gr_network <- readRDS("../../networks/data/gene_regulatory/gr_network_human_21122021_optimization.rds") -We will load in the ligand treatment validation datasets and try to -optimize the parameters to maximize both target gene and ligand activity -prediction. In this vignette, we do the optimization on all datasets. -Alternatively, you can select a specific subset of datasets and evaluate -the final performance on the left-out datasets. +# Get names of all data sources +source_names <- c(lr_network$source, sig_network$source, gr_network$source) %>% unique() +source_names_zero_possible <- c(sig_network$source, gr_network$source) %>% unique() -``` r -# The ligand treatment expression datasets used for validation can be downloaded from Zenodo: -expression_settings_validation = readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds")) +# nsga2r parameters +n_param <- length(source_names) + 4 +n_obj <- 4 +lower_bounds <- c(rep(0,times = length(source_names)), 0, 0, 0.9, 0.01) +upper_bounds <- c(rep(1,times = length(source_names)), 1, 1, 0.999, 0.99) ``` -Define the optimization wrapper function and config information for the -qsub package +To perform optimization, we will use the ligand treatment validation +datasets and try to optimize the parameters to maximize both target gene +and ligand activity prediction. We used 5-fold cross validation, i.e., +4/5th of dataset was used for training the parameters, and 1/5th of the +dataset was used to evaluate the final performance. ``` r -mlrmbo_optimization_wrapper = function(...){ - library(nichenetr) - library(mlrMBO) - library(parallelMap) - library(dplyr) - output = mlrmbo_optimization(...) - return(output) -} +# Choose 1 fold +cv_names <- c("1234", "2345", "1345", "1245", "1235") +fold <- cv_names[1] -qsub_config = create_qsub_config( - remote = "myuser@mycluster.address.org:1234", - local_tmp_path = "/tmp/r2gridengine", - remote_tmp_path = "/scratch/personal/myuser/r2gridengine", - num_cores = 24, - memory = "10G", - wait = FALSE, - max_wall_time = "500:00:00" +# Load in validation datasets corresponding to the fold +settings_CV <- readRDS(paste0("settings_training_f", fold)) +settings <- settings_CV$settings + +# Remove databases from GRN if it is in the validation dataset +gr_network <- readRDS("../../networks/data/gene_regulatory/gr_network_human_21122021_optimization.rds") +forbidden_gr <- bind_rows( + gr_network %>% filter(database == "NicheNet_LT" & from %in% settings_CV$forbidden_ligands_nichenet), + gr_network %>% filter(database == "CytoSig" & from %in% settings_CV$forbidden_ligands_cytosig) ) +gr_network <- gr_network %>% setdiff(forbidden_gr) +forbidden_gr %>% filter(database %in% c("NicheNet_LT", "CytoSig")) %>% pull(source) %>% table() +gr_network %>% filter(database %in% c("NicheNet_LT", "CytoSig")) %>% pull(source) %>% table() + +# Perform optimization +set.seed(1) +results <- run_nsga2R_cluster(model_evaluation_optimization_nsga2r, + # nsga2R params + varNo=n_param, + objDim=n_obj, + lowerBounds= lower_bounds, + upperBounds=upper_bounds, + popSize = 360, + tourSize = 2, + generations = 15, + ncores = 8, + # model_evaluation_optimization_nsga2r params + source_names = source_names, + algorithm = "PPR", + correct_topology = FALSE, + lr_network = lr_network, + sig_network = lr_network, + gr_network = gr_network, + settings = settings, + secondary_targets = FALSE, + remove_direct_links = "no", + damping_factor = NULL) + +# Save results object +readr::write_rds(results, paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) + +# Save parameters +optimized_parameters <- get_optimized_parameters_nsga2r(results, source_names) +saveRDS(optimized_parameters, paste0("results/cv_", fold, "_ltfcutoff_nsga2r_optimized_parameters_final.rds")) +``` + +In the real implementation, we submitted the code chunks above as an R +script to the HPC in 5 separate jobs (one per cross-validation fold). +Our job submission script looks as follows: + +``` bash +#!/bin/bash -l +#PBS -l nodes=1:ppn=48 +#PBS -l mem=64gb +#PBS -l walltime=72:00:00 + +module load R/4.2.0-foss-2021b +module load R-bundle-Bioconductor/3.15-foss-2021b-R-4.2.0 + +cd $PBS_O_WORKDIR +Rscript scripts/nsga2r_optimization.R +``` + +The submission command is as follows: + +``` bash +for i in {1..5} +do +qsub qsub_script.pbs -v "CV_NO=${i}" -N nsga2r_cv${i} +done ``` -Perform optimization: +This submits the job with the environment variable `CV_NO` that can be +used by the script. This requires us to define `fold` differently. ``` r -additional_arguments_topology_correction = list(source_names = source_weights_df$source %>% unique(), - algorithm = "PPR", - correct_topology = FALSE, - lr_network = lr_network, - sig_network = sig_network, - gr_network = gr_network, - settings = lapply(expression_settings_validation,convert_expression_settings_evaluation), - secondary_targets = FALSE, - remove_direct_links = "no", - cutoff_method = "quantile") -nr_datasources = additional_arguments_topology_correction$source_names %>% length() - -obj_fun_multi_topology_correction = makeMultiObjectiveFunction(name = "nichenet_optimization", - description = "data source weight and hyperparameter optimization: expensive black-box function", - fn = model_evaluation_optimization, - par.set = makeParamSet( - makeNumericVectorParam("source_weights", len = nr_datasources, lower = 0, upper = 1), - makeNumericVectorParam("lr_sig_hub", len = 1, lower = 0, upper = 1), - makeNumericVectorParam("gr_hub", len = 1, lower = 0, upper = 1), - makeNumericVectorParam("ltf_cutoff", len = 1, lower = 0.9, upper = 0.999), - makeNumericVectorParam("damping_factor", len = 1, lower = 0.01, upper = 0.99)), - has.simple.signature = FALSE, - n.objectives = 4, - noisy = FALSE, - minimize = c(FALSE,FALSE,FALSE,FALSE)) -set.seed(1) +task_id <- as.numeric(Sys.getenv("CV_NO")) #from 1-5 +fold <- cv_names[task_id] +``` + +Once the job is finished (which can take a few days - for shorter +running time: reduce the number of iterations), we can analyze the +output of each fold and combine the results together. By default, +`get_optimized_parameters_nsga2r` will retrieve the value of the +parameters at the final iteration. However, in genetic algorithms it may +be possible that equally good or better solution could have existed in +earlier iterations or populations. Thus, we also allow users to set +`search_all_iterations = TRUE`. The `top_n` value will then select the +top number of populations that have the best value (based on the +geometric mean of the objective functions). -# Run with: 50 iterations, 24 desings evaluated in parallel and 240 start designs - -job_mlrmbo = qsub_lapply(X = 1, - FUN = mlrmbo_optimization_wrapper, - object_envir = environment(mlrmbo_optimization_wrapper), - qsub_config = qsub_config, - qsub_environment = NULL, - qsub_packages = NULL, - obj_fun_multi_topology_correction, - 50, 24, 240, - additional_arguments_topology_correction) +``` r +cv_names <- c("1234", "2345", "1345", "1245", "1235") +for (fold in cv_names){ + res_job_nsga2r_multi <- readRDS(paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) + + # Visualize source weights + print(fold) + print(visualize_parameter_values(res_job_nsga2r_multi, source_names)) + + for (n in c(25, 5, 1)){ + + optimized_df <- get_optimized_parameters_nsga2r(result_nsga2r = res_job_nsga2r_multi, + source_names = source_names, + search_all_iterations = TRUE, + top_n = n, + summarise_weights = TRUE) + + # saveRDS(optimized_df$source_weight_df, paste0("results/fold", fold, "_sourceweights_top", n, ifelse(n == 1, "", "_summarized"), ".rds")) + # saveRDS(optimized_df$hyperparams_df, paste0("results/fold", fold, "_hyperparameters_top", n, ifelse(n == 1, "", "_summarized"), ".rds")) + } +} +## [1] "1234" +## $source_weights_boxplot ``` -Once the job is finised (which can take a few days - for shorter running -time: reduce the number of iterations), run: +![](parameter_optimization_files/figure-gfm/weights-between-populations-1.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-2.png) + + ## + ## [1] "2345" + ## $source_weights_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-3.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-4.png) + + ## + ## [1] "1345" + ## $source_weights_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-5.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-6.png) + + ## + ## [1] "1245" + ## $source_weights_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-7.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-8.png) + + ## + ## [1] "1235" + ## $source_weights_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-9.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-populations-10.png) + +We can also visualize differences across folds. ``` r -res_job_mlrmbo = qsub_retrieve(job_mlrmbo) +list_optimization_results <- lapply(cv_names, function(fold){ + readRDS(paste0("results/cv_", fold, "_ltfcutoff_nsga2r_final.rds")) +}) + +print(visualize_parameter_values_across_folds(list_optimization_results, source_names, 5)) +## $source_weights_boxplot ``` -Get now the most optimal parameter setting as a result of this analysis +![](parameter_optimization_files/figure-gfm/weights-between-folds-1.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-folds-2.png) ``` r -optimized_parameters = res_job_mlrmbo %>% process_mlrmbo_nichenet_optimization(source_names = source_weights_df$source %>% unique()) + +# The top 25 seems to have more overlap +print(visualize_parameter_values_across_folds(list_optimization_results, source_names, 25)) +## $source_weights_boxplot ``` -When you would be interested to generate a context-specific model, it -could be possible that you would like to optimize the parameters -specifically on your dataset of interest and not on the general ligand -treatment datasets (be aware for overfitting, though!). Because for your -own data, you don’t know the true active ligands, you could only -optimize target gene prediction performance and not ligand activity -performance. In order to this, you would need to change the expression -settings in the optimization functions such that they include your data, -and use the function `model_evaluation_optimization_application` instead -of `model_evaluation_optimization` (define this as the function -parameter in `makeMultiObjectiveFunction` shown here above). +![](parameter_optimization_files/figure-gfm/weights-between-folds-3.png) + + ## + ## $hyperparameters_boxplot + +![](parameter_optimization_files/figure-gfm/weights-between-folds-4.png) + +Finally, we can compute the average value of top 25 populations across +all folds. + +``` r +# Get average across all folds +for (top_n in c(5, 25)){ + all_optimized_weights <- lapply(1:length(list_optimization_results), function(i){ + get_optimized_parameters_nsga2r(list_optimization_results[[i]], + source_names, + search_all_iterations = TRUE, + top_n = top_n, + summarise_weights = FALSE) %>% + lapply(mutate, fold = paste0("Fold", i)) + }) + + all_optimized_source_weights_summarised <- purrr::map(all_optimized_weights, "source_weight_df") %>% bind_rows() %>% + group_by(source) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) + + all_optimized_hyperparams_df_summarised <- purrr::map(all_optimized_weights, "hyperparams_df") %>% bind_rows() %>% + group_by(parameter) %>% summarise(avg_weight = mean(weight), median_weight = median(weight)) + + # saveRDS(all_optimized_source_weights_summarised, paste0("results/all_sourceweights_top", top_n, "_summarized_final.rds")) + # saveRDS(all_optimized_hyperparams_df_summarised, paste0("results/all_hyperparameters_top", top_n, "_summarized_final.rds")) + + # readRDS(paste0("results/all_sourceweights_top25_summarized_final.rds")) %>% write_tsv("results/source_weights_df_final.txt") + # readRDS(paste0("results/all_hyperparameters_top25_summarized_final.rds")) %>% write_tsv("results/hyperparameters_df_final.txt") +} +``` diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-1.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-1.png new file mode 100644 index 0000000..3c803c3 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-1.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-2.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-2.png new file mode 100644 index 0000000..aafdd4c Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-2.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-3.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-3.png new file mode 100644 index 0000000..07c2fca Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-3.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-4.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-4.png new file mode 100644 index 0000000..a08e31d Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-folds-4.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-1.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-1.png new file mode 100644 index 0000000..4d58df3 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-1.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-10.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-10.png new file mode 100644 index 0000000..e7ec1e1 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-10.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-2.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-2.png new file mode 100644 index 0000000..f506d56 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-2.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-3.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-3.png new file mode 100644 index 0000000..fe67f49 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-3.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-4.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-4.png new file mode 100644 index 0000000..421e020 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-4.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-5.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-5.png new file mode 100644 index 0000000..ce4fb6b Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-5.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-6.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-6.png new file mode 100644 index 0000000..dbf1507 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-6.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-7.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-7.png new file mode 100644 index 0000000..d53f761 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-7.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-8.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-8.png new file mode 100644 index 0000000..d424ae4 Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-8.png differ diff --git a/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-9.png b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-9.png new file mode 100644 index 0000000..c81b2ae Binary files /dev/null and b/vignettes/parameter_optimization_files/figure-gfm/weights-between-populations-9.png differ diff --git a/vignettes/seurat_steps.Rmd b/vignettes/seurat_steps.Rmd index 9db6fac..dea3737 100644 --- a/vignettes/seurat_steps.Rmd +++ b/vignettes/seurat_steps.Rmd @@ -1,6 +1,6 @@ --- title: "Perform NicheNet analysis starting from a Seurat object: step-by-step analysis" -author: "Robin Browaeys" +author: "Robin Browaeys & Chananchida Sang-aram" date: "2023-10-02" output: rmarkdown::html_vignette vignette: > @@ -23,26 +23,23 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3/v4 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. +In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat (v3-v5) object containing single-cell expression data. The steps of this vignette can also be adapted for other single-cell or bulk frameworks. -Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about this effect in gene expression you want to study. So, there need to be 'some kind of' differential expression in a receiver cell population, caused by ligands from one of more interacting sender cell populations. +**Assuming you have captured the changes in gene expression resulting from your cell-cell communication (CCC) process of interest,** a NicheNet analysis can help you to generate hypotheses about the CCC process. Specifically, NicheNet can predict 1) which ligands from the microenvironment or cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. -In this vignette, we demonstrate the use of NicheNet on a Seurat Object. The steps of the analysis we show here are also discussed in detail in the main, basis, NicheNet vignette [NicheNet's ligand activity analysis on a gene set of interest: predict active ligands and their target genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. Make sure you understand the different steps in a NicheNet analysis that are described in that vignette before proceeding with this vignette and performing a real NicheNet analysis on your data. -This vignette describes the different steps behind the wrapper functions that are shown in [Perform NicheNet analysis starting from a Seurat object](seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")`. Following this vignette has the advantage that it allows users to adapt specific steps of the pipeline to make them more appropriate for their data. +To perform a NicheNet analysis, three features are extracted from the input data: the potential ligands, the gene set of interest, and the background gene set. This vignette will extract each feature as described in this flowchart: -As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. +![](images/figure2.svg){width=70%} -In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. +As example expression data of interacting cells, we will use mouse NICHE-seq data to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will focus on CD8 T cells as the receiver population, and as this dataset contains two conditions (before and after LCMV infection), the differentially expressed genes between these two conditions in CD8 T cells will be used as our gene set of interest. We will then prioritize which ligands from the microenvironment (sender-agnostic approach) and from specific immune cell populations like monocytes, dendritic cells, NK cells, B cells, and CD4 T cells (sender-focused approach) can regulate and induce these observed gene expression changes. -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. +Please make sure you understand the different steps described in this vignette before performing a real NicheNet analysis on your data. There are also wrapper functions that perform the same steps as in this vignette in [Perform NicheNet analysis starting from a Seurat object](seurat_wrapper.md). However, in that case users will not be able to adapt specific steps of the pipeline to make them more appropriate for their data. -# Prepare NicheNet analysis - -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. +The [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. -The NicheNet ligand-receptor network and weighted networks are necessary to define and show possible ligand-receptor interactions between two cell populations. The ligand-target matrix denotes the prior potential that particular ligands might regulate the expression of particular target genes. This matrix is necessary to prioritize possible ligand-receptor interactions based on observed gene expression effects (i.e. NicheNet's ligand activity analysis) and infer affected target genes of these prioritized ligands. +# Prepare NicheNet analysis -### Load Packages: +### Load packages ```{r} library(nichenetr) # Please update to v2.0.4 @@ -51,289 +48,438 @@ library(SeuratObject) library(tidyverse) ``` -If you would use and load other packages, we recommend to load these 3 packages after the others. +### Read in the expression data of interacting cells -### Read in the expression data of interacting cells: - -The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3/v4 object and that gene should be named by their official mouse/human gene symbol. +We processed and aggregated the original dataset by using the Seurat alignment pipeline. As we created this object using Seurat v3, it has to be updated with `UpdateSeuratObject`. Note that genes should be named by their official mouse/human gene symbol. ```{r} -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) seuratObj@meta.data %>% head() -# For newer Seurat versions, you may need to run the following +# For older Seurat objects, you may need to run this seuratObj <- UpdateSeuratObject(seuratObj) ``` -Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes +Additionally, if your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. + ```{r} -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") +``` + +Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes. + +```{r umap-1} +# Note that the number of cells of some cell types is very low and should preferably be higher for a real application +seuratObj@meta.data$celltype %>% table() + DimPlot(seuratObj, reduction = "tsne") ``` -Visualize the data to see to which condition cells belong. The metadata dataframe column that denotes the condition (steady-state or after LCMV infection) is here called 'aggregate'. +Visualize the data to see to which condition cells belong. The metadata column that denotes the condition (steady-state or after LCMV infection) is here called 'aggregate'. -```{r} +```{r umap-2} seuratObj@meta.data$aggregate %>% table() DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ``` -### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet's networks -```{r} +The ligand-target prior model, ligand-receptor network, and weighted integrated networks are needed for this vignette. The ligand-target prior model is a matrix describing the potential that a ligand may regulate a target gene, and it is used to run the ligand activity analysis. The ligand-receptor network contains information on potential ligand-receptor bindings, and it is used to identify potential ligands. Finally, the weighted ligand-receptor network contains weights representing the potential that a ligand will bind to a receptor, and it is used for visualization. -organism = "mouse" +```{r} +organism <- "mouse" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) head(lr_network) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network head(weighted_networks$gr) # interactions and their weights in the gene regulatory network ``` -If your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. - -```{r} -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") -``` # Perform the NicheNet analysis -In this case study, we want to apply NicheNet to predict which ligands expressed by all immune cells in the T cell area of the lymph node are most likely to have induced the differential expression in CD8 T cells after LCMV infection. +In contrary to NicheNet v1, we now recommend users to run both the "sender-agnostic" approach and "sender-focused" approach. These approaches only affect the list of potential ligands that are considered for prioritization. As described in the flowchart above, we do not define any sender populations in the 'sender agnostic' approach but consider all ligands for which its cognate receptor is expressed in the receiver population. The sender-focused approach will then filter the list of ligands to ones where the ligands are expressed in the sender cell population(s). -As described in the main vignette, the pipeline of a basic NicheNet analysis consist of the following steps: +## 1. Define a set of potential ligands for both the sender-agnostic and sender-focused approach -## 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations +We first define a "receiver/target" cell population and determine which genes are expressed. Here, we will consider a gene to be expressed if it is expressed in at least 5% of cells (by default this is set to 10%). The receiver cell population can only consist of one cell type, so in case of multiple receiver populations, you will have to rerun the vignette separately for each one. We will only look at CD8 T cells in this vignette. -In this case study, the receiver cell population is the 'CD8 T' cell population, whereas the sender cell populations are 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. -We will consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster. ```{r} -## receiver receiver = "CD8 T" -expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) +expressed_genes_receiver <- get_expressed_genes(receiver, seuratObj, pct = 0.05) +``` + +Get a list of all receptors available in the ligand-receptor network, and define expressed receptors as genes that are in the ligand-receptor network and expressed in the receiver. Then, define the potential ligands as all ligands whose cognate receptors are expressed. -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] +```{r} +all_receptors <- unique(lr_network$to) +expressed_receptors <- intersect(all_receptors, expressed_genes_receiver) + +potential_ligands <- lr_network %>% filter(to %in% expressed_receptors) %>% pull(from) %>% unique() ``` +For the sender-focused approach, define sender cell types (CD4 T, Treg, Mono, NK, B, and DC) and expressed genes in all sender populations. (Although we pool all ligands from all sender cell types together in this step, later on during the interpretation of the output, we will check which sender cell type expresses which ligand.) Then, filter potential ligands to those that are expressed in sender cells. Note that autocrine signaling can also be considered if we also include CD8 T cells as a sender. + ```{r} -## sender -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") +sender_celltypes <- c("CD4 T", "Treg", "Mono", "NK", "B", "DC") + +# Use lapply to get the expressed genes of every sender cell type separately here +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.05) +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() + +potential_ligands_focused <- intersect(potential_ligands, expressed_genes_sender) -list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here -expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() +# Also check +length(expressed_genes_sender) +length(potential_ligands) +length(potential_ligands_focused) ``` -## 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) -Here, the gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate'. The method to calculate the differential expression is here the standard Seurat Wilcoxon test, but this can be changed if necessary. +## 2. Define the gene set of interest + +The gene set of interest are genes within the receiver cell type that are likely to be influenced by ligands from the CCC event. In typical case-control studies like this one, we use the differentially expressed (DE) genes between the two conditions in the receiver cell type, assuming that the observed DE pattern is a result of the CCC event (i.e., LCMV infection). The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The condition can be extracted from the metadata column 'aggregate'. The method to calculate the differential expression is here the standard Seurat Wilcoxon test, but this can be changed if necessary. ```{r} -seurat_obj_receiver= subset(seuratObj, idents = receiver) -seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate", drop=TRUE]]) +condition_oi <- "LCMV" +condition_reference <- "SS" + +seurat_obj_receiver <- subset(seuratObj, idents = receiver) -condition_oi = "LCMV" -condition_reference = "SS" - -DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.05) %>% rownames_to_column("gene") -geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) -geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] ``` -## 3. Define a set of potential ligands: these are ligands that are expressed by the “sender/niche” cell population and bind a (putative) receptor expressed by the “receiver/target” population +## 3. Define the background genes -Because we combined the expressed genes of each sender cell type, in this example, we will perform one NicheNet analysis by pooling all ligands from all cell types together. Later on during the interpretation of the output, we will check which sender cell type expresses which ligand. +All expressed genes in the receiver cell population (that are also in the ligand-target matrix) is defined as the 'background set' for our ligand prioritization procedure in the next step. It's also important to check that the number of background genes is a 'reasonable' number, generally between 5000-10000, and sufficiently larger than our gene set of interest. ```{r} -ligands = lr_network %>% pull(from) %>% unique() -receptors = lr_network %>% pull(to) %>% unique() +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -expressed_ligands = intersect(ligands,expressed_genes_sender) -expressed_receptors = intersect(receptors,expressed_genes_receiver) - -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +length(background_expressed_genes) +length(geneset_oi) ``` -## 4) Perform NicheNet ligand activity analysis: rank the potential ligands based on the presence of their target genes in the gene set of interest (compared to the background set of genes) + +## 4. Perform NicheNet ligand activity analysis + +This is the main step of NicheNet where the potential ligands are ranked based on the presence of their target genes in the gene set of interest (compared to the background set of genes). In this case, we prioritize ligands that induce the antiviral response in CD8 T cells. + +Ligands are ranked based on the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response. Although other metrics like the AUROC and pearson correlation coefficient are also computed, we demonstrated in our validation study that the AUPR was the most informative measure to define ligand activity (this was the Pearson correlation for v1). The vignette on how we performed the validation can be found at [Evaluation of NicheNet's ligand-target predictions](model_evaluation.md). + +We will first show the results of the sender-agnostic approach. ```{r} -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) -ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) ligand_activities ``` -The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, NicheNet ranks the ligands based on their AUPR. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. +The performance metrics indicate that the 30 top-ranked ligands can predict the viral response reasonably, implying that the ranking of the ligands might be accurate. However, it is possible that for some gene sets, the target gene prediction performance of the top-ranked ligands would not be much better than random prediction. In that case, prioritization of ligands will be less trustworthy. + +We will use the top 30 ligands to predict active target genes and construct an active ligand-receptor network. However, the choice of looking only at the 30 top-ranked ligands for further biological interpretation is based on biological intuition and is quite arbitrary. Therefore, users can decide to continue the analysis with a different number of ligands. We recommend to check the selected cutoff by looking at the distribution of the ligand activity values. Here, we show the ligand activity histogram (the score for the 30th ligand is indicated via the dashed line). -The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is here 30. +```{r histogram} +p_hist_lig_activity <- ggplot(ligand_activities, aes(x=aupr_corrected)) + + geom_histogram(color="black", fill="darkorange") + + geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), + color="red", linetype="dashed", size=1) + + labs(x="ligand activity (PCC)", y = "# ligands") + + theme_classic() + +p_hist_lig_activity +``` ```{r} -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) ``` -These ligands are expressed by one or more of the input sender cells. To see which cell population expresses which of these top-ranked ligands, you can run the following: +We can also visualize the ligand activity measure (AUPR) of these top-ranked ligands: -```{r, fig.width=12} -DotPlot(seuratObj, features = best_upstream_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() -``` +```{r agnostic-ligand-activity-heatmap, fig.width=3,fig.height=6} +vis_ligand_aupr <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) %>% as.matrix(ncol = 1) -As you can see, most op the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. +(make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized ligands", "Ligand activity", + legend_title = "AUPR", color = "darkorange") + + theme(axis.text.x.top = element_blank())) +``` -## 5) Infer receptors and top-predicted target genes of ligands that are top-ranked in the ligand activity analysis +## 5. Infer target genes and receptors of top-ranked ligands ### Active target gene inference +Active target genes are defined as genes in the gene set of interest that have the highest regulatory potential for each top-ranked ligand. These top targets of each ligand are based on the prior model. The function get_weighted_ligand_target_links will return genes that are in the gene set of interest and are the top `n` targets of a ligand (default: `n = 200`, but there are too many target genes here so we only considered the top 100). + ```{r} -active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 200) %>% bind_rows() %>% drop_na() +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 100) %>% + bind_rows() %>% drop_na() + +nrow(active_ligand_target_links_df) +head(active_ligand_target_links_df) +``` -active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.33) +For visualization purposes, the ligand-target prior model was adapted by setting a regulatory potential score to 0 if their score was below a predefined cutoff (default: 0.25, or the 25th percentile) across all scores between the top-ranked ligands and their top `n` targets. We recommend users to test several cutoff values for the best visualization, as lowering or increasing the cutoff will result in a denser or sparser heatmap, respectively. -order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() -order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() -rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 -colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 +```{r ligand-target-heatmap, fig.width=10} +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.33) -vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() -``` +nrow(active_ligand_target_links) +head(active_ligand_target_links) + +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) + +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) -```{r, fig.width=10} -p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.0045,0.0090)) -p_ligand_target_network +make_heatmap_ggplot(vis_ligand_target, "Prioritized ligands", "Predicted target genes", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") ``` -Note that not all ligands from the top 30 are present in this ligand-target heatmap. The left-out ligands are ligands that don't have target genes with high enough regulatory potential scores. Therefore, they did not survive the used cutoffs. To include them, you can be less stringent in the used cutoffs. +The rows of the heatmap are ordered based on the rankings of the ligands, and the columns are ordered alphabetically. We see a lot of interferons in the top ligands, which biologically make sense as we are looking at response to a viral infection. + +Note that not all ligands from the top 30 are present in the heatmap. The left-out ligands are ligands that don't have target genes with high enough regulatory potential scores. Therefore, they did not survive the used cutoffs. To include them, you can be less stringent in the used cutoffs or increase the number of target genes considered. Additionally, if you would consider more than the top 200 targets based on prior information, you will infer more, but less confident, ligand-target links; by considering less than 200 targets, you will be more stringent. ### Receptors of top-ranked ligands +Similar to above, we identify which receptors have the highest interaction potential with the top-ranked ligands. + ```{r} -lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) -best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) +``` -lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) +Then, we create a heatmap for ligand-receptor interactions. Here, both the ligands and receptors are ordered by hierarchical clustering You can choose to order only ligands or receptors hierarachically (with `order_hclust = ligands` or `receptors`, respectively) or not at all (`none`), in which case the ligands are ordered based on their rankings, and the receptors are ordered alphabetically.. -lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) -lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) +```{r ligand-receptor-heatmap, fig.width=8} +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") -dist_receptors = dist(lr_network_top_matrix, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] +(make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Ligands", x_name = "Receptors", + color = "mediumvioletred", legend_title = "Prior interaction potential")) +``` -order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) -order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) +## 6. Sender-focused approach -vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] -rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() -colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() -``` +To perform the sender-focused approach, simply subset the ligand activities to only contain expressed ligands from all populations (calculated in Step 1). We can then perform target gene and receptor inference as above. ```{r} -p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") -p_ligand_receptor_network +ligand_activities_all <- ligand_activities +best_upstream_ligands_all <- best_upstream_ligands + +ligand_activities <- ligand_activities %>% filter(test_ligand %in% potential_ligands_focused) +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% + pull(test_ligand) %>% unique() + ``` -## 6) Add log fold change information of ligands from sender cells +```{r focused-ligand-activity-heatmap, fig.width=3,fig.height=6} +ligand_aupr_matrix <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) +vis_ligand_aupr <- as.matrix(ligand_aupr_matrix, ncol = 1) -In some cases, it might be possible to also check upregulation of ligands in sender cells. This can add a useful extra layer of information next to the ligand activities defined by NicheNet, because you can assume that some of the ligands inducing DE in receiver cells, will be DE themselves in the sender cells. +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized ligands", "Ligand activity", + legend_title = "AUPR", color = "darkorange") + + theme(axis.text.x.top = element_blank()) -Here this is possible: we will define the log fold change between LCMV and steady-state in all sender cell types and visualize this as extra information. +p_ligand_aupr -```{r, fig.width=10} -# DE analysis for each sender cell type -# this uses a new nichenetr function - reinstall nichenetr if necessary! -DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) # use this if cell type labels are the identities of your Seurat object -- if not: indicate the celltype_col properly -DE_table_all[is.na(DE_table_all)] = 0 +``` -# Combine ligand activities with DE information -ligand_activities_de = ligand_activities %>% select(test_ligand, pearson) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) -ligand_activities_de[is.na(ligand_activities_de)] = 0 +```{r focused-ligand-target-heatmap, fig.width=10} +# Target gene plot +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 100) %>% + bind_rows() %>% drop_na() -# make LFC heatmap -lfc_matrix = ligand_activities_de %>% select(-ligand, -pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) -rownames(lfc_matrix) = rownames(lfc_matrix) %>% make.names() +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.33) -order_ligands = order_ligands[order_ligands %in% rownames(lfc_matrix)] -vis_ligand_lfc = lfc_matrix[order_ligands,] +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) -colnames(vis_ligand_lfc) = vis_ligand_lfc %>% colnames() %>% make.names() +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) -p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot("Prioritized ligands","LFC in Sender", low_color = "midnightblue",mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red",legend_position = "top", x_axis_position = "top", legend_title = "LFC") + theme(axis.text.y = element_text(face = "italic")) -p_ligand_lfc +p_ligand_target <- make_heatmap_ggplot(vis_ligand_target, "Prioritized ligands", "Predicted target genes", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") -# change colors a bit to make them more stand out -p_ligand_lfc = p_ligand_lfc + scale_fill_gradientn(colors = c("midnightblue","blue", "grey95", "grey99","firebrick1","red"),values = c(0,0.1,0.2,0.25, 0.40, 0.7,1), limits = c(vis_ligand_lfc %>% min() - 0.1, vis_ligand_lfc %>% max() + 0.1)) -p_ligand_lfc +p_ligand_target ``` +```{r focused-ligand-receptor-heatmap} +# Receptor plot +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) -## 7) Summary visualizations of the NicheNet analysis +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") + +p_ligand_receptor <- make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Ligands", x_name = "Receptors", + color = "mediumvioletred", legend_title = "Prior interaction potential") + +p_ligand_receptor + +``` -For example, you can make a combined heatmap of ligand activities, ligand expression, ligand log fold change and the target genes of the top-ranked ligands. The plots for the log fold change and target genes were already made. Let's now make the heatmap for ligand activities and for expression. +Here, we instead observe that the top-ranked ligands consist of many H2 genes (which encode MHC-II proteins), and not IFN genes as in the sender-agnostic approach. This is because IFN genes are not expressed by the sender cell populations, and it was already filtered out during preprocessing for being too lowly expressed. ```{r} -# ligand activity heatmap -ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +best_upstream_ligands_all %in% rownames(seuratObj) %>% table() +``` + +### Visualizing expression and log-fold change in sender cells -rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() -colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() +For the sender-focused approach, we can also investigate further on which sender cell populations are potentially the true sender of these ligands. First, we can simply check which sender cell population expresses which of these top-ranked ligands. -vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") -p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) +```{r dotplot, fig.width=10} +# Dotplot of sender-focused approach +p_dotplot <- DotPlot(subset(seuratObj, celltype %in% sender_celltypes), + features = rev(best_upstream_ligands), cols = "RdYlBu") + + coord_flip() + + scale_y_discrete(position = "right") +p_dotplot ``` -```{r} -# ligand expression Seurat dotplot -order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") -rotated_dotplot = DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots +As you can see, most of the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. + +Next, we can also check upregulation of ligands in sender cells by computing the log-fold change between the two conditions. This ligand differential expression is not used for prioritization and ranking of the ligands (the ranking is only determined based on enrichment of target genes among DE genes in the receiver, CD8T cells), but it can add a useful extra layer of information next to the ligand activities. This is of course only possible in some cases, such as case-control studies. + +```{r lfc-heatmap, fig.width=10} + +celltype_order <- levels(Idents(seuratObj)) + +# Use this if cell type labels are the identities of your Seurat object +# if not: indicate the celltype_col properly +DE_table_top_ligands <- lapply( + celltype_order[celltype_order %in% sender_celltypes], + get_lfc_celltype, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + celltype_col = "celltype", + min.pct = 0, logfc.threshold = 0, + features = best_upstream_ligands +) + +DE_table_top_ligands <- DE_table_top_ligands %>% reduce(., full_join) %>% + column_to_rownames("gene") + +vis_ligand_lfc <- as.matrix(DE_table_top_ligands[rev(best_upstream_ligands), ]) + +p_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, + "Prioritized ligands", "LFC in Sender", + low_color = "midnightblue", mid_color = "white", + mid = median(vis_ligand_lfc), high_color = "red", + legend_title = "LFC") + +p_lfc ``` -```{r, fig.width=12, fig.height=10} +We see that most of the top-ranked ligands also seem to be upregulated themselves in monocytes after viral infection. This is nice additional "evidence" that these ligands might indeed be important. + +Finally, you can also compare rankings between the sender-agnostic and sender-focused approach. Here, the red sections of the left bar plot indicates which ligands in the sender-agnostic approach are filtered out in the sender-focused approach because they are not expressed. + +```{r lineplot} +(make_line_plot(ligand_activities = ligand_activities_all, + potential_ligands = potential_ligands_focused) + + theme(plot.title = element_text(size=11, hjust=0.1, margin=margin(0, 0, -5, 0)))) +``` -figures_without_legend = cowplot::plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), - p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), +## 7. Summary visualizations of the NicheNet analysis + +Finally, we can make a combined plot containing heatmap of ligand activities, ligand expression, ligand log-fold change and the target genes of the top-ranked ligands. As mentioned earlier, sometimes ligands do not appear in the ligand-target heatmap because they don't have target genes with high enough regulatory potential scores. In this case, CCl22 is present in other plots (ranked 25th) but is missing in the rightmost plot. If users wish for these plots to be consistent, they may use the variable `order_ligands` defined when creating the ligand-target heatmap to subset other plots instead of `best_upstream_ligands`. + + +```{r summary-vis, fig.width=16, fig.height=10} +figures_without_legend <- cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none"), + p_dotplot + theme(legend.position = "none", + axis.ticks = element_blank(), + axis.title.y = element_blank(), + axis.title.x = element_text(size = 12), + axis.text.y = element_text(size = 9), + axis.text.x = element_text(size = 9, angle = 90, hjust = 0)) + + ylab("Expression in Sender"), + p_lfc + theme(legend.position = "none", + axis.title.y = element_blank()), + p_ligand_target + theme(legend.position = "none", + axis.title.y = element_blank()), align = "hv", nrow = 1, - rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc)+7, ncol(vis_ligand_lfc)+8, ncol(vis_ligand_target))) -legends = cowplot::plot_grid( +legends <- cowplot::plot_grid( ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), - ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + ggpubr::as_ggplot(ggpubr::get_legend(p_dotplot)), + ggpubr::as_ggplot(ggpubr::get_legend(p_lfc)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target)), nrow = 1, align = "h", rel_widths = c(1.5, 1, 1, 1)) -combined_plot = cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") +combined_plot <- cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") combined_plot ``` +## Other follow-up analyses: + +As another follow-up analysis, you can infer possible signaling paths between ligands and targets of interest. You can read how to do this in the following vignette [Inferring ligand-to-target signaling paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", package="nichenetr")`. -# Remarks +Another follow-up analysis is getting a "tangible" measure of how well top-ranked ligands predict the gene set of interest and assess which genes of the gene set can be predicted well. You can read how to do this in the following vignette [Assess how well top-ranked ligands can predict a gene set of interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", package="nichenetr")`. + +In case you want to visualize ligand-target links between multiple interacting cells, you can make an appealing circos plot as shown in vignette [Circos plot visualization to show active ligand-target links between interacting cells](circos.md):`vignette("circos", package="nichenetr")`. -Top-ranked ligands and target genes shown here differ from the predictions shown in the respective case study in the NicheNet paper because 1) a different definition of expressed genes was used, and 2) we have updated the ligand-target matrix to include more data sources. ```{r} sessionInfo() @@ -342,3 +488,4 @@ sessionInfo() # References + diff --git a/vignettes/seurat_steps.md b/vignettes/seurat_steps.md index aba818b..9ba4e8d 100644 --- a/vignettes/seurat_steps.md +++ b/vignettes/seurat_steps.md @@ -1,7 +1,7 @@ Perform NicheNet analysis starting from a Seurat object: step-by-step analysis ================ -Robin Browaeys +Robin Browaeys & Chananchida Sang-aram 2023-10-02 In this vignette, you can learn how to perform a basic NicheNet analysis -on a Seurat v3/v4 object. Such a NicheNet analysis can help you to -generate hypotheses about an intercellular communication process of -interest for which you have single-cell gene expression data as a Seurat -object. Specifically, NicheNet can predict 1) which ligands from one or -more cell population(s) (“sender/niche”) are most likely to affect -target gene expression in an interacting cell population +on a Seurat (v3-v5) object containing single-cell expression data. The +steps of this vignette can also be adapted for other single-cell or bulk +frameworks. + +**Assuming you have captured the changes in gene expression resulting +from your cell-cell communication (CCC) process of interest,** a +NicheNet analysis can help you to generate hypotheses about the CCC +process. Specifically, NicheNet can predict 1) which ligands from the +microenvironment or cell population(s) (“sender/niche”) are most likely +to affect target gene expression in an interacting cell population (“receiver/target”) and 2) which specific target genes are affected by which of these predicted ligands. -Because NicheNet studies how ligands affect gene expression in -putatively neighboring/interacting cells, you need to have data about -this effect in gene expression you want to study. So, there need to be -‘some kind of’ differential expression in a receiver cell population, -caused by ligands from one of more interacting sender cell populations. - -In this vignette, we demonstrate the use of NicheNet on a Seurat Object. -The steps of the analysis we show here are also discussed in detail in -the main, basis, NicheNet vignette [NicheNet’s ligand activity analysis -on a gene set of interest: predict active ligands and their target -genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. -Make sure you understand the different steps in a NicheNet analysis that -are described in that vignette before proceeding with this vignette and -performing a real NicheNet analysis on your data. This vignette -describes the different steps behind the wrapper functions that are -shown in [Perform NicheNet analysis starting from a Seurat -object](seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")`. -Following this vignette has the advantage that it allows users to adapt -specific steps of the pipeline to make them more appropriate for their -data. +To perform a NicheNet analysis, three features are extracted from the +input data: the potential ligands, the gene set of interest, and the +background gene set. This vignette will extract each feature as +described in this flowchart: + + As example expression data of interacting cells, we will use mouse -NICHE-seq data from Medaglia et al. to explore intercellular -communication in the T cell area in the inguinal lymph node before and -72 hours after lymphocytic choriomeningitis virus (LCMV) infection -(Medaglia et al. 2017). We will NicheNet to explore immune cell -crosstalk in response to this LCMV infection. - -In this dataset, differential expression is observed between CD8 T cells -in steady-state and CD8 T cells after LCMV infection. NicheNet can be -applied to look at how several immune cell populations in the lymph node -(i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can -regulate and induce these observed gene expression changes. NicheNet -will specifically prioritize ligands from these immune cells and their -target genes that change in expression upon LCMV infection. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) -and the [Seurat object of the processed NICHE-seq single-cell +NICHE-seq data to explore intercellular communication in the T cell area +in the inguinal lymph node before and 72 hours after lymphocytic +choriomeningitis virus (LCMV) infection (Medaglia et al. 2017). We will +focus on CD8 T cells as the receiver population, and as this dataset +contains two conditions (before and after LCMV infection), the +differentially expressed genes between these two conditions in CD8 T +cells will be used as our gene set of interest. We will then prioritize +which ligands from the microenvironment (sender-agnostic approach) and +from specific immune cell populations like monocytes, dendritic cells, +NK cells, B cells, and CD4 T cells (sender-focused approach) can +regulate and induce these observed gene expression changes. + +Please make sure you understand the different steps described in this +vignette before performing a real NicheNet analysis on your data. There +are also wrapper functions that perform the same steps as in this +vignette in [Perform NicheNet analysis starting from a Seurat +object](seurat_wrapper.md). However, in that case users will not be able +to adapt specific steps of the pipeline to make them more appropriate +for their data. + +The [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and +the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks. - -The NicheNet ligand-receptor network and weighted networks are necessary -to define and show possible ligand-receptor interactions between two -cell populations. The ligand-target matrix denotes the prior potential -that particular ligands might regulate the expression of particular -target genes. This matrix is necessary to prioritize possible -ligand-receptor interactions based on observed gene expression effects -(i.e. NicheNet’s ligand activity analysis) and infer affected target -genes of these prioritized ligands. - -### Load Packages: +### Load packages ``` r -library(nichenetr) +library(nichenetr) # Please update to v2.0.4 library(Seurat) library(SeuratObject) library(tidyverse) ``` -If you would use and load other packages, we recommend to load these 3 -packages after the others. +### Read in the expression data of interacting cells -### Read in the expression data of interacting cells: - -The dataset used here is publicly available single-cell data from immune -cells in the T cell area of the inguinal lymph node. The data was -processed and aggregated by applying the Seurat alignment pipeline. The -Seurat object contains this aggregated data. Note that this should be a -Seurat v3/v4 object and that gene should be named by their official -mouse/human gene symbol. +We processed and aggregated the original dataset by using the Seurat +alignment pipeline. As we created this object using Seurat v3, it has to +be updated with `UpdateSeuratObject`. Note that genes should be named by +their official mouse/human gene symbol. ``` r -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) seuratObj@meta.data %>% head() ## nGene nUMI orig.ident aggregate res.0.6 celltype nCount_RNA nFeature_RNA @@ -105,27 +85,37 @@ seuratObj@meta.data %>% head() ## W380379 839 1606 LN_SS SS 0 CD4 T 1603 836 ## W380381 517 844 LN_SS SS 0 CD4 T 840 513 -# For newer Seurat versions, you may need to run the following +# For older Seurat objects, you may need to run this seuratObj <- UpdateSeuratObject(seuratObj) ``` +Additionally, if your expression data has the older gene symbols, you +may want to use our alias conversion function to avoid the loss of gene +names. + +``` r +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") +``` + Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells -(DCs) and inflammatory monocytes +(DCs) and inflammatory monocytes. ``` r -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application +# Note that the number of cells of some cell types is very low and should preferably be higher for a real application +seuratObj@meta.data$celltype %>% table() ## . ## B CD4 T CD8 T DC Mono NK Treg ## 382 2562 1645 18 90 131 199 + DimPlot(seuratObj, reduction = "tsne") ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png) +![](seurat_steps_files/figure-gfm/umap-1-1.png) Visualize the data to see to which condition cells belong. The metadata -dataframe column that denotes the condition (steady-state or after LCMV -infection) is here called ‘aggregate’. +column that denotes the condition (steady-state or after LCMV infection) +is here called ‘aggregate’. ``` r seuratObj@meta.data$aggregate %>% table() @@ -135,26 +125,35 @@ seuratObj@meta.data$aggregate %>% table() DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png) +![](seurat_steps_files/figure-gfm/umap-2-1.png) -### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet’s networks -``` r +The ligand-target prior model, ligand-receptor network, and weighted +integrated networks are needed for this vignette. The ligand-target +prior model is a matrix describing the potential that a ligand may +regulate a target gene, and it is used to run the ligand activity +analysis. The ligand-receptor network contains information on potential +ligand-receptor bindings, and it is used to identify potential ligands. +Finally, the weighted ligand-receptor network contains weights +representing the potential that a ligand will bind to a receptor, and it +is used for visualization. -organism = "mouse" +``` r +organism <- "mouse" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) head(lr_network) ## # A tibble: 6 × 2 ## from to @@ -173,7 +172,6 @@ ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 ## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network ## # A tibble: 6 × 3 ## from to weight @@ -196,310 +194,572 @@ head(weighted_networks$gr) # interactions and their weights in the gene regulato ## 6 0610010K14Rik Alox12 0.128 ``` -If your expression data has the older gene symbols, you may want to use -our alias conversion function to avoid the loss of gene names. +# Perform the NicheNet analysis + +In contrary to NicheNet v1, we now recommend users to run both the +“sender-agnostic” approach and “sender-focused” approach. These +approaches only affect the list of potential ligands that are considered +for prioritization. As described in the flowchart above, we do not +define any sender populations in the ‘sender agnostic’ approach but +consider all ligands for which its cognate receptor is expressed in the +receiver population. The sender-focused approach will then filter the +list of ligands to ones where the ligands are expressed in the sender +cell population(s). + +## 1. Define a set of potential ligands for both the sender-agnostic and sender-focused approach + +We first define a “receiver/target” cell population and determine which +genes are expressed. Here, we will consider a gene to be expressed if it +is expressed in at least 5% of cells (by default this is set to 10%). +The receiver cell population can only consist of one cell type, so in +case of multiple receiver populations, you will have to rerun the +vignette separately for each one. We will only look at CD8 T cells in +this vignette. ``` r -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") +receiver = "CD8 T" +expressed_genes_receiver <- get_expressed_genes(receiver, seuratObj, pct = 0.05) ``` -# Perform the NicheNet analysis - -In this case study, we want to apply NicheNet to predict which ligands -expressed by all immune cells in the T cell area of the lymph node are -most likely to have induced the differential expression in CD8 T cells -after LCMV infection. +Get a list of all receptors available in the ligand-receptor network, +and define expressed receptors as genes that are in the ligand-receptor +network and expressed in the receiver. Then, define the potential +ligands as all ligands whose cognate receptors are expressed. -As described in the main vignette, the pipeline of a basic NicheNet -analysis consist of the following steps: +``` r +all_receptors <- unique(lr_network$to) +expressed_receptors <- intersect(all_receptors, expressed_genes_receiver) -## 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations +potential_ligands <- lr_network %>% filter(to %in% expressed_receptors) %>% pull(from) %>% unique() +``` -In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, -‘Mono’, ‘NK’, ‘B’ and ‘DC’. We will consider a gene to be expressed when -it is expressed in at least 10% of cells in one cluster. +For the sender-focused approach, define sender cell types (CD4 T, Treg, +Mono, NK, B, and DC) and expressed genes in all sender populations. +(Although we pool all ligands from all sender cell types together in +this step, later on during the interpretation of the output, we will +check which sender cell type expresses which ligand.) Then, filter +potential ligands to those that are expressed in sender cells. Note that +autocrine signaling can also be considered if we also include CD8 T +cells as a sender. ``` r -## receiver -receiver = "CD8 T" -expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) +sender_celltypes <- c("CD4 T", "Treg", "Mono", "NK", "B", "DC") -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -``` +# Use lapply to get the expressed genes of every sender cell type separately here +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.05) +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() -``` r -## sender -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") +potential_ligands_focused <- intersect(potential_ligands, expressed_genes_sender) -list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here -expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() +# Also check +length(expressed_genes_sender) +## [1] 8492 +length(potential_ligands) +## [1] 483 +length(potential_ligands_focused) +## [1] 127 ``` -## 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) +## 2. Define the gene set of interest -Here, the gene set of interest are the genes differentially expressed in -CD8 T cells after LCMV infection. The condition of interest is thus -‘LCMV’, whereas the reference/steady-state condition is ‘SS’. The notion -of conditions can be extracted from the metadata column ‘aggregate’. The -method to calculate the differential expression is here the standard -Seurat Wilcoxon test, but this can be changed if necessary. +The gene set of interest are genes within the receiver cell type that +are likely to be influenced by ligands from the CCC event. In typical +case-control studies like this one, we use the differentially expressed +(DE) genes between the two conditions in the receiver cell type, +assuming that the observed DE pattern is a result of the CCC event +(i.e., LCMV infection). The condition of interest is thus ‘LCMV’, +whereas the reference/steady-state condition is ‘SS’. The condition can +be extracted from the metadata column ‘aggregate’. The method to +calculate the differential expression is here the standard Seurat +Wilcoxon test, but this can be changed if necessary. ``` r -seurat_obj_receiver= subset(seuratObj, idents = receiver) -seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate", drop=TRUE]]) +condition_oi <- "LCMV" +condition_reference <- "SS" + +seurat_obj_receiver <- subset(seuratObj, idents = receiver) -condition_oi = "LCMV" -condition_reference = "SS" - -DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.05) %>% rownames_to_column("gene") -geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) -geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] ``` -## 3. Define a set of potential ligands: these are ligands that are expressed by the “sender/niche” cell population and bind a (putative) receptor expressed by the “receiver/target” population +## 3. Define the background genes -Because we combined the expressed genes of each sender cell type, in -this example, we will perform one NicheNet analysis by pooling all -ligands from all cell types together. Later on during the interpretation -of the output, we will check which sender cell type expresses which -ligand. +All expressed genes in the receiver cell population (that are also in +the ligand-target matrix) is defined as the ‘background set’ for our +ligand prioritization procedure in the next step. It’s also important to +check that the number of background genes is a ‘reasonable’ number, +generally between 5000-10000, and sufficiently larger than our gene set +of interest. ``` r -ligands = lr_network %>% pull(from) %>% unique() -receptors = lr_network %>% pull(to) %>% unique() +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] -expressed_ligands = intersect(ligands,expressed_genes_sender) -expressed_receptors = intersect(receptors,expressed_genes_receiver) - -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +length(background_expressed_genes) +## [1] 3476 +length(geneset_oi) +## [1] 260 ``` -## 4) Perform NicheNet ligand activity analysis: rank the potential ligands based on the presence of their target genes in the gene set of interest (compared to the background set of genes) +## 4. Perform NicheNet ligand activity analysis + +This is the main step of NicheNet where the potential ligands are ranked +based on the presence of their target genes in the gene set of interest +(compared to the background set of genes). In this case, we prioritize +ligands that induce the antiviral response in CD8 T cells. + +Ligands are ranked based on the area under the precision-recall curve +(AUPR) between a ligand’s target predictions and the observed +transcriptional response. Although other metrics like the AUROC and +pearson correlation coefficient are also computed, we demonstrated in +our validation study that the AUPR was the most informative measure to +define ligand activity (this was the Pearson correlation for v1). The +vignette on how we performed the validation can be found at [Evaluation +of NicheNet’s ligand-target predictions](model_evaluation.md). + +We will first show the results of the sender-agnostic approach. ``` r -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) -ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) ligand_activities -## # A tibble: 73 × 6 +## # A tibble: 483 × 6 ## test_ligand auroc aupr aupr_corrected pearson rank ## -## 1 Ebi3 0.663 0.390 0.244 0.301 1 -## 2 Ptprc 0.642 0.310 0.165 0.167 2 -## 3 H2-M3 0.608 0.292 0.146 0.179 3 -## 4 H2-M2 0.611 0.279 0.133 0.153 5 -## 5 H2-T10 0.611 0.279 0.133 0.153 5 -## 6 H2-T22 0.611 0.279 0.133 0.153 5 -## 7 H2-T23 0.611 0.278 0.132 0.153 7 -## 8 H2-K1 0.605 0.268 0.122 0.142 8 -## 9 H2-Q4 0.605 0.268 0.122 0.141 10 -## 10 H2-Q6 0.605 0.268 0.122 0.141 10 -## # ℹ 63 more rows +## 1 Ifna1 0.714 0.433 0.358 0.498 1 +## 2 Ifnb1 0.711 0.401 0.327 0.433 2 +## 3 Ifnl3 0.683 0.392 0.317 0.433 3 +## 4 Il27 0.682 0.391 0.316 0.445 4 +## 5 Ifng 0.732 0.382 0.307 0.451 5 +## 6 Ifnk 0.671 0.282 0.207 0.272 6 +## 7 Ifne 0.667 0.279 0.204 0.289 7 +## 8 Ebi3 0.666 0.264 0.189 0.256 8 +## 9 Ifnl2 0.658 0.252 0.177 0.246 9 +## 10 Ifna2 0.669 0.247 0.172 0.205 10 +## # ℹ 473 more rows ``` -The different ligand activity measures (auroc, aupr, pearson correlation -coefficient) are a measure for how well a ligand can predict the -observed differentially expressed genes compared to the background of -expressed genes. In our validation study, we showed that the area under -the precision-recall curve (AUPR) between a ligand’s target predictions -and the observed transcriptional response was the most informative -measure to define ligand activity (this was the Pearson correlation for -v1). Therefore, NicheNet ranks the ligands based on their AUPR. This -allows us to prioritize ligands inducing the antiviral response in CD8 T -cells. - -The number of top-ranked ligands that are further used to predict active -target genes and construct an active ligand-receptor network is here 30. +The performance metrics indicate that the 30 top-ranked ligands can +predict the viral response reasonably, implying that the ranking of the +ligands might be accurate. However, it is possible that for some gene +sets, the target gene prediction performance of the top-ranked ligands +would not be much better than random prediction. In that case, +prioritization of ligands will be less trustworthy. + +We will use the top 30 ligands to predict active target genes and +construct an active ligand-receptor network. However, the choice of +looking only at the 30 top-ranked ligands for further biological +interpretation is based on biological intuition and is quite arbitrary. +Therefore, users can decide to continue the analysis with a different +number of ligands. We recommend to check the selected cutoff by looking +at the distribution of the ligand activity values. Here, we show the +ligand activity histogram (the score for the 30th ligand is indicated +via the dashed line). ``` r -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) %>% unique() +p_hist_lig_activity <- ggplot(ligand_activities, aes(x=aupr_corrected)) + + geom_histogram(color="black", fill="darkorange") + + geom_vline(aes(xintercept=min(ligand_activities %>% top_n(30, aupr_corrected) %>% pull(aupr_corrected))), + color="red", linetype="dashed", size=1) + + labs(x="ligand activity (PCC)", y = "# ligands") + + theme_classic() + +p_hist_lig_activity ``` -These ligands are expressed by one or more of the input sender cells. To -see which cell population expresses which of these top-ranked ligands, -you can run the following: +![](seurat_steps_files/figure-gfm/histogram-1.png) ``` r -DotPlot(seuratObj, features = best_upstream_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png) +We can also visualize the ligand activity measure (AUPR) of these +top-ranked ligands: -As you can see, most op the top-ranked ligands seem to be mainly -expressed by dendritic cells and monocytes. +``` r +vis_ligand_aupr <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) %>% as.matrix(ncol = 1) -## 5) Infer receptors and top-predicted target genes of ligands that are top-ranked in the ligand activity analysis +(make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized ligands", "Ligand activity", + legend_title = "AUPR", color = "darkorange") + + theme(axis.text.x.top = element_blank())) +``` -### Active target gene inference +![](seurat_steps_files/figure-gfm/agnostic-ligand-activity-heatmap-1.png) -``` r -active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 200) %>% bind_rows() %>% drop_na() +## 5. Infer target genes and receptors of top-ranked ligands -active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.33) +### Active target gene inference -order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names() -order_targets = active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) %>% make.names() -rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 -colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23 +Active target genes are defined as genes in the gene set of interest +that have the highest regulatory potential for each top-ranked ligand. +These top targets of each ligand are based on the prior model. The +function get_weighted_ligand_target_links will return genes that are in +the gene set of interest and are the top `n` targets of a ligand +(default: `n = 200`, but there are too many target genes here so we only +considered the top 100). -vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t() +``` r +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 100) %>% + bind_rows() %>% drop_na() + +nrow(active_ligand_target_links_df) +## [1] 637 +head(active_ligand_target_links_df) +## # A tibble: 6 × 3 +## ligand target weight +## +## 1 Ifna1 Ddx58 0.247 +## 2 Ifna1 Eif2ak2 0.246 +## 3 Ifna1 Gbp2 0.192 +## 4 Ifna1 Gbp7 0.195 +## 5 Ifna1 H2-D1 0.206 +## 6 Ifna1 H2-K1 0.206 ``` +For visualization purposes, the ligand-target prior model was adapted by +setting a regulatory potential score to 0 if their score was below a +predefined cutoff (default: 0.25, or the 25th percentile) across all +scores between the top-ranked ligands and their top `n` targets. We +recommend users to test several cutoff values for the best +visualization, as lowering or increasing the cutoff will result in a +denser or sparser heatmap, respectively. + ``` r -p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","Predicted target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + theme(axis.text.x = element_text(face = "italic")) + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.0045,0.0090)) -p_ligand_target_network +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.33) + +nrow(active_ligand_target_links) +## [1] 86 +head(active_ligand_target_links) +## Ifna13 Ifna2 Ifna6 Ifna15 Ifna7 Ifna5 Ifnab Ifna9 Ifna11 Ifna12 Ifna16 Ifna4 Ifna14 Ptprc Tnf Il36g Il10 Il21 Osm +## Irf1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.27692301 0.07400782 0.07722567 0.1342983 0.16962803 +## Ddx60 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.11281871 0.00000000 0.05478472 0.0000000 0.08116101 +## Parp14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.07003101 0.06895448 0.00000000 0.0000000 0.08011593 +## Ddx58 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.24433255 0.06891134 0.00000000 0.0000000 0.08862524 +## Parp12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.19298997 0.06687691 0.05621734 0.0000000 0.07252823 +## Tap1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25076038 0.07099514 0.00000000 0.0280451 0.15842935 +## Il27 Ifna1 Ifnb1 Ifng Ifnk Ifne Lrtm2 Ifnl3 Ebi3 Ifnl2 +## Irf1 0.3393635 0.2493108 0.25825704 0.2864755 0.04430047 0.04063913 0.03483987 0.10021371 0.11317944 0.07408235 +## Ddx60 0.1596771 0.1218225 0.13569911 0.1171453 0.02629924 0.02771157 0.00000000 0.08217529 0.04882999 0.03746171 +## Parp14 0.1563348 0.1269487 0.07891102 0.1142710 0.00000000 0.00000000 0.00000000 0.07074981 0.04568410 0.03135479 +## Ddx58 0.2265024 0.2467722 0.21469112 0.2480807 0.00000000 0.00000000 0.00000000 0.07125327 0.04024212 0.02705719 +## Parp12 0.1580405 0.1844883 0.14626411 0.1851128 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 +## Tap1 0.1949126 0.1937607 0.16257249 0.2563000 0.00000000 0.02717588 0.00000000 0.08010402 0.04710621 0.03769675 + +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) + +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) + +make_heatmap_ggplot(vis_ligand_target, "Prioritized ligands", "Predicted target genes", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png) +![](seurat_steps_files/figure-gfm/ligand-target-heatmap-1.png) + +The rows of the heatmap are ordered based on the rankings of the +ligands, and the columns are ordered alphabetically. We see a lot of +interferons in the top ligands, which biologically make sense as we are +looking at response to a viral infection. -Note that not all ligands from the top 30 are present in this -ligand-target heatmap. The left-out ligands are ligands that don’t have -target genes with high enough regulatory potential scores. Therefore, -they did not survive the used cutoffs. To include them, you can be less -stringent in the used cutoffs. +Note that not all ligands from the top 30 are present in the heatmap. +The left-out ligands are ligands that don’t have target genes with high +enough regulatory potential scores. Therefore, they did not survive the +used cutoffs. To include them, you can be less stringent in the used +cutoffs or increase the number of target genes considered. Additionally, +if you would consider more than the top 200 targets based on prior +information, you will infer more, but less confident, ligand-target +links; by considering less than 200 targets, you will be more stringent. ### Receptors of top-ranked ligands +Similar to above, we identify which receptors have the highest +interaction potential with the top-ranked ligands. + ``` r -lr_network_top = lr_network %>% filter(from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct(from,to) -best_upstream_receptors = lr_network_top %>% pull(to) %>% unique() +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) +``` -lr_network_top_df_large = weighted_networks_lr %>% filter(from %in% best_upstream_ligands & to %in% best_upstream_receptors) +Then, we create a heatmap for ligand-receptor interactions. Here, both +the ligands and receptors are ordered by hierarchical clustering You can +choose to order only ligands or receptors hierarachically (with +`order_hclust = ligands` or `receptors`, respectively) or not at all +(`none`), in which case the ligands are ordered based on their rankings, +and the receptors are ordered alphabetically.. -lr_network_top_df = lr_network_top_df_large %>% spread("from","weight",fill = 0) -lr_network_top_matrix = lr_network_top_df %>% select(-to) %>% as.matrix() %>% magrittr::set_rownames(lr_network_top_df$to) +``` r +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") + +(make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Ligands", x_name = "Receptors", + color = "mediumvioletred", legend_title = "Prior interaction potential")) +``` -dist_receptors = dist(lr_network_top_matrix, method = "binary") -hclust_receptors = hclust(dist_receptors, method = "ward.D2") -order_receptors = hclust_receptors$labels[hclust_receptors$order] - -dist_ligands = dist(lr_network_top_matrix %>% t(), method = "binary") -hclust_ligands = hclust(dist_ligands, method = "ward.D2") -order_ligands_receptor = hclust_ligands$labels[hclust_ligands$order] +![](seurat_steps_files/figure-gfm/ligand-receptor-heatmap-1.png) -order_receptors = order_receptors %>% intersect(rownames(lr_network_top_matrix)) -order_ligands_receptor = order_ligands_receptor %>% intersect(colnames(lr_network_top_matrix)) +## 6. Sender-focused approach -vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands_receptor] -rownames(vis_ligand_receptor_network) = order_receptors %>% make.names() -colnames(vis_ligand_receptor_network) = order_ligands_receptor %>% make.names() -``` +To perform the sender-focused approach, simply subset the ligand +activities to only contain expressed ligands from all populations +(calculated in Step 1). We can then perform target gene and receptor +inference as above. ``` r -p_ligand_receptor_network = vis_ligand_receptor_network %>% t() %>% make_heatmap_ggplot("Ligands","Receptors", color = "mediumvioletred", x_axis_position = "top",legend_title = "Prior interaction potential") -p_ligand_receptor_network +ligand_activities_all <- ligand_activities +best_upstream_ligands_all <- best_upstream_ligands + +ligand_activities <- ligand_activities %>% filter(test_ligand %in% potential_ligands_focused) +best_upstream_ligands <- ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% + pull(test_ligand) %>% unique() ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png) +``` r +ligand_aupr_matrix <- ligand_activities %>% filter(test_ligand %in% best_upstream_ligands) %>% + column_to_rownames("test_ligand") %>% select(aupr_corrected) %>% arrange(aupr_corrected) +vis_ligand_aupr <- as.matrix(ligand_aupr_matrix, ncol = 1) -## 6) Add log fold change information of ligands from sender cells +p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, + "Prioritized ligands", "Ligand activity", + legend_title = "AUPR", color = "darkorange") + + theme(axis.text.x.top = element_blank()) -In some cases, it might be possible to also check upregulation of -ligands in sender cells. This can add a useful extra layer of -information next to the ligand activities defined by NicheNet, because -you can assume that some of the ligands inducing DE in receiver cells, -will be DE themselves in the sender cells. +p_ligand_aupr +``` -Here this is possible: we will define the log fold change between LCMV -and steady-state in all sender cell types and visualize this as extra -information. +![](seurat_steps_files/figure-gfm/focused-ligand-activity-heatmap-1.png) ``` r -# DE analysis for each sender cell type -# this uses a new nichenetr function - reinstall nichenetr if necessary! -DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) # use this if cell type labels are the identities of your Seurat object -- if not: indicate the celltype_col properly -DE_table_all[is.na(DE_table_all)] = 0 +# Target gene plot +active_ligand_target_links_df <- best_upstream_ligands %>% + lapply(get_weighted_ligand_target_links, + geneset = geneset_oi, + ligand_target_matrix = ligand_target_matrix, + n = 100) %>% + bind_rows() %>% drop_na() -# Combine ligand activities with DE information -ligand_activities_de = ligand_activities %>% select(test_ligand, pearson) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) -ligand_activities_de[is.na(ligand_activities_de)] = 0 +active_ligand_target_links <- prepare_ligand_target_visualization( + ligand_target_df = active_ligand_target_links_df, + ligand_target_matrix = ligand_target_matrix, + cutoff = 0.33) -# make LFC heatmap -lfc_matrix = ligand_activities_de %>% select(-ligand, -pearson) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) -rownames(lfc_matrix) = rownames(lfc_matrix) %>% make.names() +order_ligands <- intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() +order_targets <- active_ligand_target_links_df$target %>% unique() %>% intersect(rownames(active_ligand_target_links)) -order_ligands = order_ligands[order_ligands %in% rownames(lfc_matrix)] -vis_ligand_lfc = lfc_matrix[order_ligands,] +vis_ligand_target <- t(active_ligand_target_links[order_targets,order_ligands]) -colnames(vis_ligand_lfc) = vis_ligand_lfc %>% colnames() %>% make.names() +p_ligand_target <- make_heatmap_ggplot(vis_ligand_target, "Prioritized ligands", "Predicted target genes", + color = "purple", legend_title = "Regulatory potential") + + scale_fill_gradient2(low = "whitesmoke", high = "purple") -p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot("Prioritized ligands","LFC in Sender", low_color = "midnightblue",mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red",legend_position = "top", x_axis_position = "top", legend_title = "LFC") + theme(axis.text.y = element_text(face = "italic")) -p_ligand_lfc +p_ligand_target ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png) +![](seurat_steps_files/figure-gfm/focused-ligand-target-heatmap-1.png) ``` r +# Receptor plot +ligand_receptor_links_df <- get_weighted_ligand_receptor_links( + best_upstream_ligands, expressed_receptors, + lr_network, weighted_networks$lr_sig) -# change colors a bit to make them more stand out -p_ligand_lfc = p_ligand_lfc + scale_fill_gradientn(colors = c("midnightblue","blue", "grey95", "grey99","firebrick1","red"),values = c(0,0.1,0.2,0.25, 0.40, 0.7,1), limits = c(vis_ligand_lfc %>% min() - 0.1, vis_ligand_lfc %>% max() + 0.1)) -p_ligand_lfc -``` +vis_ligand_receptor_network <- prepare_ligand_receptor_visualization( + ligand_receptor_links_df, + best_upstream_ligands, + order_hclust = "both") + +p_ligand_receptor <- make_heatmap_ggplot(t(vis_ligand_receptor_network), + y_name = "Ligands", x_name = "Receptors", + color = "mediumvioletred", legend_title = "Prior interaction potential") -![](seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png) +p_ligand_receptor +``` -## 7) Summary visualizations of the NicheNet analysis +![](seurat_steps_files/figure-gfm/focused-ligand-receptor-heatmap-1.png) -For example, you can make a combined heatmap of ligand activities, -ligand expression, ligand log fold change and the target genes of the -top-ranked ligands. The plots for the log fold change and target genes -were already made. Let’s now make the heatmap for ligand activities and -for expression. +Here, we instead observe that the top-ranked ligands consist of many H2 +genes (which encode MHC-II proteins), and not IFN genes as in the +sender-agnostic approach. This is because IFN genes are not expressed by +the sender cell populations, and it was already filtered out during +preprocessing for being too lowly expressed. ``` r -# ligand activity heatmap -ligand_aupr_matrix = ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) +best_upstream_ligands_all %in% rownames(seuratObj) %>% table() +## . +## FALSE TRUE +## 23 7 +``` + +### Visualizing expression and log-fold change in sender cells -rownames(ligand_aupr_matrix) = rownames(ligand_aupr_matrix) %>% make.names() -colnames(ligand_aupr_matrix) = colnames(ligand_aupr_matrix) %>% make.names() +For the sender-focused approach, we can also investigate further on +which sender cell populations are potentially the true sender of these +ligands. First, we can simply check which sender cell population +expresses which of these top-ranked ligands. -vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix(ncol = 1) %>% magrittr::set_colnames("AUPR") -p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot("Prioritized ligands","Ligand activity", color = "darkorange",legend_position = "top", x_axis_position = "top", legend_title = "AUPR\n(target gene prediction ability)") + theme(legend.text = element_text(size = 9)) +``` r +# Dotplot of sender-focused approach +p_dotplot <- DotPlot(subset(seuratObj, celltype %in% sender_celltypes), + features = rev(best_upstream_ligands), cols = "RdYlBu") + + coord_flip() + + scale_y_discrete(position = "right") + +p_dotplot ``` +![](seurat_steps_files/figure-gfm/dotplot-1.png) + +As you can see, most of the top-ranked ligands seem to be mainly +expressed by dendritic cells and monocytes. + +Next, we can also check upregulation of ligands in sender cells by +computing the log-fold change between the two conditions. This ligand +differential expression is not used for prioritization and ranking of +the ligands (the ranking is only determined based on enrichment of +target genes among DE genes in the receiver, CD8T cells), but it can add +a useful extra layer of information next to the ligand activities. This +is of course only possible in some cases, such as case-control studies. + ``` r -# ligand expression Seurat dotplot -order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") -rotated_dotplot = DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) # flip of coordinates necessary because we want to show ligands in the rows when combining all plots + +celltype_order <- levels(Idents(seuratObj)) + +# Use this if cell type labels are the identities of your Seurat object +# if not: indicate the celltype_col properly +DE_table_top_ligands <- lapply( + celltype_order[celltype_order %in% sender_celltypes], + get_lfc_celltype, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + celltype_col = "celltype", + min.pct = 0, logfc.threshold = 0, + features = best_upstream_ligands +) + +DE_table_top_ligands <- DE_table_top_ligands %>% reduce(., full_join) %>% + column_to_rownames("gene") + +vis_ligand_lfc <- as.matrix(DE_table_top_ligands[rev(best_upstream_ligands), ]) + +p_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, + "Prioritized ligands", "LFC in Sender", + low_color = "midnightblue", mid_color = "white", + mid = median(vis_ligand_lfc), high_color = "red", + legend_title = "LFC") + +p_lfc ``` +![](seurat_steps_files/figure-gfm/lfc-heatmap-1.png) + +We see that most of the top-ranked ligands also seem to be upregulated +themselves in monocytes after viral infection. This is nice additional +“evidence” that these ligands might indeed be important. + +Finally, you can also compare rankings between the sender-agnostic and +sender-focused approach. Here, the red sections of the left bar plot +indicates which ligands in the sender-agnostic approach are filtered out +in the sender-focused approach because they are not expressed. + ``` r +(make_line_plot(ligand_activities = ligand_activities_all, + potential_ligands = potential_ligands_focused) + + theme(plot.title = element_text(size=11, hjust=0.1, margin=margin(0, 0, -5, 0)))) +``` + +![](seurat_steps_files/figure-gfm/lineplot-1.png) + +## 7. Summary visualizations of the NicheNet analysis -figures_without_legend = cowplot::plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), - p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - p_ligand_target_network + theme(legend.position = "none", axis.ticks = element_blank()) + ylab(""), +Finally, we can make a combined plot containing heatmap of ligand +activities, ligand expression, ligand log-fold change and the target +genes of the top-ranked ligands. As mentioned earlier, sometimes ligands +do not appear in the ligand-target heatmap because they don’t have +target genes with high enough regulatory potential scores. In this case, +CCl22 is present in other plots (ranked 25th) but is missing in the +rightmost plot. If users wish for these plots to be consistent, they may +use the variable `order_ligands` defined when creating the ligand-target +heatmap to subset other plots instead of `best_upstream_ligands`. + +``` r +figures_without_legend <- cowplot::plot_grid( + p_ligand_aupr + theme(legend.position = "none"), + p_dotplot + theme(legend.position = "none", + axis.ticks = element_blank(), + axis.title.y = element_blank(), + axis.title.x = element_text(size = 12), + axis.text.y = element_text(size = 9), + axis.text.x = element_text(size = 9, angle = 90, hjust = 0)) + + ylab("Expression in Sender"), + p_lfc + theme(legend.position = "none", + axis.title.y = element_blank()), + p_ligand_target + theme(legend.position = "none", + axis.title.y = element_blank()), align = "hv", nrow = 1, - rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8, ncol(vis_ligand_target))) + rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc)+7, ncol(vis_ligand_lfc)+8, ncol(vis_ligand_target))) -legends = cowplot::plot_grid( +legends <- cowplot::plot_grid( ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), - ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target_network)), + ggpubr::as_ggplot(ggpubr::get_legend(p_dotplot)), + ggpubr::as_ggplot(ggpubr::get_legend(p_lfc)), + ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_target)), nrow = 1, align = "h", rel_widths = c(1.5, 1, 1, 1)) -combined_plot = cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") +combined_plot <- cowplot::plot_grid(figures_without_legend, legends, rel_heights = c(10,5), nrow = 2, align = "hv") combined_plot ``` -![](seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png) +![](seurat_steps_files/figure-gfm/summary-vis-1.png) + +## Other follow-up analyses: + +As another follow-up analysis, you can infer possible signaling paths +between ligands and targets of interest. You can read how to do this in +the following vignette [Inferring ligand-to-target signaling +paths](ligand_target_signaling_path.md):`vignette("ligand_target_signaling_path", package="nichenetr")`. -# Remarks +Another follow-up analysis is getting a “tangible” measure of how well +top-ranked ligands predict the gene set of interest and assess which +genes of the gene set can be predicted well. You can read how to do this +in the following vignette [Assess how well top-ranked ligands can +predict a gene set of +interest](target_prediction_evaluation_geneset.md):`vignette("target_prediction_evaluation_geneset", package="nichenetr")`. -Top-ranked ligands and target genes shown here differ from the -predictions shown in the respective case study in the NicheNet paper -because 1) a different definition of expressed genes was used, and 2) we -have updated the ligand-target matrix to include more data sources. +In case you want to visualize ligand-target links between multiple +interacting cells, you can make an appealing circos plot as shown in +vignette [Circos plot visualization to show active ligand-target links +between interacting +cells](circos.md):`vignette("circos", package="nichenetr")`. ``` r sessionInfo() -## R version 4.3.1 (2023-06-16) +## R version 4.3.2 (2023-10-31) ## Platform: x86_64-redhat-linux-gnu (64-bit) ## Running under: CentOS Stream 8 ## @@ -507,42 +767,52 @@ sessionInfo() ## BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so; LAPACK version 3.9.0 ## ## locale: -## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C -## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 +## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C ## -## time zone: Europe/Brussels +## time zone: Asia/Bangkok ## tzcode source: system (glibc) ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] forcats_0.5.1 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.3 tidyverse_1.3.1 Seurat_4.4.0 nichenetr_2.0.3 testthat_3.1.2 -## [13] SeuratObject_4.1.4 sp_2.0-0 +## [1] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 +## [8] ggplot2_3.4.4 tidyverse_1.3.1 SeuratObject_5.0.1 Seurat_4.4.0 nichenetr_2.0.4 ## ## loaded via a namespace (and not attached): -## [1] fs_1.5.2 matrixStats_1.0.0 spatstat.sparse_3.0-2 bitops_1.0-7 devtools_2.4.3 lubridate_1.8.0 httr_1.4.2 RColorBrewer_1.1-2 doParallel_1.0.17 -## [10] tools_4.3.1 sctransform_0.4.0 backports_1.4.1 utf8_1.2.2 R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.0 -## [19] prettyunits_1.1.1 gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.1 DiceKriging_1.6.0 spatstat.explore_3.2-1 labeling_0.4.2 spatstat.data_3.0-1 -## [28] randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.3 pbapply_1.5-0 foreign_0.8-84 smoof_1.6.0.3 parallelly_1.30.0 sessioninfo_1.2.2 limma_3.56.2 -## [37] readxl_1.3.1 rstudioapi_0.13 visNetwork_2.1.2 generics_0.1.2 shape_1.4.6 ica_1.0-2 spatstat.random_3.1-5 car_3.1-2 Matrix_1.6-1 -## [46] S4Vectors_0.38.1 fansi_1.0.2 abind_1.4-5 lifecycle_1.0.3 yaml_2.2.2 carData_3.0-5 recipes_1.0.7 Rtsne_0.15 grid_4.3.1 -## [55] promises_1.2.0.1 crayon_1.5.0 miniUI_0.1.1.1 lattice_0.21-8 haven_2.4.3 cowplot_1.1.1 mlr_2.19.1 pillar_1.9.0 knitr_1.37 -## [64] ComplexHeatmap_2.16.0 rjson_0.2.21 future.apply_1.8.1 codetools_0.2-19 fastmatch_1.1-3 leiden_0.3.9 glue_1.6.2 ParamHelpers_1.14.1 data.table_1.14.2 -## [73] remotes_2.4.2 vctrs_0.6.3 png_0.1-7 cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1 cachem_1.0.6 gower_1.0.1 xfun_0.40 -## [82] mime_0.12 prodlim_2023.03.31 survival_3.5-5 timeDate_4022.108 iterators_1.0.14 hardhat_1.3.0 lava_1.7.2.1 DiagrammeR_1.0.10 ellipsis_0.3.2 -## [91] fitdistrplus_1.1-6 ROCR_1.0-11 ipred_0.9-14 nlme_3.1-162 usethis_2.2.2 RcppAnnoy_0.0.19 rprojroot_2.0.2 irlba_2.3.5 KernSmooth_2.23-21 -## [100] rpart_4.1.19 DBI_1.1.2 BiocGenerics_0.46.0 colorspace_2.0-2 Hmisc_5.1-0 nnet_7.3-19 tidyselect_1.2.0 processx_3.5.2 compiler_4.3.1 -## [109] parallelMap_1.5.1 rvest_1.0.2 htmlTable_2.4.1 xml2_1.3.3 desc_1.4.2 plotly_4.10.0 shadowtext_0.1.2 checkmate_2.2.0 scales_1.2.1 -## [118] caTools_1.18.2 lmtest_0.9-39 callr_3.7.0 digest_0.6.29 goftest_1.2-3 spatstat.utils_3.0-3 rmarkdown_2.11 htmltools_0.5.6 pkgconfig_2.0.3 -## [127] base64enc_0.1-3 lhs_1.1.6 highr_0.9 dbplyr_2.1.1 fastmap_1.1.0 rlang_1.1.1 GlobalOptions_0.1.2 htmlwidgets_1.6.2 shiny_1.7.1 -## [136] BBmisc_1.13 farver_2.1.0 zoo_1.8-9 jsonlite_1.7.3 mlrMBO_1.1.5.1 ModelMetrics_1.2.2.2 magrittr_2.0.2 Formula_1.2-5 patchwork_1.1.1 -## [145] munsell_0.5.0 Rcpp_1.0.11 ggnewscale_0.4.9 reticulate_1.24 stringi_1.7.6 pROC_1.18.4 brio_1.1.3 MASS_7.3-60 plyr_1.8.6 -## [154] pkgbuild_1.3.1 parallel_4.3.1 listenv_0.8.0 ggrepel_0.9.3 deldir_1.0-6 splines_4.3.1 tensor_1.5 hms_1.1.1 circlize_0.4.15 -## [163] ps_1.6.0 igraph_1.5.1 ggpubr_0.6.0 spatstat.geom_3.2-4 ggsignif_0.6.4 reshape2_1.4.4 stats4_4.3.1 pkgload_1.2.4 reprex_2.0.1 -## [172] evaluate_0.14 modelr_0.1.8 tweenr_2.0.2 tzdb_0.4.0 foreach_1.5.2 httpuv_1.6.5 RANN_2.6.1 polyclip_1.10-0 clue_0.3-64 -## [181] future_1.23.0 scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 emoa_0.5-0.2 e1071_1.7-13 rstatix_0.7.2 later_1.3.0 -## [190] viridisLite_0.4.0 class_7.3-22 IRanges_2.34.1 memoise_2.0.1 cluster_2.1.4 globals_0.14.0 caret_6.0-94 +## [1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 devtools_2.4.3 lubridate_1.9.3 +## [7] httr_1.4.7 RColorBrewer_1.1-3 doParallel_1.0.17 tools_4.3.2 sctransform_0.4.0 backports_1.4.1 +## [13] utf8_1.2.4 R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.2 +## [19] sp_2.1-2 gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.2 spatstat.explore_3.2-1 +## [25] labeling_0.4.3 spatstat.data_3.0-3 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.5 pbapply_1.7-2 +## [31] foreign_0.8-85 sessioninfo_1.2.2 parallelly_1.36.0 limma_3.56.2 readxl_1.4.3 rstudioapi_0.15.0 +## [37] visNetwork_2.1.2 generics_0.1.3 shape_1.4.6 ica_1.0-3 spatstat.random_3.2-2 car_3.1-2 +## [43] Matrix_1.6-4 fansi_1.0.6 S4Vectors_0.38.1 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.8 +## [49] carData_3.0-5 recipes_1.0.7 Rtsne_0.17 grid_4.3.2 promises_1.2.1 crayon_1.5.2 +## [55] miniUI_0.1.1.1 lattice_0.21-9 haven_2.4.3 cowplot_1.1.2 pillar_1.9.0 knitr_1.45 +## [61] ComplexHeatmap_2.16.0 rjson_0.2.21 future.apply_1.11.0 codetools_0.2-19 leiden_0.3.9 glue_1.6.2 +## [67] remotes_2.4.2 data.table_1.14.10 vctrs_0.6.5 png_0.1-8 spam_2.10-0 cellranger_1.1.0 +## [73] gtable_0.3.4 assertthat_0.2.1 cachem_1.0.8 gower_1.0.1 xfun_0.41 mime_0.12 +## [79] prodlim_2023.08.28 survival_3.5-7 timeDate_4032.109 iterators_1.0.14 hardhat_1.3.0 lava_1.7.3 +## [85] DiagrammeR_1.0.10 ellipsis_0.3.2 fitdistrplus_1.1-11 ROCR_1.0-11 ipred_0.9-14 nlme_3.1-163 +## [91] usethis_2.2.2 RcppAnnoy_0.0.21 irlba_2.3.5.1 KernSmooth_2.23-22 rpart_4.1.21 colorspace_2.1-0 +## [97] BiocGenerics_0.46.0 DBI_1.1.3 Hmisc_5.1-0 nnet_7.3-19 tidyselect_1.2.0 compiler_4.3.2 +## [103] rvest_1.0.2 htmlTable_2.4.1 xml2_1.3.6 plotly_4.10.0 shadowtext_0.1.2 checkmate_2.3.1 +## [109] scales_1.3.0 caTools_1.18.2 lmtest_0.9-40 digest_0.6.33 goftest_1.2-3 spatstat.utils_3.0-4 +## [115] rmarkdown_2.11 htmltools_0.5.7 pkgconfig_2.0.3 base64enc_0.1-3 highr_0.10 dbplyr_2.1.1 +## [121] fastmap_1.1.1 rlang_1.1.2 GlobalOptions_0.1.2 htmlwidgets_1.6.2 shiny_1.7.1 farver_2.1.1 +## [127] zoo_1.8-12 jsonlite_1.8.8 ModelMetrics_1.2.2.2 magrittr_2.0.3 Formula_1.2-5 dotCall64_1.1-1 +## [133] patchwork_1.1.3 munsell_0.5.0 Rcpp_1.0.11 ggnewscale_0.4.9 reticulate_1.34.0 stringi_1.7.6 +## [139] pROC_1.18.5 MASS_7.3-60 pkgbuild_1.4.3 plyr_1.8.9 parallel_4.3.2 listenv_0.9.0 +## [145] ggrepel_0.9.4 deldir_2.0-2 splines_4.3.2 tensor_1.5 hms_1.1.3 circlize_0.4.15 +## [151] igraph_1.2.11 ggpubr_0.6.0 spatstat.geom_3.2-7 ggsignif_0.6.4 pkgload_1.3.3 reshape2_1.4.4 +## [157] stats4_4.3.2 reprex_2.0.1 evaluate_0.23 modelr_0.1.8 tzdb_0.4.0 foreach_1.5.2 +## [163] tweenr_2.0.2 httpuv_1.6.13 RANN_2.6.1 polyclip_1.10-6 future_1.33.0 clue_0.3-64 +## [169] scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 e1071_1.7-14 rstatix_0.7.2 +## [175] later_1.3.2 viridisLite_0.4.2 class_7.3-22 memoise_2.0.1 IRanges_2.34.1 cluster_2.1.4 +## [181] timechange_0.2.0 globals_0.16.2 caret_6.0-94 ``` # References diff --git a/vignettes/seurat_steps_files/figure-gfm/agnostic-ligand-activity-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/agnostic-ligand-activity-heatmap-1.png new file mode 100644 index 0000000..2cb9203 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/agnostic-ligand-activity-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/dotplot-1.png b/vignettes/seurat_steps_files/figure-gfm/dotplot-1.png new file mode 100644 index 0000000..8ad550c Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/dotplot-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/focused-ligand-activity-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-activity-heatmap-1.png new file mode 100644 index 0000000..7c6c240 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-activity-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/focused-ligand-receptor-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-receptor-heatmap-1.png new file mode 100644 index 0000000..5a6f871 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-receptor-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/focused-ligand-target-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-target-heatmap-1.png new file mode 100644 index 0000000..47801bd Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/focused-ligand-target-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/histogram-1.png b/vignettes/seurat_steps_files/figure-gfm/histogram-1.png new file mode 100644 index 0000000..1ff8833 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/histogram-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/lfc-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/lfc-heatmap-1.png new file mode 100644 index 0000000..0d3e6d7 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/lfc-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/ligand-receptor-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/ligand-receptor-heatmap-1.png new file mode 100644 index 0000000..68d0d78 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/ligand-receptor-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/ligand-target-heatmap-1.png b/vignettes/seurat_steps_files/figure-gfm/ligand-target-heatmap-1.png new file mode 100644 index 0000000..9ab09d3 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/ligand-target-heatmap-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/lineplot-1.png b/vignettes/seurat_steps_files/figure-gfm/lineplot-1.png new file mode 100644 index 0000000..89d0bc2 Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/lineplot-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/summary-vis-1.png b/vignettes/seurat_steps_files/figure-gfm/summary-vis-1.png new file mode 100644 index 0000000..f9c52ae Binary files /dev/null and b/vignettes/seurat_steps_files/figure-gfm/summary-vis-1.png differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png b/vignettes/seurat_steps_files/figure-gfm/umap-1-1.png similarity index 100% rename from vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-3-1.png rename to vignettes/seurat_steps_files/figure-gfm/umap-1-1.png diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_steps_files/figure-gfm/umap-2-1.png similarity index 100% rename from vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-4-1.png rename to vignettes/seurat_steps_files/figure-gfm/umap-2-1.png diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png deleted file mode 100644 index 5d19a7e..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-13-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png deleted file mode 100644 index e113a6e..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-15-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png deleted file mode 100644 index fcd8523..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-17-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png deleted file mode 100644 index 30241ad..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png deleted file mode 100644 index 35c85ab..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-18-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png b/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png deleted file mode 100644 index 74b82ca..0000000 Binary files a/vignettes/seurat_steps_files/figure-gfm/unnamed-chunk-21-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization.Rmd b/vignettes/seurat_steps_prioritization.Rmd index b5d27f2..cf091ab 100644 --- a/vignettes/seurat_steps_prioritization.Rmd +++ b/vignettes/seurat_steps_prioritization.Rmd @@ -1,10 +1,10 @@ --- -title: "Perform NicheNet analysis with prioritization" +title: "Prioritization of ligands based on expression values" author: "Robin Browaeys & Chananchida Sang-aram" date: "2023-10-02" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Perform NicheNet analysis starting from a Seurat object: step-by-step analysis with prioritization} + %\VignetteIndexEntry{Prioritization of ligands based on expression values} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} bibliography: library.bib @@ -23,105 +23,92 @@ knitr::opts_chunk$set( ) ``` -```{r} -### Load Packages -library(nichenetr) # Please update to v2.0.4 -library(Seurat) -library(SeuratObject) -library(tidyverse) - -### Read in Seurat object -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) - -# For newer Seurat versions, you may need to run the following -seuratObj <- UpdateSeuratObject(seuratObj) -``` - -In this vignette, we will extend the basic NicheNet analysis analysis from [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) by incorporating gene expression as part of the prioritization This is a generalization of the [Differential NicheNet](differential_nichenet.md) and [MultiNicheNet](https://github.com/saeyslab/multinichenetr) approach. While the original NicheNet only ranks ligands based on the ligand activity analysis, it is now also possible to prioritize ligands based on upregulation of the ligand/receptor, and the cell-type and condition specificity of hte ligand and receptor. +In this vignette, we will extend the basic NicheNet analysis analysis from [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) by incorporating gene expression as part of the prioritization This is a generalization of the [Differential NicheNet](differential_nichenet.md) and [MultiNicheNet](https://github.com/saeyslab/multinichenetr) approach. While the original NicheNet only ranks ligands based on the ligand activity analysis, it is now also possible to prioritize ligands based on cell type and condition specificity of the ligand and receptor. -Make sure you understand the different steps in a NicheNet analysis that are described in that vignette before proceeding with this vignette and performing a real NicheNet analysis on your data. +We will again make use of mouse NICHE-seq data to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. -We will again make use of mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. +Make sure you understand the different steps in a NicheNet analysis that are described in the basic vignette before proceeding with this vignette. -Hence, we have to make some additional calculations, including DE of the ligand/receptor in a sender/receiver cell type, and the average expression of each ligand/receptor in each sender/receiver cell type. The DE analysis boils down to computing pairwise tests between the cell type of interest and other cell types in the dataset. We will subset the data to only the condition of interest, "LCMV". For this analysis we will consider all cell types as both sender and receiver, as we want the ligand/receptor to be specific. +# Prepare NicheNet analysis -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. +Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. +```{r} +library(nichenetr) # Please update to v2.0.6 +library(Seurat) +library(SeuratObject) +library(tidyverse) -The NicheNet ligand-receptor network and weighted networks are necessary to define and show possible ligand-receptor interactions between two cell populations. The ligand-target matrix denotes the prior potential that particular ligands might regulate the expression of particular target genes. This matrix is necessary to prioritize possible ligand-receptor interactions based on observed gene expression effects (i.e. NicheNet's ligand activity analysis) and infer affected target genes of these prioritized ligands. +``` ```{r} -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) - -lr_network = lr_network %>% distinct(from, to) -head(lr_network) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns +# Read Seurat object +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) -head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -head(weighted_networks$gr) # interactions and their weights in the gene regulatory network +# Load in networks +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") +lr_network <- lr_network %>% distinct(from, to) ``` # Perform the NicheNet analysis -In this case study, we want to apply NicheNet to predict which ligands expressed by all immune cells in the T cell area of the lymph node are most likely to have induced the differential expression in CD8 T cells after LCMV infection. - -As described in the main vignette, the pipeline of a basic NicheNet analysis consist of the following steps: - -In this case study, the receiver cell population is the 'CD8 T' cell population, whereas the sender cell populations are 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. -We will consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster. +We will use the sender-focused approach here. ```{r} -# 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations -## receiver -receiver = "CD8 T" -expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] +# 1. Define set of potential ligands +receiver <- "CD8 T" +expressed_genes_receiver <- get_expressed_genes(receiver, seuratObj, pct = 0.05) -## sender -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") +sender_celltypes <- c("CD4 T", "Treg", "Mono", "NK", "B", "DC") +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.05) +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() -list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here -expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() +all_ligands <- unique(lr_network$from) +all_receptors <- unique(lr_network$to) -# 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) +expressed_ligands <- intersect(all_ligands, expressed_genes_sender) +expressed_receptors <- intersect(all_receptors, expressed_genes_receiver) -seurat_obj_receiver= subset(seuratObj, idents = receiver) -seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate", drop=TRUE]]) +potential_ligands <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% + pull(from) %>% unique() -condition_oi = "LCMV" -condition_reference = "SS" - -DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") -geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) -geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] +# 2. Define the gene set of interest +condition_oi <- "LCMV" +condition_reference <- "SS" + +seurat_obj_receiver <- subset(seuratObj, idents = receiver) -# 3. Define a set of potential ligands -ligands = lr_network %>% pull(from) %>% unique() -receptors = lr_network %>% pull(to) %>% unique() +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.05) %>% rownames_to_column("gene") -expressed_ligands = intersect(ligands,expressed_genes_sender) -expressed_receptors = intersect(receptors,expressed_genes_receiver) +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +# 3. Define background genes +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] # 4. Perform NicheNet ligand activity analysis -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) -ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) -ligand_activities +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected))) ``` -## Perform prioritization of ligand-receptor pairs +# Perform prioritization of ligand-receptor pairs -In addition to the NicheNet ligand activity (`activity_scaled`), you can prioritize based on: +We will prioritize ligand-receptor pairs based on the following criteria (with their corresponding weight names): * Upregulation of the ligand in a sender cell type compared to other cell types: `de_ligand` * Upregulation of the receptor in a receiver cell type: `de_receptor` @@ -130,157 +117,228 @@ In addition to the NicheNet ligand activity (`activity_scaled`), you can priorit * Condition-specificity of the ligand across all cell types: `ligand_condition_specificity` * Condition-specificity of the receptor across all cell types: `receptor_condition_specificity` -Note that the first four criteria are calculated only in the condition of interest. +This means that we will have to calculate: + +* Differential expression of the ligand/receptor in a sender/receiver cell type +* The average expression of each ligand/receptor in each sender/receiver cell type +* Differential expression of the ligand/receptor between the two conditions + +We provide a wrapper function `generate_info_tables` that will calculate all these values for you. This function returns a list with three dataframes: + +* `sender_receiver_de`: differential expression of the ligand and receptor in the sender-receiver cell type pair. These were first calculated separately (i.e., DE of ligand in sender cell type, DE of receptor in receiver cell type based on FindAllMarkers) and then combined based on possible interactions from the lr_network. +* `sender_receiver_info`: the average expression of the ligand and receptor in sender-receiver cell type pairs +* `lr_condition_de`: differential expression of the ligand and receptor between the two conditions across all cell types. + +Note that cell type specificity (i.e., the first four conditions) is calculated only in the condition of interest. + +The "scenario" argument can be either "case_control" or "one_condition". In "case_control" scenario, condition specificity is calculated. ```{r} -# By default, ligand_condition_specificty and receptor_condition_specificty are 0 -prioritizing_weights = c("de_ligand" = 1, - "de_receptor" = 1, - "activity_scaled" = 2, - "exprs_ligand" = 1, - "exprs_receptor" = 1, - "ligand_condition_specificity" = 0.5, - "receptor_condition_specificity" = 0.5) +lr_network_filtered <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) + +info_tables <- generate_info_tables(seuratObj, + celltype_colname = "celltype", + senders_oi = sender_celltypes, + receivers_oi = receiver, + lr_network = lr_network_filtered, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + scenario = "case_control") + +names(info_tables) +info_tables$sender_receiver_de %>% head() +info_tables$sender_receiver_info %>% head() +info_tables$lr_condition_de %>% head() ``` -We provide helper functions to calculate these values, including `calculate_de` and `get_exprs_avg`. `process_table_to_ic` transforms these different dataframes so they are compatible with the `generate_prioritization_tables` function. +Next, we generate the prioritization table. This table contains the rankings of ligand-receptor pairs based on the different criteria. We provide two scenarios: `case_control` and `one_condition`. In the "case_control" scenario, all weights are set to 1. If "one_condition", the weights are set to 0 for condition specificity and 1 for the remaining criteria. Users can also provide their own weights using the `prioritizing_weights` argument. ```{r} -lr_network_renamed <- lr_network %>% rename(ligand=from, receptor=to) +prior_table <- generate_prioritization_tables(info_tables$sender_receiver_info, + info_tables$sender_receiver_de, + ligand_activities, + info_tables$lr_condition_de, + scenario = "case_control") + +prior_table %>% head +``` +As you can see, the resulting table now show the rankings for *ligand-receptor interactions of a sender-receiver cell type pair*, instead of just the prioritized ligands. Cxcl10 now went up in the rankings due to both the high expression of its potential receptor Dpp4 and its high celltype specificity (`scaled_lfc_ligand`). You can also see this in the visualizations further below. + +We included all columns here, but if you just want relevant columns that were used to calculate the ranking: + +```{r} +prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_p_val_ligand_adapted_group', 'scaled_p_val_receptor_adapted_group', 'scaled_activity')) +``` + +Note that we appended the suffix '_group' to columns that refer to differential expression between conditions, e.g., `lfc_ligand_group` and `lfc_receptor_group.` + +## Step-by-step prioritization + +`generate_info_tables` is a wrapper function that calculates all the information needed for the prioritization. However, in some cases you may need more flexibility on how these values are calculated (but note that you can pass extra arguments to `generate_info_tables` that will get passed on to `FindMarkers`, `FindAllMarkers`, and `AverageExpression`). Below, we show how we use helper functions `calculate_de` and `get_exprs_avg` to calculate the DE and get the average expression used for cell type specificity. `process_table_to_ic` transforms these different dataframes so they are compatible with the `generate_prioritization_tables` function. + +```{r} # Only calculate DE for LCMV condition, with genes that are in the ligand-receptor network -DE_table <- calculate_de(seuratObj, celltype_colname = "celltype", - condition_colname = "aggregate", condition_oi = condition_oi, - features = union(expressed_ligands, expressed_receptors)) +DE_table <- FindAllMarkers(subset(seuratObj, subset = aggregate == "LCMV"), + min.pct = 0, logfc.threshold = 0, return.thresh = 1, + features = unique(unlist(lr_network_filtered))) # Average expression information - only for LCMV condition -expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi) +expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi, + features = unique(unlist(lr_network_filtered))) # Calculate condition specificity - only for datasets with two conditions! condition_markers <- FindMarkers(object = seuratObj, ident.1 = condition_oi, ident.2 = condition_reference, group.by = "aggregate", min.pct = 0, logfc.threshold = 0, - features = union(expressed_ligands, expressed_receptors)) %>% rownames_to_column("gene") + features = unique(unlist(lr_network_filtered))) %>% rownames_to_column("gene") # Combine DE of senders and receivers -> used for prioritization -processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_renamed, +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_filtered, senders_oi = sender_celltypes, receivers_oi = receiver) -processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_renamed) +processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_filtered) -processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_renamed) +processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_filtered) ``` -Finally we generate the prioritization table. The `lfc_ligand` and `lfc_receptor` columns are based on the differences between cell types within your condition of interest. This is equivalent to subsetting your Seurat object to only the condition of interest and running `Seurat::FindAllMarkers`. +And here is how you can define custom weights: -The columns that refer to differential expression between conditions are those with the _group suffix, e.g., `lfc_ligand_group` and `lfc_receptor_group.` These are celltype agnostic: they are calculated by using `Seurat::FindMarkers` between two conditions across all cell types. +```{r} +prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 1, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 1, + "receptor_condition_specificity" = 1) + +``` ```{r} prior_table <- generate_prioritization_tables(processed_expr_table, - processed_DE_table, - ligand_activities, - processed_condition_markers, - prioritizing_weights = prioritizing_weights) + processed_DE_table, + ligand_activities, + processed_condition_markers, + prioritizing_weights) + +prior_table %>% head +``` + +# Prioritizing across multiple receivers + +As NicheNet is a receiver-based pipeline, to prioritize ligand-receptor pairs across multiple receivers, we need to perform the NicheNet analysis for each receiver separately. Let's suppose we want to prioritize ligand-receptor pairs across all T cells (CD4, CD8, and Tregs). The CD8 T analysis has already been performed above. We will use the wrapper function to perform a basic NicheNet analysis on the other two: + +```{r} +nichenet_output <- lapply(c("CD4 T", "Treg"), function(receiver_ct){ + nichenet_seuratobj_aggregate(receiver = receiver_ct, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + sender = sender_celltypes, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks, + expression_pct = 0.05) + +}) %>% setNames(c("CD4 T", "Treg")) -prior_table ``` -As you can see, the resulting table now show the rankings for *ligand-receptor interactions of a sender-receiver cell type pair*, instead of just the prioritized ligands. We included all columns here, but if you just want relevant columns that were used to calculate the ranking: +To generate the dataframes used for prioritization, we will simply change the `lr_network_filtered` argument to only calculate DE and expression values for ligand-receptor pairs of interest. ```{r} -prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_lfc_ligand', 'scaled_lfc_receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_lfc_ligand_group', 'scaled_lfc_receptor_group', 'scaled_activity')) +info_tables2 <- lapply(names(nichenet_output), function(receiver_ct) { + generate_info_tables(seuratObj, + celltype_colname = "celltype", + senders_oi = sender_celltypes, + receivers_oi = receiver_ct, + lr_network_filtered = lr_network %>% + filter(from %in% nichenet_output[[receiver_ct]]$ligand_activities$test_ligand & + to %in% nichenet_output[[receiver_ct]]$background_expressed_genes), + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + scenario = "case_control") +}) + ``` -Cxcl10 now went up in the rankings due to both the high expression of its potential receptor Dpp4 and its high celltype specificity (`scaled_lfc_ligand`). You can also see this in the dotplot and heatmap below. +We can then combine the results from `generate_info_tables` using `bind_rows`, which will concatenate the rows together. For the ligand activities, we will also add an additional column containing the receiver cell type. Note that for the average expression table (`sender_receiver_info`) and condition specificity (`lr_condition_de`), we need to remove duplicate rows. -```{r fig.width = 15, fig.height = 10} -best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) %>% unique() +```{r} +# Add CD8 T to list +info_tables2[[3]] <- info_tables -# DE analysis for each sender cell type -DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% - lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, - expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) -DE_table_all[is.na(DE_table_all)] = 0 +# bind rows of each element of info_tables using pmap +info_tables_combined <- purrr::pmap(info_tables2, bind_rows) -order_ligands <- make.names(best_upstream_ligands) %>% rev() +# Combine ligand activities and add receiver information +ligand_activities_combined <- bind_rows(nichenet_output$`CD4 T`$ligand_activities %>% mutate(receiver = "CD4 T"), + nichenet_output$Treg$ligand_activities %>% mutate(receiver = "Treg"), + ligand_activities %>% mutate(receiver = "CD8 T")) -# ligand activity heatmap -ligand_aupr_matrix <- ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) %>% - `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) +prior_table_combined <- generate_prioritization_tables( + sender_receiver_info = info_tables_combined$sender_receiver_info %>% distinct, + sender_receiver_de = info_tables_combined$sender_receiver_de, + ligand_activities = ligand_activities_combined, + lr_condition_de = info_tables_combined$lr_condition_de %>% distinct, + scenario = "case_control") + +head(prior_table_combined) -vis_ligand_aupr <- as.matrix(ligand_aupr_matrix[order_ligands, ], ncol=1) %>% magrittr::set_colnames("AUPR") -p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, "Prioritized ligands","Ligand activity", - color = "darkorange",legend_position = "top", x_axis_position = "top", - legend_title = "AUPR\ntarget gene prediction ability)") + - theme(legend.text = element_text(size = 9)) - - -# LFC heatmap -# First combine ligand activities with DE information and make -ligand_activities_de <- ligand_activities %>% select(test_ligand, aupr_corrected) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) -ligand_activities_de[is.na(ligand_activities_de)] <- 0 -lfc_matrix <- ligand_activities_de %>% select(-ligand, -aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) %>% - `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) -vis_ligand_lfc <- lfc_matrix[order_ligands,] - -p_ligand_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, "Prioritized ligands","LFC in Sender", - low_color = "midnightblue", mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red", - legend_position = "top", x_axis_position = "top", legend_title = "LFC") + - theme(axis.text.y = element_text(face = "italic")) - - -# ligand expression Seurat dotplot -order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") -rotated_dotplot <- DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + - # flip of coordinates necessary because we want to show ligands in the rows when combining all plots - coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) - -# Combine figures and legend separately -figures_without_legend <- cowplot::plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), - axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + - ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), - p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8)) - -legends <- cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), - ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), - nrow = 1, - align = "h", rel_widths = c(1.5, 1, 1)) - -combined_plot <- cowplot::plot_grid(figures_without_legend, legends, nrow = 2, align = "hv") -print(combined_plot) ``` ### Extra visualization of ligand-receptor pairs -We provide the function `make_mushroom_plot` which allows you to display expression of ligand-receptor pairs in semicircles. By default, the fill gradient shows the LFC between cell types, while the size of the semicircle corresponds to the scaled mean expression. +In addition to the usual heatmap visualizations, we provide a function `make_circos_lr` to visualize the ligand-receptor pairs in a circos plot. This was originally written for the (now deprecated) Differential NicheNet vignettes. The function takes in a prioritization table and a named vector for the color of senders and receivers. We first specify the number of top ligand-receptor pairs to show with `n`. + +```{r lr-circos-unused, fig.show = "hide"} +# Get top n ligand-receptor pairs +prior_table_oi <- prior_table_combined %>% slice_max(prioritization_score, n = 50) -```{r fig.height=8, fig.width=6} -make_mushroom_plot(prior_table, top_n = 30) +# Define colors for senders and receivers +senders_receivers <- prior_table_oi %>% select(sender, receiver) %>% unlist %>% unique %>% sort +celltype_colors <- RColorBrewer::brewer.pal(length(senders_receivers), name = 'Set3') %>% + magrittr::set_names(senders_receivers) + +circos_plot <- make_circos_lr(prior_table_oi, + colors_sender = celltype_colors, colors_receiver = celltype_colors) + +``` + +```{r lr-circos, fig.width = 8, fig.height = 8} +circos_plot ``` -We provide multiple ways to customize this plot, including changing the "size" and "fill" values to certain columns from the prioritization table (but without the `_ligand` or `_receptor` suffix). In addition, you can also choose to show the rankings of each ligand-receptor-sender pair, as well as show all data points for context. +Furthermore, we provide the function `make_mushroom_plot` which allows you to display expression of ligand-receptor pairs in a specific receiver. By default, the fill gradient shows the LFC between cell types, while the size of the semicircle corresponds to the scaled mean expression. You can also choose to show the rankings of each ligand-receptor-sender pair with `show_rankings`, as well as show all data points for context (`show_all_datapoints`). `true_color_range = TRUE` will adjust the limits of the color gradient to the min-max of the values, instead of the limit being from 0 to 1. Note that the numbers displayed here are the rankings within the chosen cell type and not across all receiver cell types (in case of multiple receivers). + +```{r mushroom-plot-1, fig.height=8, fig.width=8} +receiver_oi <- "CD8 T" +legend_adjust <- c(0.7, 0.7) +make_mushroom_plot(prior_table_combined %>% filter(receiver == receiver_oi), + top_n = 30, + true_color_range = TRUE, + show_rankings = TRUE, + show_all_datapoints = TRUE) + + theme(legend.justification = legend_adjust, + axis.title.x = element_text(hjust = 0.25)) +``` + +Furthermore, you can change the "size" and "fill" values to certain columns from the prioritization table (those with the `_ligand` or `_receptor` suffix). -```{r fig.height=8, fig.width=6} +```{r mushroom-plot-2, fig.height=8, fig.width=6} print(paste0("Column names that you can use are: ", paste0(prior_table %>% select(ends_with(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% colnames() %>% str_remove("_ligand|_receptor|_sender|_receiver") %>% unique, collapse = ", "))) # Change size and color columns -make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") +make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") + + theme(legend.justification = legend_adjust, + axis.title.x = element_text(hjust = 0.25)) -# Show rankings and other datapoints -make_mushroom_plot(prior_table, top_n = 30, show_rankings = TRUE, show_all_datapoints = TRUE) - -# Show true limits instead of having it from 0 to 1 -make_mushroom_plot(prior_table, top_n = 30, true_color_range = TRUE) ``` - ```{r} sessionInfo() ``` diff --git a/vignettes/seurat_steps_prioritization.md b/vignettes/seurat_steps_prioritization.md index 03fa786..7a51449 100644 --- a/vignettes/seurat_steps_prioritization.md +++ b/vignettes/seurat_steps_prioritization.md @@ -1,4 +1,4 @@ -Perform NicheNet analysis with prioritization +Prioritization of ligands based on expression values ================ Robin Browaeys & Chananchida Sang-aram 2023-10-02 @@ -7,20 +7,6 @@ Robin Browaeys & Chananchida Sang-aram rmarkdown::render("vignettes/seurat_steps_prioritization.Rmd", output_format = "github_document") --> -``` r -### Load Packages -library(nichenetr) # Please update to v2.0.4 -library(Seurat) -library(SeuratObject) -library(tidyverse) - -### Read in Seurat object -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) - -# For newer Seurat versions, you may need to run the following -seuratObj <- UpdateSeuratObject(seuratObj) -``` - In this vignette, we will extend the basic NicheNet analysis analysis from [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) by incorporating gene expression @@ -29,175 +15,102 @@ as part of the prioritization This is a generalization of the [MultiNicheNet](https://github.com/saeyslab/multinichenetr) approach. While the original NicheNet only ranks ligands based on the ligand activity analysis, it is now also possible to prioritize ligands based -on upregulation of the ligand/receptor, and the cell-type and condition -specificity of hte ligand and receptor. - -Make sure you understand the different steps in a NicheNet analysis that -are described in that vignette before proceeding with this vignette and -performing a real NicheNet analysis on your data. - -We will again make use of mouse NICHE-seq data from Medaglia et al. to -explore intercellular communication in the T cell area in the inguinal -lymph node before and 72 hours after lymphocytic choriomeningitis virus -(LCMV) infection (Medaglia et al. 2017). We will NicheNet to explore -immune cell crosstalk in response to this LCMV infection. In this -dataset, differential expression is observed between CD8 T cells in -steady-state and CD8 T cells after LCMV infection. NicheNet can be -applied to look at how several immune cell populations in the lymph node -(i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can -regulate and induce these observed gene expression changes. NicheNet -will specifically prioritize ligands from these immune cells and their -target genes that change in expression upon LCMV infection. - -Hence, we have to make some additional calculations, including DE of the -ligand/receptor in a sender/receiver cell type, and the average -expression of each ligand/receptor in each sender/receiver cell type. -The DE analysis boils down to computing pairwise tests between the cell -type of interest and other cell types in the dataset. We will subset the -data to only the condition of interest, “LCMV”. For this analysis we -will consider all cell types as both sender and receiver, as we want the -ligand/receptor to be specific. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) -and the [Seurat object of the processed NICHE-seq single-cell +on cell type and condition specificity of the ligand and receptor. + +We will again make use of mouse NICHE-seq data to explore intercellular +communication in the T cell area in the inguinal lymph node before and +72 hours after lymphocytic choriomeningitis virus (LCMV) infection +(Medaglia et al. 2017). The used [ligand-target +matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object +of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks. +Make sure you understand the different steps in a NicheNet analysis that +are described in the basic vignette before proceeding with this +vignette. + +# Prepare NicheNet analysis -The NicheNet ligand-receptor network and weighted networks are necessary -to define and show possible ligand-receptor interactions between two -cell populations. The ligand-target matrix denotes the prior potential -that particular ligands might regulate the expression of particular -target genes. This matrix is necessary to prioritize possible -ligand-receptor interactions based on observed gene expression effects -(i.e. NicheNet’s ligand activity analysis) and infer affected target -genes of these prioritized ligands. +Load required packages, read in the Seurat object with processed +expression data of interacting cells and NicheNet’s ligand-target prior +model, ligand-receptor network and weighted integrated networks. ``` r -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) - -lr_network = lr_network %>% distinct(from, to) -head(lr_network) -## # A tibble: 6 × 2 -## from to -## -## 1 2300002M23Rik Ddr1 -## 2 2610528A11Rik Gpr15 -## 3 9530003J23Rik Itgal -## 4 a Atrn -## 5 a F11r -## 6 a Mc1r -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m -## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 -## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 -## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 -## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 -## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 - -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) -head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -## # A tibble: 6 × 3 -## from to weight -## -## 1 0610010F05Rik App 0.110 -## 2 0610010F05Rik Cat 0.0673 -## 3 0610010F05Rik H1f2 0.0660 -## 4 0610010F05Rik Lrrc49 0.0829 -## 5 0610010F05Rik Nicn1 0.0864 -## 6 0610010F05Rik Srpk1 0.123 -head(weighted_networks$gr) # interactions and their weights in the gene regulatory network -## # A tibble: 6 × 3 -## from to weight -## -## 1 0610010K14Rik 0610010K14Rik 0.121 -## 2 0610010K14Rik 2510039O18Rik 0.121 -## 3 0610010K14Rik 2610021A01Rik 0.0256 -## 4 0610010K14Rik 9130401M01Rik 0.0263 -## 5 0610010K14Rik Alg1 0.127 -## 6 0610010K14Rik Alox12 0.128 - -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") +library(nichenetr) # Please update to v2.0.6 +library(Seurat) +library(SeuratObject) +library(tidyverse) ``` -# Perform the NicheNet analysis +``` r +# Read Seurat object +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") -In this case study, we want to apply NicheNet to predict which ligands -expressed by all immune cells in the T cell area of the lymph node are -most likely to have induced the differential expression in CD8 T cells -after LCMV infection. +# Load in networks +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -As described in the main vignette, the pipeline of a basic NicheNet -analysis consist of the following steps: +lr_network <- lr_network %>% distinct(from, to) +``` -In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, -‘Mono’, ‘NK’, ‘B’ and ‘DC’. We will consider a gene to be expressed when -it is expressed in at least 10% of cells in one cluster. +# Perform the NicheNet analysis + +We will use the sender-focused approach here. ``` r -# 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations -## receiver -receiver = "CD8 T" -expressed_genes_receiver = get_expressed_genes(receiver, seuratObj, pct = 0.10) -background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] +# 1. Define set of potential ligands +receiver <- "CD8 T" +expressed_genes_receiver <- get_expressed_genes(receiver, seuratObj, pct = 0.05) -## sender -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") +sender_celltypes <- c("CD4 T", "Treg", "Mono", "NK", "B", "DC") +list_expressed_genes_sender <- sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.05) +expressed_genes_sender <- list_expressed_genes_sender %>% unlist() %>% unique() -list_expressed_genes_sender = sender_celltypes %>% unique() %>% lapply(get_expressed_genes, seuratObj, 0.10) # lapply to get the expressed genes of every sender cell type separately here -expressed_genes_sender = list_expressed_genes_sender %>% unlist() %>% unique() +all_ligands <- unique(lr_network$from) +all_receptors <- unique(lr_network$to) -# 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) +expressed_ligands <- intersect(all_ligands, expressed_genes_sender) +expressed_receptors <- intersect(all_receptors, expressed_genes_receiver) -seurat_obj_receiver= subset(seuratObj, idents = receiver) -seurat_obj_receiver = SetIdent(seurat_obj_receiver, value = seurat_obj_receiver[["aggregate", drop=TRUE]]) +potential_ligands <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% + pull(from) %>% unique() -condition_oi = "LCMV" -condition_reference = "SS" - -DE_table_receiver = FindMarkers(object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10) %>% rownames_to_column("gene") -geneset_oi = DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) -geneset_oi = geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] +# 2. Define the gene set of interest +condition_oi <- "LCMV" +condition_reference <- "SS" -# 3. Define a set of potential ligands -ligands = lr_network %>% pull(from) %>% unique() -receptors = lr_network %>% pull(to) %>% unique() +seurat_obj_receiver <- subset(seuratObj, idents = receiver) -expressed_ligands = intersect(ligands,expressed_genes_sender) -expressed_receptors = intersect(receptors,expressed_genes_receiver) +DE_table_receiver <- FindMarkers(object = seurat_obj_receiver, + ident.1 = condition_oi, ident.2 = condition_reference, + group.by = "aggregate", + min.pct = 0.05) %>% rownames_to_column("gene") -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() +geneset_oi <- DE_table_receiver %>% filter(p_val_adj <= 0.05 & abs(avg_log2FC) >= 0.25) %>% pull(gene) +geneset_oi <- geneset_oi %>% .[. %in% rownames(ligand_target_matrix)] + +# 3. Define background genes +background_expressed_genes <- expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)] # 4. Perform NicheNet ligand activity analysis -ligand_activities = predict_ligand_activities(geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) - -ligand_activities = ligand_activities %>% arrange(-aupr_corrected) %>% mutate(rank = rank(desc(aupr_corrected))) -ligand_activities -## # A tibble: 73 × 6 -## test_ligand auroc aupr aupr_corrected pearson rank -## -## 1 Ebi3 0.663 0.390 0.244 0.301 1 -## 2 Ptprc 0.642 0.310 0.165 0.167 2 -## 3 H2-M3 0.608 0.292 0.146 0.179 3 -## 4 H2-M2 0.611 0.279 0.133 0.153 5 -## 5 H2-T10 0.611 0.279 0.133 0.153 5 -## 6 H2-T22 0.611 0.279 0.133 0.153 5 -## 7 H2-T23 0.611 0.278 0.132 0.153 7 -## 8 H2-K1 0.605 0.268 0.122 0.142 8 -## 9 H2-Q4 0.605 0.268 0.122 0.141 10 -## 10 H2-Q6 0.605 0.268 0.122 0.141 10 -## # ℹ 63 more rows +ligand_activities <- predict_ligand_activities(geneset = geneset_oi, + background_expressed_genes = background_expressed_genes, + ligand_target_matrix = ligand_target_matrix, + potential_ligands = potential_ligands) + +ligand_activities <- ligand_activities %>% arrange(-aupr_corrected) %>% + mutate(rank = rank(desc(aupr_corrected))) ``` -## Perform prioritization of ligand-receptor pairs +# Perform prioritization of ligand-receptor pairs -In addition to the NicheNet ligand activity (`activity_scaled`), you can -prioritize based on: +We will prioritize ligand-receptor pairs based on the following criteria +(with their corresponding weight names): - Upregulation of the ligand in a sender cell type compared to other cell types: `de_ligand` @@ -211,236 +124,408 @@ prioritize based on: - Condition-specificity of the receptor across all cell types: `receptor_condition_specificity` -Note that the first four criteria are calculated only in the condition -of interest. +This means that we will have to calculate: + +- Differential expression of the ligand/receptor in a sender/receiver + cell type +- The average expression of each ligand/receptor in each sender/receiver + cell type +- Differential expression of the ligand/receptor between the two + conditions + +We provide a wrapper function `generate_info_tables` that will calculate +all these values for you. This function returns a list with three +dataframes: + +- `sender_receiver_de`: differential expression of the ligand and + receptor in the sender-receiver cell type pair. These were first + calculated separately (i.e., DE of ligand in sender cell type, DE of + receptor in receiver cell type based on FindAllMarkers) and then + combined based on possible interactions from the lr_network. +- `sender_receiver_info`: the average expression of the ligand and + receptor in sender-receiver cell type pairs +- `lr_condition_de`: differential expression of the ligand and receptor + between the two conditions across all cell types. + +Note that cell type specificity (i.e., the first four conditions) is +calculated only in the condition of interest. + +The “scenario” argument can be either “case_control” or “one_condition”. +In “case_control” scenario, condition specificity is calculated. ``` r -# By default, ligand_condition_specificty and receptor_condition_specificty are 0 -prioritizing_weights = c("de_ligand" = 1, - "de_receptor" = 1, - "activity_scaled" = 2, - "exprs_ligand" = 1, - "exprs_receptor" = 1, - "ligand_condition_specificity" = 0.5, - "receptor_condition_specificity" = 0.5) +lr_network_filtered <- lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) + +info_tables <- generate_info_tables(seuratObj, + celltype_colname = "celltype", + senders_oi = sender_celltypes, + receivers_oi = receiver, + lr_network = lr_network_filtered, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + scenario = "case_control") + +names(info_tables) +## [1] "sender_receiver_de" "sender_receiver_info" "lr_condition_de" +info_tables$sender_receiver_de %>% head() +## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_sender +## 1 DC CD8 T Ccl5 Cxcr3 6.432043 0.16714791 3.299595 1.893317e-25 2.563740e-21 7.758812e-05 1.000000e+00 1.000 +## 2 Mono CD8 T Lyz2 Itgal 5.493265 -0.01687003 2.738198 1.728697e-160 2.340828e-156 4.973381e-02 1.000000e+00 0.933 +## 3 DC CD8 T H2-M2 Cd8a 3.416479 1.94059972 2.678539 1.017174e-272 1.377355e-268 5.250531e-206 7.109745e-202 0.429 +## 4 DC CD8 T Cxcl16 Cxcr6 4.182085 0.54826454 2.365175 1.138617e-243 1.541801e-239 5.987787e-21 8.108063e-17 0.929 +## 5 Mono CD8 T Cxcl9 Cxcr3 4.328801 0.16714791 2.247975 3.834954e-124 5.192911e-120 7.758812e-05 1.000000e+00 0.547 +## 6 Mono CD8 T Cxcl9 Dpp4 4.328801 0.16416445 2.246483 3.834954e-124 5.192911e-120 6.628900e-04 1.000000e+00 0.547 +## pct_expressed_receiver +## 1 0.042 +## 2 0.188 +## 3 0.659 +## 4 0.089 +## 5 0.042 +## 6 0.148 +info_tables$sender_receiver_info %>% head() +## # A tibble: 6 × 7 +## sender receiver ligand receptor avg_ligand avg_receptor ligand_receptor_prod +## +## 1 DC Mono B2m Tap1 216. 8.59 1856. +## 2 DC NK B2m Klrd1 216. 7.43 1607. +## 3 DC B B2m Tap1 216. 7.35 1588. +## 4 DC Treg B2m Tap1 216. 7.18 1552. +## 5 Mono Mono B2m Tap1 158. 8.59 1353. +## 6 DC DC B2m Tap1 216. 5.91 1277. +info_tables$lr_condition_de %>% head() +## ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor +## 1 H2-Ab1 Cd4 2.4021254 0.11569357 1.2589095 4.424390e-06 5.991066e-02 5.634068e-02 1.000000e+00 +## 2 Cxcl10 Dpp4 1.6066163 0.35175421 0.9791853 6.700636e-29 9.073332e-25 1.170731e-06 1.585287e-02 +## 3 B2m Tap1 0.7071427 1.13931050 0.9232266 6.936359e-174 9.392524e-170 3.585450e-52 4.855057e-48 +## 4 H2-T22 Klrd1 1.5223370 -0.05659737 0.7328698 1.006291e-111 1.362618e-107 6.202530e-01 1.000000e+00 +## 5 H2-T23 Klrd1 1.4651999 -0.05659737 0.7043013 1.789643e-114 2.423356e-110 6.202530e-01 1.000000e+00 +## 6 Cxcl10 Cxcr3 1.6066163 -0.25400642 0.6763049 6.700636e-29 9.073332e-25 1.918372e-06 2.597667e-02 ``` -We provide helper functions to calculate these values, including -`calculate_de` and `get_exprs_avg`. `process_table_to_ic` transforms -these different dataframes so they are compatible with the -`generate_prioritization_tables` function. +Next, we generate the prioritization table. This table contains the +rankings of ligand-receptor pairs based on the different criteria. We +provide two scenarios: `case_control` and `one_condition`. In the +“case_control” scenario, all weights are set to 1. If “one_condition”, +the weights are set to 0 for condition specificity and 1 for the +remaining criteria. Users can also provide their own weights using the +`prioritizing_weights` argument. ``` r -lr_network_renamed <- lr_network %>% rename(ligand=from, receptor=to) +prior_table <- generate_prioritization_tables(info_tables$sender_receiver_info, + info_tables$sender_receiver_de, + ligand_activities, + info_tables$lr_condition_de, + scenario = "case_control") + +prior_table %>% head +## # A tibble: 6 × 51 +## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_sender +## +## 1 NK CD8 T Ptprc Dpp4 0.596 0.164 0.380 2.18e- 7 2.96e- 3 0.000663 1 0.894 +## 2 Mono CD8 T Ptprc Dpp4 0.438 0.164 0.301 3.52e- 5 4.77e- 1 0.000663 1 0.867 +## 3 Mono CD8 T Cxcl10 Dpp4 4.27 0.164 2.22 2.53e- 79 3.43e- 75 0.000663 1 0.867 +## 4 Mono CD8 T Cxcl9 Dpp4 4.33 0.164 2.25 3.83e-124 5.19e-120 0.000663 1 0.547 +## 5 Treg CD8 T Ptprc Dpp4 0.282 0.164 0.223 1.44e- 2 1 e+ 0 0.000663 1 0.685 +## 6 Mono CD8 T Cxcl11 Dpp4 2.36 0.164 1.26 9.28e-121 1.26e-116 0.000663 1 0.307 +## # ℹ 39 more variables: pct_expressed_receiver , avg_ligand , avg_receptor , ligand_receptor_prod , lfc_pval_ligand , +## # p_val_ligand_adapted , scaled_lfc_ligand , scaled_p_val_ligand , scaled_lfc_pval_ligand , scaled_p_val_ligand_adapted , activity , +## # rank , activity_zscore , scaled_activity , lfc_pval_receptor , p_val_receptor_adapted , scaled_lfc_receptor , +## # scaled_p_val_receptor , scaled_lfc_pval_receptor , scaled_p_val_receptor_adapted , scaled_avg_exprs_ligand , +## # scaled_avg_exprs_receptor , lfc_ligand_group , p_val_ligand_group , lfc_pval_ligand_group , p_val_ligand_adapted_group , +## # scaled_lfc_ligand_group , scaled_p_val_ligand_group , scaled_lfc_pval_ligand_group , scaled_p_val_ligand_adapted_group , +## # lfc_receptor_group , p_val_receptor_group , lfc_pval_receptor_group , p_val_receptor_adapted_group , scaled_lfc_receptor_group , … +``` + +As you can see, the resulting table now show the rankings for +*ligand-receptor interactions of a sender-receiver cell type pair*, +instead of just the prioritized ligands. Cxcl10 now went up in the +rankings due to both the high expression of its potential receptor Dpp4 +and its high celltype specificity (`scaled_lfc_ligand`). You can also +see this in the visualizations further below. +We included all columns here, but if you just want relevant columns that +were used to calculate the ranking: + +``` r +prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_p_val_ligand_adapted_group', 'scaled_p_val_receptor_adapted_group', 'scaled_activity')) +## # A tibble: 1,272 × 11 +## sender receiver ligand receptor scaled_p_val_ligand_adapted scaled_p_val_receptor_adapted scaled_avg_exprs_ligand scaled_avg_exprs_receptor scaled_p_val_ligand_…¹ +## +## 1 NK CD8 T Ptprc Dpp4 0.869 0.829 1.00 1.00 0.850 +## 2 Mono CD8 T Ptprc Dpp4 0.841 0.829 0.867 1.00 0.850 +## 3 Mono CD8 T Cxcl10 Dpp4 0.960 0.829 1.00 1.00 0.929 +## 4 Mono CD8 T Cxcl9 Dpp4 0.975 0.829 1.00 1.00 0.787 +## 5 Treg CD8 T Ptprc Dpp4 0.756 0.829 0.741 1.00 0.850 +## 6 Mono CD8 T Cxcl11 Dpp4 0.973 0.829 1.00 1.00 0.732 +## 7 B CD8 T Ptprc Dpp4 0.748 0.829 0.666 1.00 0.850 +## 8 DC CD8 T Icam1 Il2rg 0.876 0.714 1.00 0.995 0.717 +## 9 DC CD8 T Ccl22 Dpp4 0.997 0.829 1.00 1.00 0.539 +## 10 NK CD8 T Cd320 Jaml 0.889 0.943 0.905 1.00 0.472 +## # ℹ 1,262 more rows +## # ℹ abbreviated name: ¹​scaled_p_val_ligand_adapted_group +## # ℹ 2 more variables: scaled_p_val_receptor_adapted_group , scaled_activity +``` + +Note that we appended the suffix ’\_group’ to columns that refer to +differential expression between conditions, e.g., `lfc_ligand_group` and +`lfc_receptor_group.` + +## Step-by-step prioritization + +`generate_info_tables` is a wrapper function that calculates all the +information needed for the prioritization. However, in some cases you +may need more flexibility on how these values are calculated (but note +that you can pass extra arguments to `generate_info_tables` that will +get passed on to `FindMarkers`, `FindAllMarkers`, and +`AverageExpression`). Below, we show how we use helper functions +`calculate_de` and `get_exprs_avg` to calculate the DE and get the +average expression used for cell type specificity. `process_table_to_ic` +transforms these different dataframes so they are compatible with the +`generate_prioritization_tables` function. + +``` r # Only calculate DE for LCMV condition, with genes that are in the ligand-receptor network -DE_table <- calculate_de(seuratObj, celltype_colname = "celltype", - condition_colname = "aggregate", condition_oi = condition_oi, - features = union(expressed_ligands, expressed_receptors)) +DE_table <- FindAllMarkers(subset(seuratObj, subset = aggregate == "LCMV"), + min.pct = 0, logfc.threshold = 0, return.thresh = 1, + features = unique(unlist(lr_network_filtered))) # Average expression information - only for LCMV condition -expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi) +expression_info <- get_exprs_avg(seuratObj, "celltype", condition_colname = "aggregate", condition_oi = condition_oi, + features = unique(unlist(lr_network_filtered))) # Calculate condition specificity - only for datasets with two conditions! condition_markers <- FindMarkers(object = seuratObj, ident.1 = condition_oi, ident.2 = condition_reference, group.by = "aggregate", min.pct = 0, logfc.threshold = 0, - features = union(expressed_ligands, expressed_receptors)) %>% rownames_to_column("gene") + features = unique(unlist(lr_network_filtered))) %>% rownames_to_column("gene") # Combine DE of senders and receivers -> used for prioritization -processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_renamed, +processed_DE_table <- process_table_to_ic(DE_table, table_type = "celltype_DE", lr_network_filtered, senders_oi = sender_celltypes, receivers_oi = receiver) -processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_renamed) +processed_expr_table <- process_table_to_ic(expression_info, table_type = "expression", lr_network_filtered) -processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_renamed) +processed_condition_markers <- process_table_to_ic(condition_markers, table_type = "group_DE", lr_network_filtered) ``` -Finally we generate the prioritization table. The `lfc_ligand` and -`lfc_receptor` columns are based on the differences between cell types -within your condition of interest. This is equivalent to subsetting your -Seurat object to only the condition of interest and running -`Seurat::FindAllMarkers`. +And here is how you can define custom weights: -The columns that refer to differential expression between conditions are -those with the \_group suffix, e.g., `lfc_ligand_group` and -`lfc_receptor_group.` These are celltype agnostic: they are calculated -by using `Seurat::FindMarkers` between two conditions across all cell -types. +``` r +prioritizing_weights = c("de_ligand" = 1, + "de_receptor" = 1, + "activity_scaled" = 1, + "exprs_ligand" = 1, + "exprs_receptor" = 1, + "ligand_condition_specificity" = 1, + "receptor_condition_specificity" = 1) +``` ``` r prior_table <- generate_prioritization_tables(processed_expr_table, - processed_DE_table, - ligand_activities, - processed_condition_markers, - prioritizing_weights = prioritizing_weights) - -prior_table -## # A tibble: 858 × 51 -## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_sender pct_expressed_receiver avg_ligand avg_receptor ligand_receptor_prod -## -## 1 NK CD8 T Ptprc Dpp4 0.596 0.164 0.380 2.18e- 7 2.96e- 3 6.63e- 4 1 e+ 0 0.894 0.148 16.6 1.35 22.5 -## 2 Mono CD8 T Ptprc Dpp4 0.438 0.164 0.301 3.52e- 5 4.77e- 1 6.63e- 4 1 e+ 0 0.867 0.148 14.9 1.35 20.1 -## 3 Mono CD8 T Ebi3 Il27ra 0.580 0.0609 0.320 9.77e- 49 1.32e- 44 8.28e- 4 1 e+ 0 0.147 0.131 0.546 1.13 0.619 -## 4 Treg CD8 T Ptprc Dpp4 0.282 0.164 0.223 1.44e- 2 1 e+ 0 6.63e- 4 1 e+ 0 0.685 0.148 13.2 1.35 17.9 -## 5 Mono CD8 T Cxcl10 Dpp4 4.27 0.164 2.22 2.53e- 79 3.43e- 75 6.63e- 4 1 e+ 0 0.867 0.148 54.8 1.35 74.1 -## 6 DC CD8 T H2-M2 Cd8a 3.42 1.94 2.68 1.02e-272 1.38e-268 5.25e-206 7.11e-202 0.429 0.659 9.73 10.7 104. -## 7 B CD8 T H2-M3 Cd8a 0.312 1.94 1.13 2.42e- 2 1 e+ 0 5.25e-206 7.11e-202 0.16 0.659 1.59 10.7 17.0 -## 8 DC CD8 T H2-D1 Cd8a 1.15 1.94 1.55 2.60e- 7 3.52e- 3 5.25e-206 7.11e-202 1 0.659 60.7 10.7 651. -## 9 Mono CD8 T H2-T22 Cd8a 0.362 1.94 1.15 7.36e- 4 1 e+ 0 5.25e-206 7.11e-202 0.813 0.659 10.4 10.7 111. -## 10 DC CD8 T Ccl22 Dpp4 2.87 0.164 1.52 1.59e-296 2.15e-292 6.63e- 4 1 e+ 0 0.5 0.148 6.37 1.35 8.61 -## # ℹ 848 more rows -## # ℹ 35 more variables: lfc_pval_ligand , p_val_ligand_adapted , scaled_lfc_ligand , scaled_p_val_ligand , scaled_lfc_pval_ligand , scaled_p_val_ligand_adapted , activity , rank , -## # activity_zscore , scaled_activity , lfc_pval_receptor , p_val_receptor_adapted , scaled_lfc_receptor , scaled_p_val_receptor , scaled_lfc_pval_receptor , scaled_p_val_receptor_adapted , -## # scaled_avg_exprs_ligand , scaled_avg_exprs_receptor , lfc_ligand_group , p_val_ligand_group , lfc_pval_ligand_group , p_val_ligand_adapted_group , scaled_lfc_ligand_group , -## # scaled_p_val_ligand_group , scaled_lfc_pval_ligand_group , scaled_p_val_ligand_adapted_group , lfc_receptor_group , p_val_receptor_group , lfc_pval_receptor_group , -## # p_val_receptor_adapted_group , scaled_lfc_receptor_group , scaled_p_val_receptor_group , scaled_lfc_pval_receptor_group , scaled_p_val_receptor_adapted_group , prioritization_score + processed_DE_table, + ligand_activities, + processed_condition_markers, + prioritizing_weights) + +prior_table %>% head +## # A tibble: 6 × 51 +## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_sender +## +## 1 NK CD8 T Ptprc Dpp4 0.596 0.164 0.380 2.18e- 7 2.96e- 3 0.000663 1 0.894 +## 2 Mono CD8 T Ptprc Dpp4 0.438 0.164 0.301 3.52e- 5 4.77e- 1 0.000663 1 0.867 +## 3 Mono CD8 T Cxcl10 Dpp4 4.27 0.164 2.22 2.53e- 79 3.43e- 75 0.000663 1 0.867 +## 4 Mono CD8 T Cxcl9 Dpp4 4.33 0.164 2.25 3.83e-124 5.19e-120 0.000663 1 0.547 +## 5 Treg CD8 T Ptprc Dpp4 0.282 0.164 0.223 1.44e- 2 1 e+ 0 0.000663 1 0.685 +## 6 Mono CD8 T Cxcl11 Dpp4 2.36 0.164 1.26 9.28e-121 1.26e-116 0.000663 1 0.307 +## # ℹ 39 more variables: pct_expressed_receiver , avg_ligand , avg_receptor , ligand_receptor_prod , lfc_pval_ligand , +## # p_val_ligand_adapted , scaled_lfc_ligand , scaled_p_val_ligand , scaled_lfc_pval_ligand , scaled_p_val_ligand_adapted , activity , +## # rank , activity_zscore , scaled_activity , lfc_pval_receptor , p_val_receptor_adapted , scaled_lfc_receptor , +## # scaled_p_val_receptor , scaled_lfc_pval_receptor , scaled_p_val_receptor_adapted , scaled_avg_exprs_ligand , +## # scaled_avg_exprs_receptor , lfc_ligand_group , p_val_ligand_group , lfc_pval_ligand_group , p_val_ligand_adapted_group , +## # scaled_lfc_ligand_group , scaled_p_val_ligand_group , scaled_lfc_pval_ligand_group , scaled_p_val_ligand_adapted_group , +## # lfc_receptor_group , p_val_receptor_group , lfc_pval_receptor_group , p_val_receptor_adapted_group , scaled_lfc_receptor_group , … ``` -As you can see, the resulting table now show the rankings for -*ligand-receptor interactions of a sender-receiver cell type pair*, -instead of just the prioritized ligands. We included all columns here, -but if you just want relevant columns that were used to calculate the -ranking: +# Prioritizing across multiple receivers + +As NicheNet is a receiver-based pipeline, to prioritize ligand-receptor +pairs across multiple receivers, we need to perform the NicheNet +analysis for each receiver separately. Let’s suppose we want to +prioritize ligand-receptor pairs across all T cells (CD4, CD8, and +Tregs). The CD8 T analysis has already been performed above. We will use +the wrapper function to perform a basic NicheNet analysis on the other +two: ``` r -prior_table %>% select(c('sender', 'receiver', 'ligand', 'receptor', 'scaled_lfc_ligand', 'scaled_lfc_receptor', 'scaled_p_val_ligand_adapted', 'scaled_p_val_receptor_adapted', 'scaled_avg_exprs_ligand', 'scaled_avg_exprs_receptor', 'scaled_lfc_ligand_group', 'scaled_lfc_receptor_group', 'scaled_activity')) -## # A tibble: 858 × 13 -## sender receiver ligand receptor scaled_lfc_ligand scaled_lfc_receptor scaled_p_val_ligand_adapted scaled_p_val_receptor_a…¹ scaled_avg_exprs_lig…² scaled_avg_exprs_rec…³ scaled_lfc_ligand_gr…⁴ scaled_lfc_receptor_…⁵ scaled_activity -## -## 1 NK CD8 T Ptprc Dpp4 0.837 0.901 0.845 0.887 1.00 1.00 0.779 0.831 0.862 -## 2 Mono CD8 T Ptprc Dpp4 0.793 0.901 0.817 0.887 0.867 1.00 0.779 0.831 0.862 -## 3 Mono CD8 T Ebi3 Il27ra 0.827 0.803 0.933 0.873 1.00 0.859 0.538 0.0986 1.00 -## 4 Treg CD8 T Ptprc Dpp4 0.747 0.901 0.736 0.887 0.741 1.00 0.779 0.831 0.862 -## 5 Mono CD8 T Cxcl10 Dpp4 0.994 0.901 0.952 0.887 1.00 1.00 0.990 0.831 0.431 -## 6 DC CD8 T H2-M2 Cd8a 0.984 1 0.994 1 1.00 1.00 0.308 0.0845 0.664 -## 7 B CD8 T H2-M3 Cd8a 0.763 1 0.721 1 1.00 1.00 0.846 0.0845 0.748 -## 8 DC CD8 T H2-D1 Cd8a 0.907 1 0.841 1 1.00 1.00 0.885 0.0845 0.593 -## 9 Mono CD8 T H2-T22 Cd8a 0.772 1 0.785 1 1.00 1.00 0.981 0.0845 0.664 -## 10 DC CD8 T Ccl22 Dpp4 0.979 0.901 0.998 0.887 1.00 1.00 0.490 0.831 0.491 -## # ℹ 848 more rows -## # ℹ abbreviated names: ¹​scaled_p_val_receptor_adapted, ²​scaled_avg_exprs_ligand, ³​scaled_avg_exprs_receptor, ⁴​scaled_lfc_ligand_group, ⁵​scaled_lfc_receptor_group +nichenet_output <- lapply(c("CD4 T", "Treg"), function(receiver_ct){ + nichenet_seuratobj_aggregate(receiver = receiver_ct, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + sender = sender_celltypes, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks, + expression_pct = 0.05) + +}) %>% setNames(c("CD4 T", "Treg")) +## [1] "Read in and process NicheNet's networks" +## [1] "Define expressed ligands and receptors in receiver and sender cells" +## [1] "Perform DE analysis in receiver cell" +## [1] "Perform NicheNet ligand activity analysis" +## [1] "Infer active target genes of the prioritized ligands" +## [1] "Infer receptors of the prioritized ligands" +## [1] "Perform DE analysis in sender cells" +## [1] "Read in and process NicheNet's networks" +## [1] "Define expressed ligands and receptors in receiver and sender cells" +## [1] "Perform DE analysis in receiver cell" +## [1] "Perform NicheNet ligand activity analysis" +## [1] "Infer active target genes of the prioritized ligands" +## [1] "Infer receptors of the prioritized ligands" +## [1] "Perform DE analysis in sender cells" ``` -Cxcl10 now went up in the rankings due to both the high expression of -its potential receptor Dpp4 and its high celltype specificity -(`scaled_lfc_ligand`). You can also see this in the dotplot and heatmap -below. +To generate the dataframes used for prioritization, we will simply +change the `lr_network_filtered` argument to only calculate DE and +expression values for ligand-receptor pairs of interest. ``` r -best_upstream_ligands = ligand_activities %>% top_n(20, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) %>% unique() - -# DE analysis for each sender cell type -DE_table_all = Idents(seuratObj) %>% levels() %>% intersect(sender_celltypes) %>% - lapply(get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = condition_oi, condition_reference = condition_reference, - expression_pct = 0.10, celltype_col = NULL) %>% reduce(full_join) -DE_table_all[is.na(DE_table_all)] = 0 - -order_ligands <- make.names(best_upstream_ligands) %>% rev() +info_tables2 <- lapply(names(nichenet_output), function(receiver_ct) { + generate_info_tables(seuratObj, + celltype_colname = "celltype", + senders_oi = sender_celltypes, + receivers_oi = receiver_ct, + lr_network_filtered = lr_network %>% + filter(from %in% nichenet_output[[receiver_ct]]$ligand_activities$test_ligand & + to %in% nichenet_output[[receiver_ct]]$background_expressed_genes), + condition_colname = "aggregate", + condition_oi = condition_oi, + condition_reference = condition_reference, + scenario = "case_control") +}) +``` -# ligand activity heatmap -ligand_aupr_matrix <- ligand_activities %>% select(aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities$test_ligand) %>% - `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) +We can then combine the results from `generate_info_tables` using +`bind_rows`, which will concatenate the rows together. For the ligand +activities, we will also add an additional column containing the +receiver cell type. Note that for the average expression table +(`sender_receiver_info`) and condition specificity (`lr_condition_de`), +we need to remove duplicate rows. -vis_ligand_aupr <- as.matrix(ligand_aupr_matrix[order_ligands, ], ncol=1) %>% magrittr::set_colnames("AUPR") -p_ligand_aupr <- make_heatmap_ggplot(vis_ligand_aupr, "Prioritized ligands","Ligand activity", - color = "darkorange",legend_position = "top", x_axis_position = "top", - legend_title = "AUPR\ntarget gene prediction ability)") + - theme(legend.text = element_text(size = 9)) - - -# LFC heatmap -# First combine ligand activities with DE information and make -ligand_activities_de <- ligand_activities %>% select(test_ligand, aupr_corrected) %>% rename(ligand = test_ligand) %>% left_join(DE_table_all %>% rename(ligand = gene)) -ligand_activities_de[is.na(ligand_activities_de)] <- 0 -lfc_matrix <- ligand_activities_de %>% select(-ligand, -aupr_corrected) %>% as.matrix() %>% magrittr::set_rownames(ligand_activities_de$ligand) %>% - `rownames<-`(make.names(rownames(.))) %>% `colnames<-`(make.names(colnames(.))) -vis_ligand_lfc <- lfc_matrix[order_ligands,] - -p_ligand_lfc <- make_threecolor_heatmap_ggplot(vis_ligand_lfc, "Prioritized ligands","LFC in Sender", - low_color = "midnightblue", mid_color = "white", mid = median(vis_ligand_lfc), high_color = "red", - legend_position = "top", x_axis_position = "top", legend_title = "LFC") + - theme(axis.text.y = element_text(face = "italic")) - - -# ligand expression Seurat dotplot -order_ligands_adapted <- str_replace_all(order_ligands, "\\.", "-") -rotated_dotplot <- DotPlot(seuratObj %>% subset(celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu") + - # flip of coordinates necessary because we want to show ligands in the rows when combining all plots - coord_flip() + theme(legend.text = element_text(size = 10), legend.title = element_text(size = 12)) - -# Combine figures and legend separately -figures_without_legend <- cowplot::plot_grid( - p_ligand_aupr + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()), - rotated_dotplot + theme(legend.position = "none", axis.ticks = element_blank(), axis.title.x = element_text(size = 12), - axis.text.y = element_text(face = "italic", size = 9), axis.text.x = element_text(size = 9, angle = 90,hjust = 0)) + - ylab("Expression in Sender") + xlab("") + scale_y_discrete(position = "right"), - p_ligand_lfc + theme(legend.position = "none", axis.ticks = element_blank()) + theme(axis.title.x = element_text()) + ylab(""), - align = "hv", - nrow = 1, - rel_widths = c(ncol(vis_ligand_aupr)+6, ncol(vis_ligand_lfc) + 7, ncol(vis_ligand_lfc) + 8)) - -legends <- cowplot::plot_grid( - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_aupr)), - ggpubr::as_ggplot(ggpubr::get_legend(rotated_dotplot)), - ggpubr::as_ggplot(ggpubr::get_legend(p_ligand_lfc)), - nrow = 1, - align = "h", rel_widths = c(1.5, 1, 1)) - -combined_plot <- cowplot::plot_grid(figures_without_legend, legends, nrow = 2, align = "hv") -print(combined_plot) +``` r +# Add CD8 T to list +info_tables2[[3]] <- info_tables + +# bind rows of each element of info_tables using pmap +info_tables_combined <- purrr::pmap(info_tables2, bind_rows) + +# Combine ligand activities and add receiver information +ligand_activities_combined <- bind_rows(nichenet_output$`CD4 T`$ligand_activities %>% mutate(receiver = "CD4 T"), + nichenet_output$Treg$ligand_activities %>% mutate(receiver = "Treg"), + ligand_activities %>% mutate(receiver = "CD8 T")) + +prior_table_combined <- generate_prioritization_tables( + sender_receiver_info = info_tables_combined$sender_receiver_info %>% distinct, + sender_receiver_de = info_tables_combined$sender_receiver_de, + ligand_activities = ligand_activities_combined, + lr_condition_de = info_tables_combined$lr_condition_de %>% distinct, + scenario = "case_control") + +head(prior_table_combined) +## # A tibble: 6 × 51 +## sender receiver ligand receptor lfc_ligand lfc_receptor ligand_receptor_lfc_avg p_val_ligand p_adj_ligand p_val_receptor p_adj_receptor pct_expressed_sender +## +## 1 NK CD8 T Ptprc Dpp4 0.596 0.164 0.380 0.000000218 0.00296 6.63e- 4 1 e+ 0 0.894 +## 2 NK CD4 T Ptprc Cd4 0.596 0.996 0.796 0.000000218 0.00296 2.63e-34 3.56e-30 0.894 +## 3 B CD4 T H2-Eb1 Cd4 4.02 0.996 2.51 0 0 2.63e-34 3.56e-30 0.93 +## 4 Mono CD8 T Ptprc Dpp4 0.438 0.164 0.301 0.0000352 0.477 6.63e- 4 1 e+ 0 0.867 +## 5 Mono CD4 T Ptprc Cd4 0.438 0.996 0.717 0.0000352 0.477 2.63e-34 3.56e-30 0.867 +## 6 NK CD4 T Ptprc Cd247 0.596 0.457 0.526 0.000000218 0.00296 5.61e- 4 1 e+ 0 0.894 +## # ℹ 39 more variables: pct_expressed_receiver , avg_ligand , avg_receptor , ligand_receptor_prod , lfc_pval_ligand , +## # p_val_ligand_adapted , scaled_lfc_ligand , scaled_p_val_ligand , scaled_lfc_pval_ligand , scaled_p_val_ligand_adapted , activity , +## # rank , activity_zscore , scaled_activity , lfc_pval_receptor , p_val_receptor_adapted , scaled_lfc_receptor , +## # scaled_p_val_receptor , scaled_lfc_pval_receptor , scaled_p_val_receptor_adapted , scaled_avg_exprs_ligand , +## # scaled_avg_exprs_receptor , lfc_ligand_group , p_val_ligand_group , lfc_pval_ligand_group , p_val_ligand_adapted_group , +## # scaled_lfc_ligand_group , scaled_p_val_ligand_group , scaled_lfc_pval_ligand_group , scaled_p_val_ligand_adapted_group , +## # lfc_receptor_group , p_val_receptor_group , lfc_pval_receptor_group , p_val_receptor_adapted_group , scaled_lfc_receptor_group , … ``` -![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-8-1.png) - ### Extra visualization of ligand-receptor pairs -We provide the function `make_mushroom_plot` which allows you to display -expression of ligand-receptor pairs in semicircles. By default, the fill -gradient shows the LFC between cell types, while the size of the -semicircle corresponds to the scaled mean expression. +In addition to the usual heatmap visualizations, we provide a function +`make_circos_lr` to visualize the ligand-receptor pairs in a circos +plot. This was originally written for the (now deprecated) Differential +NicheNet vignettes. The function takes in a prioritization table and a +named vector for the color of senders and receivers. We first specify +the number of top ligand-receptor pairs to show with `n`. ``` r -make_mushroom_plot(prior_table, top_n = 30) -``` +# Get top n ligand-receptor pairs +prior_table_oi <- prior_table_combined %>% slice_max(prioritization_score, n = 50) -![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-9-1.png) +# Define colors for senders and receivers +senders_receivers <- prior_table_oi %>% select(sender, receiver) %>% unlist %>% unique %>% sort +celltype_colors <- RColorBrewer::brewer.pal(length(senders_receivers), name = 'Set3') %>% + magrittr::set_names(senders_receivers) -We provide multiple ways to customize this plot, including changing the -“size” and “fill” values to certain columns from the prioritization -table (but without the `_ligand` or `_receptor` suffix). In addition, -you can also choose to show the rankings of each ligand-receptor-sender -pair, as well as show all data points for context. +circos_plot <- make_circos_lr(prior_table_oi, + colors_sender = celltype_colors, colors_receiver = celltype_colors) +``` ``` r -print(paste0("Column names that you can use are: ", paste0(prior_table %>% select(ends_with(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% colnames() %>% - str_remove("_ligand|_receptor|_sender|_receiver") %>% unique, collapse = ", "))) -## [1] "Column names that you can use are: lfc, p_val, p_adj, avg, lfc_pval, scaled_lfc, scaled_p_val, scaled_lfc_pval, scaled_avg_exprs, pct_expressed" - -# Change size and color columns -make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") +circos_plot ``` -![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-1.png) +![](seurat_steps_prioritization_files/figure-gfm/lr-circos-1.png) + +Furthermore, we provide the function `make_mushroom_plot` which allows +you to display expression of ligand-receptor pairs in a specific +receiver. By default, the fill gradient shows the LFC between cell +types, while the size of the semicircle corresponds to the scaled mean +expression. You can also choose to show the rankings of each +ligand-receptor-sender pair with `show_rankings`, as well as show all +data points for context (`show_all_datapoints`). +`true_color_range = TRUE` will adjust the limits of the color gradient +to the min-max of the values, instead of the limit being from 0 to 1. +Note that the numbers displayed here are the rankings within the chosen +cell type and not across all receiver cell types (in case of multiple +receivers). ``` r - -# Show rankings and other datapoints -make_mushroom_plot(prior_table, top_n = 30, show_rankings = TRUE, show_all_datapoints = TRUE) +receiver_oi <- "CD8 T" +legend_adjust <- c(0.7, 0.7) +make_mushroom_plot(prior_table_combined %>% filter(receiver == receiver_oi), + top_n = 30, + true_color_range = TRUE, + show_rankings = TRUE, + show_all_datapoints = TRUE) + + theme(legend.justification = legend_adjust, + axis.title.x = element_text(hjust = 0.25)) ``` -![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-2.png) +![](seurat_steps_prioritization_files/figure-gfm/mushroom-plot-1-1.png) + +Furthermore, you can change the “size” and “fill” values to certain +columns from the prioritization table (those with the `_ligand` or +`_receptor` suffix). ``` r +print(paste0("Column names that you can use are: ", paste0(prior_table %>% select(ends_with(c("_ligand", "_receptor", "_sender", "_receiver"))) %>% colnames() %>% + str_remove("_ligand|_receptor|_sender|_receiver") %>% unique, collapse = ", "))) +## [1] "Column names that you can use are: lfc, p_val, p_adj, avg, lfc_pval, scaled_lfc, scaled_p_val, scaled_lfc_pval, scaled_avg_exprs, pct_expressed" -# Show true limits instead of having it from 0 to 1 -make_mushroom_plot(prior_table, top_n = 30, true_color_range = TRUE) +# Change size and color columns +make_mushroom_plot(prior_table, top_n = 30, size = "pct_expressed", color = "scaled_avg_exprs") + + theme(legend.justification = legend_adjust, + axis.title.x = element_text(hjust = 0.25)) ``` -![](seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-3.png) +![](seurat_steps_prioritization_files/figure-gfm/mushroom-plot-2-1.png) ``` r sessionInfo() -## R version 4.3.1 (2023-06-16) +## R version 4.3.2 (2023-10-31) ## Platform: x86_64-redhat-linux-gnu (64-bit) ## Running under: CentOS Stream 8 ## @@ -448,43 +533,51 @@ sessionInfo() ## BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so; LAPACK version 3.9.0 ## ## locale: -## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C -## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 +## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C ## -## time zone: Europe/Brussels +## time zone: Asia/Bangkok ## tzcode source: system (glibc) ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: -## [1] forcats_0.5.1 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.3 tidyverse_1.3.1 SeuratObject_4.1.4 Seurat_4.4.0 nichenetr_2.0.3 -## [13] testthat_3.1.2 +## [1] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4 +## [9] tidyverse_1.3.1 SeuratObject_5.0.1 Seurat_4.4.0 nichenetr_2.1.0 ## ## loaded via a namespace (and not attached): -## [1] fs_1.5.2 matrixStats_1.0.0 spatstat.sparse_3.0-2 bitops_1.0-7 devtools_2.4.3 lubridate_1.8.0 httr_1.4.2 RColorBrewer_1.1-2 doParallel_1.0.17 -## [10] tools_4.3.1 sctransform_0.4.0 backports_1.4.1 utf8_1.2.2 R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.0 -## [19] sp_2.0-0 prettyunits_1.1.1 gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.1 DiceKriging_1.6.0 spatstat.explore_3.2-1 labeling_0.4.2 -## [28] spatstat.data_3.0-1 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.3 pbapply_1.5-0 foreign_0.8-84 R.utils_2.11.0 smoof_1.6.0.3 parallelly_1.30.0 -## [37] sessioninfo_1.2.2 limma_3.56.2 readxl_1.3.1 rstudioapi_0.13 visNetwork_2.1.2 generics_0.1.2 shape_1.4.6 ica_1.0-2 spatstat.random_3.1-5 -## [46] car_3.1-2 Matrix_1.6-1 S4Vectors_0.38.1 fansi_1.0.2 abind_1.4-5 R.methodsS3_1.8.1 lifecycle_1.0.3 yaml_2.2.2 carData_3.0-5 -## [55] recipes_1.0.7 Rtsne_0.15 grid_4.3.1 promises_1.2.0.1 crayon_1.5.0 miniUI_0.1.1.1 lattice_0.21-8 haven_2.4.3 cowplot_1.1.1 -## [64] mlr_2.19.1 pillar_1.9.0 knitr_1.37 ComplexHeatmap_2.16.0 rjson_0.2.21 future.apply_1.8.1 codetools_0.2-19 fastmatch_1.1-3 leiden_0.3.9 -## [73] glue_1.6.2 ParamHelpers_1.14.1 data.table_1.14.2 remotes_2.4.2 vctrs_0.6.3 png_0.1-7 cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1 -## [82] cachem_1.0.6 gower_1.0.1 xfun_0.40 mime_0.12 prodlim_2023.03.31 survival_3.5-5 timeDate_4022.108 iterators_1.0.14 hardhat_1.3.0 -## [91] lava_1.7.2.1 DiagrammeR_1.0.10 ellipsis_0.3.2 fitdistrplus_1.1-6 ROCR_1.0-11 ipred_0.9-14 nlme_3.1-162 usethis_2.2.2 RcppAnnoy_0.0.19 -## [100] rprojroot_2.0.2 irlba_2.3.5 KernSmooth_2.23-21 rpart_4.1.19 DBI_1.1.2 BiocGenerics_0.46.0 colorspace_2.0-2 Hmisc_5.1-0 nnet_7.3-19 -## [109] tidyselect_1.2.0 processx_3.5.2 compiler_4.3.1 parallelMap_1.5.1 rvest_1.0.2 htmlTable_2.4.1 xml2_1.3.3 desc_1.4.2 plotly_4.10.0 -## [118] shadowtext_0.1.2 checkmate_2.2.0 scales_1.2.1 caTools_1.18.2 lmtest_0.9-39 callr_3.7.0 digest_0.6.29 goftest_1.2-3 spatstat.utils_3.0-3 -## [127] rmarkdown_2.11 htmltools_0.5.6 pkgconfig_2.0.3 base64enc_0.1-3 lhs_1.1.6 highr_0.9 dbplyr_2.1.1 fastmap_1.1.0 rlang_1.1.1 -## [136] GlobalOptions_0.1.2 htmlwidgets_1.6.2 shiny_1.7.1 BBmisc_1.13 farver_2.1.0 zoo_1.8-9 jsonlite_1.7.3 mlrMBO_1.1.5.1 R.oo_1.24.0 -## [145] ModelMetrics_1.2.2.2 magrittr_2.0.2 Formula_1.2-5 patchwork_1.1.1 munsell_0.5.0 Rcpp_1.0.11 ggnewscale_0.4.9 reticulate_1.24 stringi_1.7.6 -## [154] pROC_1.18.4 brio_1.1.3 MASS_7.3-60 plyr_1.8.6 pkgbuild_1.3.1 parallel_4.3.1 listenv_0.8.0 ggrepel_0.9.3 deldir_1.0-6 -## [163] splines_4.3.1 tensor_1.5 hms_1.1.1 circlize_0.4.15 ps_1.6.0 igraph_1.5.1 ggpubr_0.6.0 spatstat.geom_3.2-4 ggsignif_0.6.4 -## [172] reshape2_1.4.4 stats4_4.3.1 pkgload_1.2.4 reprex_2.0.1 evaluate_0.14 modelr_0.1.8 tweenr_2.0.2 tzdb_0.4.0 foreach_1.5.2 -## [181] httpuv_1.6.5 RANN_2.6.1 polyclip_1.10-0 clue_0.3-64 future_1.23.0 scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 -## [190] emoa_0.5-0.2 e1071_1.7-13 rstatix_0.7.2 later_1.3.0 viridisLite_0.4.0 class_7.3-22 memoise_2.0.1 IRanges_2.34.1 cluster_2.1.4 -## [199] globals_0.14.0 caret_6.0-94 +## [1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 lubridate_1.9.3 httr_1.4.7 +## [7] RColorBrewer_1.1-3 doParallel_1.0.17 tools_4.3.2 sctransform_0.4.0 backports_1.4.1 utf8_1.2.4 +## [13] R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.2 sp_2.1-2 +## [19] gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.2 spatstat.explore_3.2-1 labeling_0.4.3 +## [25] spatstat.data_3.0-3 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.5 pbapply_1.7-2 foreign_0.8-85 +## [31] parallelly_1.36.0 limma_3.56.2 readxl_1.4.3 rstudioapi_0.15.0 gridGraphics_0.5-1 visNetwork_2.1.2 +## [37] generics_0.1.3 shape_1.4.6 ica_1.0-3 spatstat.random_3.2-2 car_3.1-2 Matrix_1.6-4 +## [43] fansi_1.0.6 S4Vectors_0.38.1 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.8 carData_3.0-5 +## [49] recipes_1.0.7 Rtsne_0.17 grid_4.3.2 promises_1.2.1 crayon_1.5.2 miniUI_0.1.1.1 +## [55] lattice_0.21-9 haven_2.4.3 cowplot_1.1.2 pillar_1.9.0 knitr_1.45 ComplexHeatmap_2.16.0 +## [61] rjson_0.2.21 future.apply_1.11.0 codetools_0.2-19 leiden_0.3.9 glue_1.6.2 data.table_1.14.10 +## [67] vctrs_0.6.5 png_0.1-8 spam_2.10-0 cellranger_1.1.0 gtable_0.3.4 assertthat_0.2.1 +## [73] gower_1.0.1 xfun_0.41 mime_0.12 prodlim_2023.08.28 survival_3.5-7 timeDate_4032.109 +## [79] iterators_1.0.14 hardhat_1.3.0 lava_1.7.3 DiagrammeR_1.0.10 ellipsis_0.3.2 fitdistrplus_1.1-11 +## [85] ROCR_1.0-11 ipred_0.9-14 nlme_3.1-163 RcppAnnoy_0.0.21 irlba_2.3.5.1 KernSmooth_2.23-22 +## [91] rpart_4.1.21 colorspace_2.1-0 BiocGenerics_0.46.0 DBI_1.1.3 Hmisc_5.1-0 nnet_7.3-19 +## [97] tidyselect_1.2.0 compiler_4.3.2 rvest_1.0.2 htmlTable_2.4.1 xml2_1.3.6 plotly_4.10.0 +## [103] shadowtext_0.1.2 checkmate_2.3.1 scales_1.3.0 caTools_1.18.2 lmtest_0.9-40 digest_0.6.33 +## [109] goftest_1.2-3 spatstat.utils_3.0-4 rmarkdown_2.11 htmltools_0.5.7 pkgconfig_2.0.3 base64enc_0.1-3 +## [115] highr_0.10 dbplyr_2.1.1 fastmap_1.1.1 rlang_1.1.2 GlobalOptions_0.1.2 htmlwidgets_1.6.2 +## [121] shiny_1.7.1 farver_2.1.1 zoo_1.8-12 jsonlite_1.8.8 ModelMetrics_1.2.2.2 magrittr_2.0.3 +## [127] Formula_1.2-5 dotCall64_1.1-1 patchwork_1.1.3 munsell_0.5.0 Rcpp_1.0.11 ggnewscale_0.4.9 +## [133] reticulate_1.34.0 stringi_1.7.6 pROC_1.18.5 MASS_7.3-60 plyr_1.8.9 parallel_4.3.2 +## [139] listenv_0.9.0 ggrepel_0.9.4 deldir_2.0-2 splines_4.3.2 tensor_1.5 hms_1.1.3 +## [145] circlize_0.4.15 igraph_1.2.11 ggpubr_0.6.0 spatstat.geom_3.2-7 ggsignif_0.6.4 reshape2_1.4.4 +## [151] stats4_4.3.2 reprex_2.0.1 evaluate_0.23 modelr_0.1.8 tzdb_0.4.0 foreach_1.5.2 +## [157] tweenr_2.0.2 httpuv_1.6.13 RANN_2.6.1 polyclip_1.10-6 future_1.33.0 clue_0.3-64 +## [163] scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 e1071_1.7-14 rstatix_0.7.2 +## [169] later_1.3.2 viridisLite_0.4.2 class_7.3-22 IRanges_2.34.1 cluster_2.1.4 timechange_0.2.0 +## [175] globals_0.16.2 caret_6.0-94 ``` ### References diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-1.png new file mode 100644 index 0000000..b2c79b5 Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-1.png new file mode 100644 index 0000000..bcbfffb Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-2.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-2.png new file mode 100644 index 0000000..e5f4c12 Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/lr-circos-unused-2.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-1-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-1-1.png new file mode 100644 index 0000000..976c32c Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-1-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-2-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-2-1.png new file mode 100644 index 0000000..c662f46 Binary files /dev/null and b/vignettes/seurat_steps_prioritization_files/figure-gfm/mushroom-plot-2-1.png differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-1.png deleted file mode 100644 index 1a5d264..0000000 Binary files a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-2.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-2.png deleted file mode 100644 index cf72779..0000000 Binary files a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-2.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-3.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-3.png deleted file mode 100644 index 6998180..0000000 Binary files a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-10-3.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-8-1.png deleted file mode 100644 index 639cf9b..0000000 Binary files a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-8-1.png and /dev/null differ diff --git a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index 59af991..0000000 Binary files a/vignettes/seurat_steps_prioritization_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper.Rmd b/vignettes/seurat_wrapper.Rmd index 9a80897..3988bea 100644 --- a/vignettes/seurat_wrapper.Rmd +++ b/vignettes/seurat_wrapper.Rmd @@ -23,26 +23,21 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3/v4 object. Such a NicheNet analysis can help you to generate hypotheses about an intercellular communication process of interest for which you have single-cell gene expression data as a Seurat object. Specifically, NicheNet can predict 1) which ligands from one or more cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. +In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat (v3-v5) object containing single-cell expression data. **Assuming you have captured the changes in gene expression resulting from your cell-cell communication (CCC) process of interest,** a NicheNet analysis can help you to generate hypotheses about the CCC process. Specifically, NicheNet can predict 1) which ligands from the microenvironment or cell population(s) ("sender/niche") are most likely to affect target gene expression in an interacting cell population ("receiver/target") and 2) which specific target genes are affected by which of these predicted ligands. -Because NicheNet studies how ligands affect gene expression in putatively neighboring/interacting cells, you need to have data about this effect in gene expression you want to study. So, there need to be 'some kind of' differential expression in a receiver cell population, caused by ligands from one of more interacting sender cell populations. +The wrapper function we will show consists of the same different steps that are discussed in detail in the main NicheNet vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md).Please make sure you understand the different steps described in this vignette before performing a real NicheNet analysis on your data. We generally recommend the step-by-step analysis as it allows users to adapt specific steps of the pipeline to make them more appropriate for their data. -In this vignette, we demonstrate the use of NicheNet on a Seurat Object. The wrapper function we will show consists of the same different steps that are discussed in detail in the main, basis, NicheNet vignette [NicheNet's ligand activity analysis on a gene set of interest: predict active ligands and their target genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. Make sure you understand the different steps in a NicheNet analysis that are described in that vignette before proceeding with this vignette and performing a real NicheNet analysis on your data. -In another vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`, we also show the execution of these steps one for one, but in contrast to the main vignette now specifically for a Seurat Object. This allows users to adapt specific steps of the pipeline to make them more appropriate for their data (recommended). +To perform a NicheNet analysis, three features are extracted from the input data: the potential ligands, the gene set of interest, and the background gene set. This vignette will extract each feature as described in this flowchart: -As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. +![](images/figure2.svg){width=70%} -In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. +As example expression data of interacting cells, we will use mouse NICHE-seq data to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [@medaglia_spatial_2017]. We will focus on CD8 T cells as the receiver population, and as this dataset contains two conditions (before and after LCMV infection), the differentially expressed genes between these two conditions in CD8 T cells will be used as our gene set of interest. We will then prioritize which ligands from the microenvironment (sender-agnostic approach) and from specific immune cell populations like monocytes, dendritic cells, NK cells, B cells, and CD4 T cells (sender-focused approach) can regulate and induce these observed gene expression changes. -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. +The [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. - -The NicheNet ligand-receptor network and weighted networks are necessary to define and show possible ligand-receptor interactions between two cell populations. The ligand-target matrix denotes the prior potential that particular ligands might regulate the expression of particular target genes. This matrix is necessary to prioritize possible ligand-receptor interactions based on observed gene expression effects (i.e. NicheNet's ligand activity analysis) and infer affected target genes of these prioritized ligands. - -### Load Packages: +### Load packages ```{r} library(nichenetr) # Please update to v2.0.4 @@ -53,145 +48,178 @@ library(tidyverse) If you would use and load other packages, we recommend to load these 3 packages after the others. -### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet's networks + +The ligand-target prior model, ligand-receptor network, and weighted integrated networks are needed for this vignette. The ligand-target prior model is a matrix describing the potential that a ligand may regulate a target gene, and it is used to run the ligand activity analysis. The ligand-receptor network contains information on potential ligand-receptor bindings, and it is used to identify potential ligands. Finally, the weighted ligand-receptor network contains weights representing the potential that a ligand will bind to a receptor, and it is used for visualization. ```{r} -organism = "mouse" +organism <- "mouse" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) - + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) head(lr_network) ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network head(weighted_networks$gr) # interactions and their weights in the gene regulatory network ``` -### Read in the expression data of interacting cells: +### Read in the expression data of interacting cells -The dataset used here is publicly available single-cell data from immune cells in the T cell area of the inguinal lymph node. The data was processed and aggregated by applying the Seurat alignment pipeline. The Seurat object contains this aggregated data. Note that this should be a Seurat v3/v4 object and that gene should be named by their official mouse/human gene symbol. If your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. +We processed and aggregated the original dataset by using the Seurat alignment pipeline. As we created this object using Seurat v3, it has to be updated with `UpdateSeuratObject`. Note that genes should be named by their official mouse/human gene symbol. If your expression data has the older gene symbols, you may want to use our alias conversion function to avoid the loss of gene names. ```{r} -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) # For newer Seurat versions, you may need to run the following seuratObj <- UpdateSeuratObject(seuratObj) -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names +# Convert gene names +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") + seuratObj@meta.data %>% head() ``` -Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes -```{r} -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application +Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes. + +```{r umap-1} +# Note that the number of cells of some cell types is very low and should preferably be higher for a real application +seuratObj@meta.data$celltype %>% table() DimPlot(seuratObj, reduction = "tsne") ``` -Visualize the data to see to which condition cells belong. The metadata dataframe column that denotes the condition (steady-state or after LCMV infection) is here called 'aggregate'. +Visualize the data to see to which condition cells belong. The metadata column that denotes the condition (steady-state or after LCMV infection) is here called 'aggregate'. -```{r} +```{r umap-2} seuratObj@meta.data$aggregate %>% table() DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ``` # Perform the NicheNet analysis -In this case study, we want to apply NicheNet to predict which ligands expressed by all immune cells in the T cell area of the lymph node are most likely to have induced the differential expression in CD8 T cells after LCMV infection. +In this case study, we want to apply NicheNet to predict which ligands expressed by the microenvironment (sender-agnostic) and immune cells in the T cell area of the lymph node (sender-focused) are most likely to have induced the differential expression in CD8 T cells after LCMV infection. In contrary to NicheNet v1 where we only used the "sender-focused" approach, we now recommend users to run both the "sender-agnostic" approach and "sender-focused" approach. These approaches only affect the list of potential ligands that are considered for prioritization. As described in the flowchart above, we do not define any sender populations in the 'sender agnostic' approach but consider all ligands for which its cognate receptor is expressed in the receiver population. The sender-focused approach will then filter the list of ligands to ones where the ligands are expressed in the sender cell population(s). As described in the main vignette, the pipeline of a basic NicheNet analysis consist of the following steps: +* 1. Define a set of potential ligands for both the sender-agnostic and sender-focused approach +* 2. Define the gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) +* 3. Define the background genes +* 4. Perform NicheNet ligand activity analysis: rank the potential ligands based on the presence of their target genes in the gene set of interest (compared to the background set of genes) +* 5. Infer target genes and receptors of top-ranked ligands -* 1. Define a “sender/niche” cell population and a “receiver/target” cell population present in your expression data and determine which genes are expressed in both populations -* 2. Define a gene set of interest: these are the genes in the “receiver/target” cell population that are potentially affected by ligands expressed by interacting cells (e.g. genes differentially expressed upon cell-cell interaction) -* 3. Define a set of potential ligands: these are ligands that are expressed by the “sender/niche” cell population and bind a (putative) receptor expressed by the “receiver/target” population -* 4) Perform NicheNet ligand activity analysis: rank the potential ligands based on the presence of their target genes in the gene set of interest (compared to the background set of genes) -* 5) Infer receptors and top-predicted target genes of ligands that are top-ranked in the ligand activity analysis +All these steps are contained in one of three wrapper functions: `nichenet_seuratobj_aggregate`, `nichenet_seuratobj_cluster_de` and `nichenet_seuratobj_aggregate_cluster_de`. These functions differ on how the gene set of interest is calculated, as follows: -All these steps are contained in one of three following similar single functions: `nichenet_seuratobj_aggregate`, `nichenet_seuratobj_cluster_de` and `nichenet_seuratobj_aggregate_cluster_de`. +| **Function** | **Gene set of interest** | **Background genes** | +|-----------------------------------------|----------------------------------------------------|----------------------------------------| +| nichenet_seuratobj_aggregate | DE between two conditions of the same cell type | All expressed genes in the cell type | +| nichenet_seuratobj_cluster_de | DE between two cell types | All expressed genes in both cell types | +| nichenet_seuratobj_aggregate_cluster_de | DE between two cell types from specific conditions | All expressed genes in both cell types | -In addition to these steps, the function `nichenet_seuratobj_aggregate` that is used for the analysis when having two conditions will also calculate differential expression of the ligands in the sender cell type. Note that this ligand differential expression is not used for prioritization and ranking of the ligands! -## NicheNet analysis on Seurat object: explain differential expression between two conditions +**Note:** Cell types should be the identities of the seurat object (check using `table(Idents(seuratObj))`) -In this case study, the receiver cell population is the 'CD8 T' cell population, whereas the sender cell populations are 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. The above described functions will consider a gene to be expressed when it is expressed in at least a predefined fraction of cells in one cluster (default: 10%). +## `nichenet_seuratobj_aggregate`: explain differential expression between two conditions -The gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate', the method to calculate the differential expression is the standard Seurat Wilcoxon test. +For the sender-agnostic approach the sender is set to 'undefined'. The receiver cell population is the 'CD8 T' cell population, and the gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. Thus, the condition of interest is 'LCMV', whereas the reference/steady-state condition is 'SS'. The column containing condition information is 'aggregate'. The method to calculate differential expression is the standard Seurat Wilcoxon test. To use other methods, users will have to go through step-by-step analysis. The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 30 (`top_n_ligands`). The number of target genes to consider per ligand when performing the target gene inference is 200 (`top_n_targets`). We only retain ligands and receptors that are expressed in at least a predefined fraction of cells in one cluster (`expression_pct`, default: 10%). -The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 30 by default. +```{r} +nichenet_output_agnostic <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, + sender = "undefined", + receiver = "CD8 T", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks + ) +``` -To perform the NicheNet analysis with these specifications, run the following: +For the sender-focused approach, simply provide one or more sender populations: ```{r} -# indicated cell types should be cell class identities -# check via: -# seuratObj %>% Idents() %>% table() -nichenet_output = nichenet_seuratobj_aggregate( +nichenet_output <- nichenet_seuratobj_aggregate( seurat_obj = seuratObj, - receiver = "CD8 T", - condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + receiver = "CD8 T", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, - weighted_networks = weighted_networks) + weighted_networks = weighted_networks + ) ``` +**Note:** It is also possible that you want to consider all cell types present as possible sender cell by defining `sender = "all"`. This also includes the receiver cell type, making that you can look at autocrine signaling as well. + ### Interpret the NicheNet analysis output +We will investigate the output of the sender-focused approach. + +```{r} +names(nichenet_output) +``` + #### Ligand activity analysis results -A first thing NicheNet does, is prioritizing ligands based on predicted ligand activity. To see the ranking of these ligands, run the following command: +To see the ranking of ligands based on the predicted ligand activity: ```{r} nichenet_output$ligand_activities ``` -The different ligand activity measures (auroc, aupr, pearson correlation coefficient) are a measure for how well a ligand can predict the observed differentially expressed genes compared to the background of expressed genes. In our validation study, we showed that the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity (this was the Pearson correlation for v1). Therefore, NicheNet ranks the ligands based on their AUPR. This allows us to prioritize ligands inducing the antiviral response in CD8 T cells. +Ligands are ranked based on the area under the precision-recall curve (AUPR) between a ligand's target predictions and the observed transcriptional response. Although other metrics like the AUROC and pearson correlation coefficient are also computed, we demonstrated in our validation study that the AUPRwas the most informative measure to define ligand activity (this was the Pearson correlation for v1). The vignette on how we performed the validation can be found at [Evaluation of NicheNet's ligand-target predictions](model_evaluation.md). -To get a list of the 30 top-ranked ligands: run the following command +To get a list of the top 30 ligands: ```{r} nichenet_output$top_ligands ``` -These ligands are expressed by one or more of the input sender cells. To see which cell population expresses which of these top-ranked ligands, you can run the following: +Below we will show visualizations that are in the output object. In some cases (including this one), not all top ligands that are present in `top_ligands` will be shown in the plot. The left-out ligands are ligands that don't have target genes with high enough regulatory potential scores, and therefore did not survive the used cutoffs (in the functions `get_weighted_ligand_target_links` and `prepare_ligand_target_visualization` that are run internally). To include them, you can increase the number of target genes considered or be less stringent in the used cutoffs (`top_n_targets` and `cutoff_visualization` , respectively). In this case, CCl22 (ranked 25th) is missing from the plots. + +To see which sender cell population expresses which of the top-ranked ligands: -```{r, fig.width=8} +```{r dotplot-sender, fig.width=8} nichenet_output$ligand_expression_dotplot ``` As you can see, most op the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. -It could also be interesting to see whether some of these ligands are differentially expressed after LCMV infection. +It could also be interesting to see whether some of these ligands are differentially expressed after LCMV infection. -```{r, fig.width=4, fig.height=8} +```{r lfc-heatmap, fig.width=4, fig.height=8} nichenet_output$ligand_differential_expression_heatmap ``` -As you can see, most op the top-ranked ligands seem also to be upregulated themselves in monocytes after viral infection. This is not a prerequisite to be top-ranked (cf: ranking only determined based on enrichment of target genes among DE genes in the receiver, CD8T cells), but is nice additional "evidence" that these ligands might indeed be important. - +Although this ligand differential expression is not used for prioritization and ranking of the ligands (the ranking is only determined based on enrichment of target genes among DE genes in the receiver, CD8T cells), most of the top-ranked ligands also seem to be upregulated themselves in monocytes after viral infection. This is nice additional "evidence" that these ligands might indeed be important. #### Inferred active ligand-target links -NicheNet also infers active target genes of these top-ranked ligands. To see which top-ranked ligands are predicted to have regulated the expression of which differentially expressed genes, you can run following command for a heatmap visualization: +NicheNet also infers active target genes of these top-ranked ligands, best visualized with the following heatmap showing which top-ranked ligands are predicted to have regulated the expression of which differentially expressed genes: -```{r} +```{r ligand-target-heatmap, fig.width=10} nichenet_output$ligand_target_heatmap ``` -This is a normal ggplot object that can be adapted likewise. For example if you want to change the color code to blue instead of purple, change the axis ticks of the legend, and change the axis labels of the heatmap, you can do the following: +This is a normal ggplot object that can be adapted accordingly. For example if you want to change the color code to blue instead of purple, change the axis ticks of the legend, and change the axis labels of the heatmap, you can do the following: -```{r} -nichenet_output$ligand_target_heatmap + scale_fill_gradient2(low = "whitesmoke", high = "royalblue", breaks = c(0,0.0045,0.009)) + xlab("anti-LCMV response genes in CD8 T cells") + ylab("Prioritized immmune cell ligands") +```{r ligand-target-heatmap-adapted} +nichenet_output$ligand_target_heatmap + + scale_fill_gradient2(low = "whitesmoke",high = "royalblue") + + xlab("Anti-LCMV response genes in CD8 T cells") + ylab("Prioritized immmune cell ligands") ``` If you want, you can also extract the ligand-target links and their regulatory potential scores in matrix or data frame format (e.g. for visualization in other ways or output to a csv file). @@ -204,35 +232,38 @@ nichenet_output$ligand_target_matrix %>% .[1:10,1:6] nichenet_output$ligand_target_df # weight column = regulatory potential ``` -To get a list of the top-predicted target genes of the 30 top-ranked ligands: run the following command +To get a list of the top-predicted target genes of the 30 top-ranked ligands: ```{r} nichenet_output$top_targets ``` -You can visualize the expression of these as well. Because we only focus on CD8 T cells as receiver cells, we will only show expression in these cells. To emphasize that these target genes are differentially expressed, we split cells up in steadys-state cells and cells after response to LCMV infection. +You can visualize the expression of these target genes as well (only the top 50 are shown here). Because we only focus on CD8 T cells as receiver cells, we will only show expression in these cells. To emphasize that these target genes are differentially expressed, we split cells up in steady-state cells and cells after response to LCMV infection. -```{r, fig.width=12} -DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_targets %>% rev(), split.by = "aggregate") + RotatedAxis() +```{r dotplot-condition, fig.width=10, fig.height=8} +DotPlot(seuratObj %>% subset(idents = "CD8 T"), + features = nichenet_output$top_targets[1:50] %>% + rev(), split.by = "aggregate") + coord_flip() ``` -```{r} -VlnPlot(seuratObj %>% subset(idents = "CD8 T"), features = c("Zbp1","Ifit3","Irf7"), split.by = "aggregate", pt.size = 0, combine = FALSE) +```{r violin-plot} +VlnPlot(seuratObj %>% subset(idents = "CD8 T"), + features = c("Ptprc", "H2-M3", "Cxcl10"), split.by = "aggregate", pt.size = 0, combine = TRUE) ``` -To visualize ligand activities, expression, differential expression and target genes of ligands, run the following command +The display the combined plot of ligand activities, expression, differential expression and target genes of ligands: -```{r, fig.width = 16, fig.height= 10} +```{r summary-vis, fig.width = 16, fig.height= 10} nichenet_output$ligand_activity_target_heatmap ``` -**important: above figure can be considered as one of the most important summary figures of the NicheNet analysis. Here you can see which ligand-receptor pairs have both high differential expression and ligand activity (=target gene enrichment). These are very interesting predictions as key regulators of your intercellular communication process of interest ! ** +**Important: the above figure can be considered as one of the most important summary figures of the NicheNet analysis. Here you can see which ligand-receptor pairs have both high differential expression and ligand activity (=target gene enrichment). These are very interesting predictions as key regulators of your intercellular communication process of interest!** #### Inferred ligand-receptor interactions for top-ranked ligands NicheNet also infers the receiver cell receptors of these top-ranked ligands. You can run following command for a heatmap visualization of the ligand-receptor links: -```{r} +```{r ligand-receptor-heatmap} nichenet_output$ligand_receptor_heatmap ``` @@ -242,11 +273,11 @@ If you want, you can also extract the ligand-receptor links and their interactio nichenet_output$ligand_receptor_matrix %>% .[1:10,1:6] ``` -```{r} +```{r dotplot-receptor} nichenet_output$ligand_receptor_df # weight column accords to number of data sources that document this interaction ``` -To get a list of the receptors of the 20 top-ranked ligands: run the following command +To get a list of the receptors of the 30 top-ranked ligands: ```{r} nichenet_output$top_receptors @@ -254,129 +285,130 @@ nichenet_output$top_receptors You can visualize the expression of these as well. Because we only focus on CD8 T cells as receiver cells, we will only show expression in these cells. -```{r, fig.width=12} -DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_receptors %>% rev(), split.by = "aggregate") + RotatedAxis() +```{r dotplot-receptors, fig.width=12} +DotPlot(seuratObj %>% subset(idents = "CD8 T"), + features = nichenet_output$top_receptors %>% rev(), split.by = "aggregate") + + coord_flip() ``` - If you are interested in checking which geneset (and background set of genes) was used during the ligand activity analysis: ```{r} nichenet_output$geneset_oi nichenet_output$background_expressed_genes %>% length() ``` -### Rerun the NicheNet analysis with different sender cell definition - -Instead of focusing on multiple sender cell types, it is possible that you are only interested in doing the analyis for one sender cell type, such as dendritic cells in this case. - -```{r, fig.width=14} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) - -nichenet_output$ligand_activity_target_heatmap -``` - -Instead of focusing on one or multiple predefined sender cell types, it is also possible that you want to consider all cell types present as possible sender cell. This also includes the receiver cell type, making that you can look at autocrine signaling as well. +### Results of the sender-agnostic approach -```{r, fig.width=14} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) - -nichenet_output$ligand_activity_target_heatmap -``` - -In some cases, it could be possible that you don't have data of potential sender cells. If you still want to predict possible upstream ligands that could have been responsible for the observed differential expression in your cell type, you can do this by following command. This will consider all possible ligands in the NicheNet databases for which a receptor is expressed by the receiver cell of interest. - -```{r, fig.width=8} -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) - -nichenet_output$ligand_activity_target_heatmap +```{r summary-vis-agnostic, fig.width=14} +# There is no log-fold change or expression plot because we did not define cell types +nichenet_output_agnostic$ligand_activity_target_heatmap ``` As you can see in this analysis result, many genes DE in CD8 T cells after LCMV infection are strongly predicted type I interferon targets. The presence of a type I interferon signature in the receiver cell type, but the absence of expression of type I interferons in sender cell types, might indicate that type I interferons are expressed by a different, non-profiled cell type, or at a time point before sampling. The latter could make sense, because there always is a time delay between expression of a ligand-encoding gene and the effect of the ligand on a target/receiver cell (i.e. expression of target genes). -### Run multiple NicheNet analyses on different receiver cell populations +#### Running multiple NicheNet analyses on different receiver cell populations In some cases, you might be interested in multiple target/receiver cell populations. You can decide to run this for every cell type separately, or in one line of code as demonstrated here (results are the same). As example, we could have been interested in explaining DE between steady-state and LCMV infection in both CD8 and CD4 T cells. ```{r} -receiver_celltypes_oi = c("CD4 T", "CD8 T") -# receiver_celltypes_oi = seuratObj %>% Idents() %>% unique() # for all celltypes in the dataset: use only when this would make sense biologically +# To run with all celltypes in the dataset (only when this would make sense biologically!) +# receiver_celltypes_oi <- seuratObj %>% Idents() %>% unique() + +receiver_celltypes_oi <- c("CD4 T", "CD8 T") -nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +nichenet_output <- receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) -names(nichenet_output) = receiver_celltypes_oi +names(nichenet_output) <- receiver_celltypes_oi ``` Check which ligands were top-ranked for both CD8T and CD4T and which ligands were more cell-type specific ```{r} -common_ligands = intersect(nichenet_output$`CD4 T`$top_ligands, nichenet_output$`CD8 T`$top_ligands) -print("common ligands are: ") +common_ligands <- intersect(nichenet_output$`CD4 T`$top_ligands, nichenet_output$`CD8 T`$top_ligands) +print("Common ligands:") print(common_ligands) -cd4_ligands = nichenet_output$`CD4 T`$top_ligands %>% setdiff(nichenet_output$`CD8 T`$top_ligands) -cd8_ligands = nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`CD4 T`$top_ligands) +cd4_ligands <- nichenet_output$`CD4 T`$top_ligands %>% setdiff(nichenet_output$`CD8 T`$top_ligands) +cd8_ligands <- nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`CD4 T`$top_ligands) -print("Ligands specifically regulating DE in CD4T: ") +print("Ligands specifically regulating DE in CD4T:") print(cd4_ligands) -print("Ligands specifically regulating DE in CD8T: ") +print("Ligands specifically regulating DE in CD8T:") print(cd8_ligands) ``` -## NicheNet analysis on Seurat object: explain differential expression between two cell populations +## `nichenet_seuratobj_cluster_de`: explain differential expression between two cell types -Previously, we demonstrated the use of a wrapper function for applying NicheNet to explain differential expression between two conditions in one cell type. However, also differential expression between two cell populations might sometimes be (partially) caused by communication with cells in the neighborhood. For example, differentiation from a progenitor cell to the differentiated cell might be induced by niche cells. A concrete example is discussed in this paper: [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). +Unlike the case above where we applied NicheNet to explain differential expression between two conditions in one cell type, here we try to explain differential expression between two cell populations. DE between cell populations are sometimes (partially) caused by communication with cells in the neighborhood, e.g., the differentiation from a progenitor cell to a differentiated cell might be induced by niche cells. A concrete example is discussed in the paper by Bonnardel et al. (2019): [Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). -Therefore, we will now also demonstrate the use of another Seurat wrapper function that can be used in the case of explaining differential expression between cell populations. But keep in mind that the comparison that you make should be biologically relevant. It is possible to use NicheNet to explain differential expression between any two cell populations in your dataset, but in most cases, differential expression between cell populations will be a result of cell-intrinsic properties (i.e. different cell types have a different gene expression profile) and not of intercellular communication processes. In such a case, it does not make any sense to use NicheNet. +However, keep in mind that the comparison that you make should be biologically relevant. as in most cases, differential expression between cell populations will be a result of cell-intrinsic properties (i.e. different cell types have a different gene expression profile) and not of an intercellular communication processes. In such a case, it does not make any sense to use NicheNet. -For demonstration purposes, we will here first change the seuratObject of the data described above, such that it can be used in this setting. +For demonstration purposes, we will change the Seurat object of the same dataset such that it can be used in this setting. ```{r} -seuratObj@meta.data$celltype = paste(seuratObj@meta.data$celltype,seuratObj@meta.data$aggregate, sep = "_") - -seuratObj@meta.data$celltype %>% table() - -seuratObj = SetIdent(seuratObj,value = "celltype") +seuratObj <- SetIdent(seuratObj, value = paste(seuratObj$celltype, seuratObj$aggregate, sep = "_")) +Idents(seuratObj) %>% table() ``` Now perform the NicheNet analysis to explain differential expression between the 'affected' cell population 'CD8 T cells after LCMV infection' and the reference cell population 'CD8 T cells in steady-state' by ligands expressed by monocytes and DCs after LCMV infection. ```{r} -nichenet_output = nichenet_seuratobj_cluster_de( +nichenet_output <- nichenet_seuratobj_cluster_de( seurat_obj = seuratObj, - receiver_reference = "CD8 T_SS", receiver_affected = "CD8 T_LCMV", - sender = c("DC_LCMV","Mono_LCMV"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) + receiver_reference = "CD8 T_SS", + receiver_affected = "CD8 T_LCMV", + sender = c("DC_LCMV", "Mono_LCMV"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) ``` -Check the top-ranked ligands and their target genes +Check the top-ranked ligands and their target genes: -```{r, fig.width = 12} +```{r summary-vis-cluster-de, fig.width = 12} nichenet_output$ligand_activity_target_heatmap ``` -Check the expression of the top-ranked ligands +Check the expression of the top-ranked ligands: -```{r} -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() +```{r dotplot-cluster-de, fig.width=12,fig.height=6} +DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + + RotatedAxis() ``` -It could be interested to check which top-ranked ligands are differentially expressed in monocytes after LCMV infection +It could be interesting to check which top-ranked ligands are differentially expressed in monocytes after LCMV infection: + ```{r} -Mono_upregulated_ligands = FindMarkers(seuratObj, ident.1 = "Mono_LCMV", ident.2 = "Mono_SS") %>% rownames_to_column("gene") %>% filter(avg_log2FC > 0.25 & p_val_adj <= 0.05) %>% pull(gene) %>% intersect(nichenet_output$top_ligands) +Mono_upregulated_ligands <- FindMarkers(seuratObj, ident.1 = "Mono_LCMV", ident.2 = "Mono_SS") %>% + rownames_to_column("gene") %>% filter(avg_log2FC > 0.25 & p_val_adj <= 0.05) %>% + pull(gene) %>% intersect(nichenet_output$top_ligands) -print("Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-StSt and CD8T-LCMV are: ") +print("Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-SS and CD8T-LCMV are: ") print(Mono_upregulated_ligands) ``` # Remarks 1. Top-ranked ligands and target genes shown here differ from the predictions shown in the respective case study in the NicheNet paper because a different definition of expressed genes was used. -2. Differential expression is here done via the classical Wilcoxon test used in Seurat to define marker genes of a cell cluster by comparing it to other clusters. This is not optimal if you would have repeated samples for your conditions. In such a case, we recommend to follow the vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` and tweak the differential expression step there (and perform the analysis e.g. as discussed in https://github.com/HelenaLC/muscat). +2. Differential expression is here done via the classical Wilcoxon test used in Seurat to define marker genes of a cell cluster by comparing it to other clusters. This is not optimal if you would have repeated samples for your conditions. In such a case, we recommend to follow the vignette [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md) and tweak the differential expression step there (and perform the analysis e.g., as discussed in https://github.com/HelenaLC/muscat). + + +```{r} +sessionInfo() +``` # References diff --git a/vignettes/seurat_wrapper.md b/vignettes/seurat_wrapper.md index b6460b0..df678e2 100644 --- a/vignettes/seurat_wrapper.md +++ b/vignettes/seurat_wrapper.md @@ -8,71 +8,53 @@ rmarkdown::render("vignettes/seurat_wrapper.Rmd", output_format = "github_docume --> In this vignette, you can learn how to perform a basic NicheNet analysis -on a Seurat v3/v4 object. Such a NicheNet analysis can help you to -generate hypotheses about an intercellular communication process of -interest for which you have single-cell gene expression data as a Seurat -object. Specifically, NicheNet can predict 1) which ligands from one or -more cell population(s) (“sender/niche”) are most likely to affect -target gene expression in an interacting cell population +on a Seurat (v3-v5) object containing single-cell expression data. +**Assuming you have captured the changes in gene expression resulting +from your cell-cell communication (CCC) process of interest,** a +NicheNet analysis can help you to generate hypotheses about the CCC +process. Specifically, NicheNet can predict 1) which ligands from the +microenvironment or cell population(s) (“sender/niche”) are most likely +to affect target gene expression in an interacting cell population (“receiver/target”) and 2) which specific target genes are affected by which of these predicted ligands. -Because NicheNet studies how ligands affect gene expression in -putatively neighboring/interacting cells, you need to have data about -this effect in gene expression you want to study. So, there need to be -‘some kind of’ differential expression in a receiver cell population, -caused by ligands from one of more interacting sender cell populations. - -In this vignette, we demonstrate the use of NicheNet on a Seurat Object. The wrapper function we will show consists of the same different steps -that are discussed in detail in the main, basis, NicheNet vignette -[NicheNet’s ligand activity analysis on a gene set of interest: predict -active ligands and their target -genes](ligand_activity_geneset.md):`vignette("ligand_activity_geneset", package="nichenetr")`. -Make sure you understand the different steps in a NicheNet analysis that -are described in that vignette before proceeding with this vignette and -performing a real NicheNet analysis on your data. In another vignette -[Perform NicheNet analysis starting from a Seurat object: step-by-step -analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")`, -we also show the execution of these steps one for one, but in contrast -to the main vignette now specifically for a Seurat Object. This allows -users to adapt specific steps of the pipeline to make them more -appropriate for their data (recommended). +that are discussed in detail in the main NicheNet vignette [Perform +NicheNet analysis starting from a Seurat object: step-by-step +analysis](seurat_steps.md).Please make sure you understand the different +steps described in this vignette before performing a real NicheNet +analysis on your data. We generally recommend the step-by-step analysis +as it allows users to adapt specific steps of the pipeline to make them +more appropriate for their data. + +To perform a NicheNet analysis, three features are extracted from the +input data: the potential ligands, the gene set of interest, and the +background gene set. This vignette will extract each feature as +described in this flowchart: + + As example expression data of interacting cells, we will use mouse -NICHE-seq data from Medaglia et al. to explore intercellular -communication in the T cell area in the inguinal lymph node before and -72 hours after lymphocytic choriomeningitis virus (LCMV) infection -(Medaglia et al. 2017). We will NicheNet to explore immune cell -crosstalk in response to this LCMV infection. - -In this dataset, differential expression is observed between CD8 T cells -in steady-state and CD8 T cells after LCMV infection. NicheNet can be -applied to look at how several immune cell populations in the lymph node -(i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can -regulate and induce these observed gene expression changes. NicheNet -will specifically prioritize ligands from these immune cells and their -target genes that change in expression upon LCMV infection. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) -and the [Seurat object of the processed NICHE-seq single-cell +NICHE-seq data to explore intercellular communication in the T cell area +in the inguinal lymph node before and 72 hours after lymphocytic +choriomeningitis virus (LCMV) infection (Medaglia et al. 2017). We will +focus on CD8 T cells as the receiver population, and as this dataset +contains two conditions (before and after LCMV infection), the +differentially expressed genes between these two conditions in CD8 T +cells will be used as our gene set of interest. We will then prioritize +which ligands from the microenvironment (sender-agnostic approach) and +from specific immune cell populations like monocytes, dendritic cells, +NK cells, B cells, and CD4 T cells (sender-focused approach) can +regulate and induce these observed gene expression changes. + +The [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and +the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. # Prepare NicheNet analysis -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks. - -The NicheNet ligand-receptor network and weighted networks are necessary -to define and show possible ligand-receptor interactions between two -cell populations. The ligand-target matrix denotes the prior potential -that particular ligands might regulate the expression of particular -target genes. This matrix is necessary to prioritize possible -ligand-receptor interactions based on observed gene expression effects -(i.e. NicheNet’s ligand activity analysis) and infer affected target -genes of these prioritized ligands. - -### Load Packages: +### Load packages ``` r library(nichenetr) # Please update to v2.0.4 @@ -84,23 +66,33 @@ library(tidyverse) If you would use and load other packages, we recommend to load these 3 packages after the others. -### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet’s networks + +The ligand-target prior model, ligand-receptor network, and weighted +integrated networks are needed for this vignette. The ligand-target +prior model is a matrix describing the potential that a ligand may +regulate a target gene, and it is used to run the ligand activity +analysis. The ligand-receptor network contains information on potential +ligand-receptor bindings, and it is used to identify potential ligands. +Finally, the weighted ligand-receptor network contains weights +representing the potential that a ligand will bind to a receptor, and it +is used for visualization. ``` r -organism = "mouse" +organism <- "mouse" if(organism == "human"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds")) } else if(organism == "mouse"){ - lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) - ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) - weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) + lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) + ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) + weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) } -lr_network = lr_network %>% distinct(from, to) +lr_network <- lr_network %>% distinct(from, to) head(lr_network) ## # A tibble: 6 × 2 ## from to @@ -119,7 +111,6 @@ ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns ## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 ## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 -weighted_networks_lr = weighted_networks$lr_sig %>% inner_join(lr_network, by = c("from","to")) head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network ## # A tibble: 6 × 3 ## from to weight @@ -142,24 +133,24 @@ head(weighted_networks$gr) # interactions and their weights in the gene regulato ## 6 0610010K14Rik Alox12 0.128 ``` -### Read in the expression data of interacting cells: +### Read in the expression data of interacting cells -The dataset used here is publicly available single-cell data from immune -cells in the T cell area of the inguinal lymph node. The data was -processed and aggregated by applying the Seurat alignment pipeline. The -Seurat object contains this aggregated data. Note that this should be a -Seurat v3/v4 object and that gene should be named by their official -mouse/human gene symbol. If your expression data has the older gene -symbols, you may want to use our alias conversion function to avoid the -loss of gene names. +We processed and aggregated the original dataset by using the Seurat +alignment pipeline. As we created this object using Seurat v3, it has to +be updated with `UpdateSeuratObject`. Note that genes should be named by +their official mouse/human gene symbol. If your expression data has the +older gene symbols, you may want to use our alias conversion function to +avoid the loss of gene names. ``` r -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) # For newer Seurat versions, you may need to run the following seuratObj <- UpdateSeuratObject(seuratObj) -seuratObj = alias_to_symbol_seurat(seuratObj, "mouse") # convert gene names +# Convert gene names +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") + seuratObj@meta.data %>% head() ## nGene nUMI orig.ident aggregate res.0.6 celltype nCount_RNA nFeature_RNA ## W380370 880 1611 LN_SS SS 1 CD8 T 1607 876 @@ -172,21 +163,22 @@ seuratObj@meta.data %>% head() Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells -(DCs) and inflammatory monocytes +(DCs) and inflammatory monocytes. ``` r -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application +# Note that the number of cells of some cell types is very low and should preferably be higher for a real application +seuratObj@meta.data$celltype %>% table() ## . ## B CD4 T CD8 T DC Mono NK Treg ## 382 2562 1645 18 90 131 199 DimPlot(seuratObj, reduction = "tsne") ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png) +![](seurat_wrapper_files/figure-gfm/umap-1-1.png) Visualize the data to see to which condition cells belong. The metadata -dataframe column that denotes the condition (steady-state or after LCMV -infection) is here called ‘aggregate’. +column that denotes the condition (steady-state or after LCMV infection) +is here called ‘aggregate’. ``` r seuratObj@meta.data$aggregate %>% table() @@ -196,83 +188,106 @@ seuratObj@meta.data$aggregate %>% table() DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png) +![](seurat_wrapper_files/figure-gfm/umap-2-1.png) # Perform the NicheNet analysis In this case study, we want to apply NicheNet to predict which ligands -expressed by all immune cells in the T cell area of the lymph node are -most likely to have induced the differential expression in CD8 T cells -after LCMV infection. +expressed by the microenvironment (sender-agnostic) and immune cells in +the T cell area of the lymph node (sender-focused) are most likely to +have induced the differential expression in CD8 T cells after LCMV +infection. In contrary to NicheNet v1 where we only used the +“sender-focused” approach, we now recommend users to run both the +“sender-agnostic” approach and “sender-focused” approach. These +approaches only affect the list of potential ligands that are considered +for prioritization. As described in the flowchart above, we do not +define any sender populations in the ‘sender agnostic’ approach but +consider all ligands for which its cognate receptor is expressed in the +receiver population. The sender-focused approach will then filter the +list of ligands to ones where the ligands are expressed in the sender +cell population(s). As described in the main vignette, the pipeline of a basic NicheNet -analysis consist of the following steps: - -- 1. Define a “sender/niche” cell population and a “receiver/target” - cell population present in your expression data and determine - which genes are expressed in both populations - -- 2. Define a gene set of interest: these are the genes in the - “receiver/target” cell population that are potentially affected by - ligands expressed by interacting cells (e.g. genes differentially - expressed upon cell-cell interaction) - -- 3. Define a set of potential ligands: these are ligands that are - expressed by the “sender/niche” cell population and bind a - (putative) receptor expressed by the “receiver/target” population - -- 4) Perform NicheNet ligand activity analysis: rank the potential - ligands based on the presence of their target genes in the gene - set of interest (compared to the background set of genes) - -- 5) Infer receptors and top-predicted target genes of ligands that are - top-ranked in the ligand activity analysis - -All these steps are contained in one of three following similar single -functions: `nichenet_seuratobj_aggregate`, -`nichenet_seuratobj_cluster_de` and -`nichenet_seuratobj_aggregate_cluster_de`. - -In addition to these steps, the function `nichenet_seuratobj_aggregate` -that is used for the analysis when having two conditions will also -calculate differential expression of the ligands in the sender cell -type. Note that this ligand differential expression is not used for -prioritization and ranking of the ligands! - -## NicheNet analysis on Seurat object: explain differential expression between two conditions - -In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, -‘Mono’, ‘NK’, ‘B’ and ‘DC’. The above described functions will consider -a gene to be expressed when it is expressed in at least a predefined -fraction of cells in one cluster (default: 10%). - -The gene set of interest are the genes differentially expressed in CD8 T -cells after LCMV infection. The condition of interest is thus ‘LCMV’, -whereas the reference/steady-state condition is ‘SS’. The notion of -conditions can be extracted from the metadata column ‘aggregate’, the -method to calculate the differential expression is the standard Seurat -Wilcoxon test. - -The number of top-ranked ligands that are further used to predict active -target genes and construct an active ligand-receptor network is 30 by -default. - -To perform the NicheNet analysis with these specifications, run the -following: - -``` r -# indicated cell types should be cell class identities -# check via: -# seuratObj %>% Idents() %>% table() -nichenet_output = nichenet_seuratobj_aggregate( +analysis consist of the following steps: \* 1. Define a set of potential +ligands for both the sender-agnostic and sender-focused approach \* 2. +Define the gene set of interest: these are the genes in the +“receiver/target” cell population that are potentially affected by +ligands expressed by interacting cells (e.g. genes differentially +expressed upon cell-cell interaction) \* 3. Define the background genes +\* 4. Perform NicheNet ligand activity analysis: rank the potential +ligands based on the presence of their target genes in the gene set of +interest (compared to the background set of genes) \* 5. Infer target +genes and receptors of top-ranked ligands + +All these steps are contained in one of three wrapper functions: +`nichenet_seuratobj_aggregate`, `nichenet_seuratobj_cluster_de` and +`nichenet_seuratobj_aggregate_cluster_de`. These functions differ on how +the gene set of interest is calculated, as follows: + +| **Function** | **Gene set of interest** | **Background genes** | +|-----------------------------------------|----------------------------------------------------|----------------------------------------| +| nichenet_seuratobj_aggregate | DE between two conditions of the same cell type | All expressed genes in the cell type | +| nichenet_seuratobj_cluster_de | DE between two cell types | All expressed genes in both cell types | +| nichenet_seuratobj_aggregate_cluster_de | DE between two cell types from specific conditions | All expressed genes in both cell types | + +**Note:** Cell types should be the identities of the seurat object +(check using `table(Idents(seuratObj))`) + +## `nichenet_seuratobj_aggregate`: explain differential expression between two conditions + +For the sender-agnostic approach the sender is set to ‘undefined’. The +receiver cell population is the ‘CD8 T’ cell population, and the gene +set of interest are the genes differentially expressed in CD8 T cells +after LCMV infection. Thus, the condition of interest is ‘LCMV’, whereas +the reference/steady-state condition is ‘SS’. The column containing +condition information is ‘aggregate’. The method to calculate +differential expression is the standard Seurat Wilcoxon test. To use +other methods, users will have to go through step-by-step analysis. The +number of top-ranked ligands that are further used to predict active +target genes and construct an active ligand-receptor network is 30 +(`top_n_ligands`). The number of target genes to consider per ligand +when performing the target gene inference is 200 (`top_n_targets`). We +only retain ligands and receptors that are expressed in at least a +predefined fraction of cells in one cluster (`expression_pct`, default: +10%). + +``` r +nichenet_output_agnostic <- nichenet_seuratobj_aggregate( seurat_obj = seuratObj, + sender = "undefined", receiver = "CD8 T", - condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks + ) +## [1] "Read in and process NicheNet's networks" +## [1] "Define expressed ligands and receptors in receiver and sender cells" +## [1] "Perform DE analysis in receiver cell" +## [1] "Perform NicheNet ligand activity analysis" +## [1] "Infer active target genes of the prioritized ligands" +## [1] "Infer receptors of the prioritized ligands" +``` + +For the sender-focused approach, simply provide one or more sender +populations: + +``` r +nichenet_output <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + receiver = "CD8 T", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, - weighted_networks = weighted_networks) + weighted_networks = weighted_networks + ) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -282,60 +297,84 @@ nichenet_output = nichenet_seuratobj_aggregate( ## [1] "Perform DE analysis in sender cells" ``` +**Note:** It is also possible that you want to consider all cell types +present as possible sender cell by defining `sender = "all"`. This also +includes the receiver cell type, making that you can look at autocrine +signaling as well. + ### Interpret the NicheNet analysis output +We will investigate the output of the sender-focused approach. + +``` r +names(nichenet_output) +## [1] "ligand_activities" "top_ligands" "top_targets" +## [4] "top_receptors" "ligand_target_matrix" "ligand_target_heatmap" +## [7] "ligand_target_df" "ligand_expression_dotplot" "ligand_differential_expression_heatmap" +## [10] "ligand_activity_target_heatmap" "ligand_receptor_matrix" "ligand_receptor_heatmap" +## [13] "ligand_receptor_df" "geneset_oi" "background_expressed_genes" +``` + #### Ligand activity analysis results -A first thing NicheNet does, is prioritizing ligands based on predicted -ligand activity. To see the ranking of these ligands, run the following -command: +To see the ranking of ligands based on the predicted ligand activity: ``` r nichenet_output$ligand_activities -## # A tibble: 73 × 6 +## # A tibble: 127 × 6 ## test_ligand auroc aupr aupr_corrected pearson rank ## -## 1 Ebi3 0.663 0.390 0.244 0.301 1 -## 2 Ptprc 0.642 0.310 0.165 0.167 2 -## 3 H2-M3 0.608 0.292 0.146 0.179 3 -## 4 H2-M2 0.611 0.279 0.133 0.153 5 -## 5 H2-T10 0.611 0.279 0.133 0.153 5 -## 6 H2-T22 0.611 0.279 0.133 0.153 5 -## 7 H2-T23 0.611 0.278 0.132 0.153 7 -## 8 H2-K1 0.605 0.268 0.122 0.142 8 -## 9 H2-Q4 0.605 0.268 0.122 0.141 10 -## 10 H2-Q6 0.605 0.268 0.122 0.141 10 -## # ℹ 63 more rows -``` - -The different ligand activity measures (auroc, aupr, pearson correlation -coefficient) are a measure for how well a ligand can predict the -observed differentially expressed genes compared to the background of -expressed genes. In our validation study, we showed that the area under -the precision-recall curve (AUPR) between a ligand’s target predictions -and the observed transcriptional response was the most informative -measure to define ligand activity (this was the Pearson correlation for -v1). Therefore, NicheNet ranks the ligands based on their AUPR. This -allows us to prioritize ligands inducing the antiviral response in CD8 T -cells. - -To get a list of the 30 top-ranked ligands: run the following command +## 1 Il27 0.682 0.391 0.316 0.445 1 +## 2 Ebi3 0.666 0.264 0.189 0.256 2 +## 3 Tnf 0.671 0.205 0.131 0.249 3 +## 4 Ptprc 0.660 0.198 0.124 0.168 4 +## 5 H2-Eb1 0.656 0.195 0.120 0.182 5 +## 6 Vsig10 0.649 0.194 0.119 0.170 6 +## 7 H2-M3 0.632 0.192 0.118 0.185 7 +## 8 Clcf1 0.637 0.175 0.101 0.162 8 +## 9 H2-M2 0.634 0.174 0.0989 0.146 11 +## 10 H2-T-ps 0.634 0.174 0.0989 0.146 11 +## # ℹ 117 more rows +``` + +Ligands are ranked based on the area under the precision-recall curve +(AUPR) between a ligand’s target predictions and the observed +transcriptional response. Although other metrics like the AUROC and +pearson correlation coefficient are also computed, we demonstrated in +our validation study that the AUPRwas the most informative measure to +define ligand activity (this was the Pearson correlation for v1). The +vignette on how we performed the validation can be found at [Evaluation +of NicheNet’s ligand-target predictions](model_evaluation.md). + +To get a list of the top 30 ligands: ``` r nichenet_output$top_ligands -## [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Sirpa" "Cd48" "Tgfb1" "Ccl22" "App" "Selplg" "Cxcl10" "Btla" "Adam17" "Icam1" "Cxcl11" "Tgm2" "B2m" -## [26] "Cxcl9" "Cd72" "Hp" "Itgb2" "Vcan" +## [1] "Il27" "Ebi3" "Tnf" "Ptprc" "H2-Eb1" "Vsig10" "H2-M3" "Clcf1" "H2-M2" "H2-T-ps" "H2-T10" "H2-T22" "H2-T24" "H2-T23" +## [15] "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "H2-Oa" "Il18bp" "Sirpa" "Cd48" "App" "Ccl22" "Siglech" "Ccl5" "Siglec1" +## [29] "Cd320" "Adam17" ``` -These ligands are expressed by one or more of the input sender cells. To -see which cell population expresses which of these top-ranked ligands, -you can run the following: +Below we will show visualizations that are in the output object. In some +cases (including this one), not all top ligands that are present in +`top_ligands` will be shown in the plot. The left-out ligands are +ligands that don’t have target genes with high enough regulatory +potential scores, and therefore did not survive the used cutoffs (in the +functions `get_weighted_ligand_target_links` and +`prepare_ligand_target_visualization` that are run internally). To +include them, you can increase the number of target genes considered or +be less stringent in the used cutoffs (`top_n_targets` and +`cutoff_visualization` , respectively). In this case, CCl22 (ranked +25th) is missing from the plots. + +To see which sender cell population expresses which of the top-ranked +ligands: ``` r nichenet_output$ligand_expression_dotplot ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png) +![](seurat_wrapper_files/figure-gfm/dotplot-sender-1.png) As you can see, most op the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. @@ -347,38 +386,40 @@ differentially expressed after LCMV infection. nichenet_output$ligand_differential_expression_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png) +![](seurat_wrapper_files/figure-gfm/lfc-heatmap-1.png) -As you can see, most op the top-ranked ligands seem also to be -upregulated themselves in monocytes after viral infection. This is not a -prerequisite to be top-ranked (cf: ranking only determined based on -enrichment of target genes among DE genes in the receiver, CD8T cells), -but is nice additional “evidence” that these ligands might indeed be -important. +Although this ligand differential expression is not used for +prioritization and ranking of the ligands (the ranking is only +determined based on enrichment of target genes among DE genes in the +receiver, CD8T cells), most of the top-ranked ligands also seem to be +upregulated themselves in monocytes after viral infection. This is nice +additional “evidence” that these ligands might indeed be important. #### Inferred active ligand-target links -NicheNet also infers active target genes of these top-ranked ligands. To -see which top-ranked ligands are predicted to have regulated the -expression of which differentially expressed genes, you can run -following command for a heatmap visualization: +NicheNet also infers active target genes of these top-ranked ligands, +best visualized with the following heatmap showing which top-ranked +ligands are predicted to have regulated the expression of which +differentially expressed genes: ``` r nichenet_output$ligand_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png) +![](seurat_wrapper_files/figure-gfm/ligand-target-heatmap-1.png) -This is a normal ggplot object that can be adapted likewise. For example -if you want to change the color code to blue instead of purple, change -the axis ticks of the legend, and change the axis labels of the heatmap, -you can do the following: +This is a normal ggplot object that can be adapted accordingly. For +example if you want to change the color code to blue instead of purple, +change the axis ticks of the legend, and change the axis labels of the +heatmap, you can do the following: ``` r -nichenet_output$ligand_target_heatmap + scale_fill_gradient2(low = "whitesmoke", high = "royalblue", breaks = c(0,0.0045,0.009)) + xlab("anti-LCMV response genes in CD8 T cells") + ylab("Prioritized immmune cell ligands") +nichenet_output$ligand_target_heatmap + + scale_fill_gradient2(low = "whitesmoke",high = "royalblue") + + xlab("Anti-LCMV response genes in CD8 T cells") + ylab("Prioritized immmune cell ligands") ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png) +![](seurat_wrapper_files/figure-gfm/ligand-target-heatmap-adapted-1.png) If you want, you can also extract the ligand-target links and their regulatory potential scores in matrix or data frame format (e.g. for @@ -386,92 +427,88 @@ visualization in other ways or output to a csv file). ``` r nichenet_output$ligand_target_matrix %>% .[1:10,1:6] -## Bst2 Cd274 Cxcl10 Cxcr4 Ddit4 Ddx58 -## Vcan 0.000000000 0.00000000 0.00000000 0.007730215 0.008496498 0.00000000 -## Itgb2 0.000000000 0.00000000 0.00000000 0.009843522 0.009705963 0.00000000 -## Hp 0.000000000 0.00000000 0.00000000 0.008886796 0.010263817 0.00000000 -## Cd72 0.000000000 0.00000000 0.00000000 0.008311072 0.009318998 0.00000000 -## B2m 0.000000000 0.00000000 0.00000000 0.009044523 0.010623390 0.00000000 -## Tgm2 0.010030030 0.00000000 0.04939643 0.014778849 0.015946489 0.04583594 -## Cxcl11 0.000000000 0.00000000 0.00000000 0.007855882 0.000000000 0.00000000 -## Icam1 0.008581207 0.00000000 0.01196470 0.012707198 0.014780600 0.00000000 -## Adam17 0.008167279 0.06549177 0.01109420 0.071842451 0.014236968 0.00000000 -## Btla 0.000000000 0.00000000 0.00000000 0.000000000 0.008851392 0.00000000 +## Adar B2m Bst2 Calhm6 Cd274 Cxcl10 +## Adam17 0 0.000000000 0.008167279 0 0.06549177 0.011094196 +## Cd320 0 0.000000000 0.000000000 0 0.00000000 0.000000000 +## Siglec1 0 0.000000000 0.000000000 0 0.00000000 0.000000000 +## Ccl5 0 0.000000000 0.000000000 0 0.00000000 0.008424993 +## Siglech 0 0.008857572 0.011974948 0 0.01257584 0.008780173 +## App 0 0.000000000 0.000000000 0 0.04432138 0.000000000 +## Cd48 0 0.000000000 0.000000000 0 0.00000000 0.000000000 +## Sirpa 0 0.000000000 0.000000000 0 0.00000000 0.007796006 +## Il18bp 0 0.000000000 0.000000000 0 0.00000000 0.007808540 +## H2.Oa 0 0.000000000 0.000000000 0 0.00000000 0.008143571 ``` ``` r nichenet_output$ligand_target_df # weight column = regulatory potential -## # A tibble: 510 × 3 -## ligand target weight -## -## 1 Ebi3 Bst2 0.0500 -## 2 Ebi3 Cd274 0.0504 -## 3 Ebi3 Cxcl10 0.0570 -## 4 Ebi3 Cxcr4 0.0430 -## 5 Ebi3 Ddit4 0.0485 -## 6 Ebi3 Ddx58 0.0402 -## 7 Ebi3 Ddx60 0.0488 -## 8 Ebi3 Dhx58 0.0406 -## 9 Ebi3 Dtx3l 0.0405 -## 10 Ebi3 Eif2ak2 0.0400 -## # ℹ 500 more rows +## # A tibble: 656 × 3 +## ligand target weight +## +## 1 Il27 Adar 0.163 +## 2 Il27 B2m 0.170 +## 3 Il27 Bst2 0.111 +## 4 Il27 Calhm6 0.129 +## 5 Il27 Cd274 0.111 +## 6 Il27 Cxcl10 0.178 +## 7 Il27 Cxcr4 0.178 +## 8 Il27 Ddx58 0.227 +## 9 Il27 Ddx60 0.160 +## 10 Il27 Dtx3l 0.150 +## # ℹ 646 more rows ``` To get a list of the top-predicted target genes of the 30 top-ranked -ligands: run the following command +ligands: ``` r nichenet_output$top_targets -## [1] "Bst2" "Cd274" "Cxcl10" "Cxcr4" "Ddit4" "Ddx58" "Ddx60" "Dhx58" "Dtx3l" "Eif2ak2" "Gbp7" "H2-D1" "H2-K1" "H2-M3" "H2-Q6" "H2-Q7" "Ifi35" "Ifit1bl1" "Ifit3" "Ifit3b" -## [21] "Irf1" "Irf7" "Irf9" "Isg15" "Lcp1" "Lgals3bp" "Mx1" "Mx2" "Nampt" "Nmi" "Oas1a" "Oas2" "Oas3" "Parp14" "Parp9" "Pml" "Psmb9" "Rsad2" "Stat1" "Stat2" -## [41] "Tap1" "Xaf1" "Zbp1" "Cd69" "H3f3b" "Id3" "Ifi44" "Ifih1" "H2-T10" "H2-T22" "H2-T23" "Vim" "Ifit2" "Isg20" "Gbp3" "Hspa5" "Ifit1" "Nt5c3" "Igfbp4" "Gbp2" -## [61] "Ifi203" "Ifi206" "Ifi208" "Ifi209" "Ifi213" "Mndal" "Ube2l6" +## [1] "Adar" "B2m" "Bst2" "Calhm6" "Cd274" "Cxcl10" "Cxcr4" "Ddx58" "Ddx60" "Dtx3l" "Eif2ak2" "Gbp2" "Gbp3" +## [14] "Gbp7" "H2-D1" "H2-K1" "H2-M3" "H2-Q6" "H2-Q7" "H2-T10" "H2-T22" "H2-T23" "Ifi203" "Ifi206" "Ifi208" "Ifi209" +## [27] "Ifi213" "Ifi35" "Ifi44" "Ifih1" "Ifit1bl1" "Ifit2" "Ifit3" "Ifit3b" "Ifitm3" "Irf1" "Irf7" "Irf9" "Lgals3bp" +## [40] "Ly6e" "Mndal" "Mx1" "Mx2" "Nampt" "Nlrc5" "Nmi" "Oas2" "Oas3" "Parp12" "Parp14" "Parp9" "Pml" +## [53] "Psmb8" "Psmb9" "Psme1" "Psme2b" "Rnf213" "Samhd1" "Sp110" "Stat1" "Stat2" "Tap1" "Tapbp" "Tnfsf10" "Trafd1" +## [66] "Ube2l6" "Xaf1" "Ddit4" "Dhx58" "Gzmb" "Isg15" "Lcp1" "Oas1a" "Oas1g" "Rsad2" "Zbp1" "Cd47" "Ctss" +## [79] "Trim21" "Cd69" "H3f3b" "Id3" "Vim" "Isg20" "Oasl1" "Hspa5" "Ifit1" "Nt5c3" "Usp18" "Basp1" "Plac8" +## [92] "Sp100" "Sp140" "Ubc" ``` -You can visualize the expression of these as well. Because we only focus -on CD8 T cells as receiver cells, we will only show expression in these -cells. To emphasize that these target genes are differentially -expressed, we split cells up in steadys-state cells and cells after -response to LCMV infection. +You can visualize the expression of these target genes as well (only the +top 50 are shown here). Because we only focus on CD8 T cells as receiver +cells, we will only show expression in these cells. To emphasize that +these target genes are differentially expressed, we split cells up in +steady-state cells and cells after response to LCMV infection. ``` r -DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_targets %>% rev(), split.by = "aggregate") + RotatedAxis() +DotPlot(seuratObj %>% subset(idents = "CD8 T"), + features = nichenet_output$top_targets[1:50] %>% + rev(), split.by = "aggregate") + coord_flip() ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png) +![](seurat_wrapper_files/figure-gfm/dotplot-condition-1.png) ``` r -VlnPlot(seuratObj %>% subset(idents = "CD8 T"), features = c("Zbp1","Ifit3","Irf7"), split.by = "aggregate", pt.size = 0, combine = FALSE) -## [[1]] +VlnPlot(seuratObj %>% subset(idents = "CD8 T"), + features = c("Ptprc", "H2-M3", "Cxcl10"), split.by = "aggregate", pt.size = 0, combine = TRUE) ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png) - - ## - ## [[2]] - -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png) - - ## - ## [[3]] +![](seurat_wrapper_files/figure-gfm/violin-plot-1.png) -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png) - -To visualize ligand activities, expression, differential expression and -target genes of ligands, run the following command +The display the combined plot of ligand activities, expression, +differential expression and target genes of ligands: ``` r nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png) +![](seurat_wrapper_files/figure-gfm/summary-vis-1.png) -**important: above figure can be considered as one of the most important -summary figures of the NicheNet analysis. Here you can see which -ligand-receptor pairs have both high differential expression and ligand -activity (=target gene enrichment). These are very interesting +**Important: the above figure can be considered as one of the most +important summary figures of the NicheNet analysis. Here you can see +which ligand-receptor pairs have both high differential expression and +ligand activity (=target gene enrichment). These are very interesting predictions as key regulators of your intercellular communication -process of interest ! ** +process of interest!** #### Inferred ligand-receptor interactions for top-ranked ligands @@ -483,7 +520,7 @@ the ligand-receptor links: nichenet_output$ligand_receptor_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png) +![](seurat_wrapper_files/figure-gfm/ligand-receptor-heatmap-1.png) If you want, you can also extract the ligand-receptor links and their interaction confidence scores in matrix or data frame format (e.g. for @@ -491,44 +528,43 @@ visualization in other ways or output to a csv file). ``` r nichenet_output$ligand_receptor_matrix %>% .[1:10,1:6] -## H2.T23 H2.T22 H2.T10 H2.Q7 H2.Q6 H2.Q4 -## Itgb2 0 0 0 0 0 0 -## Spn 0 0 0 0 0 0 -## Msn 0 0 0 0 0 0 -## Itgal 0 0 0 0 0 0 -## Ezr 0 0 0 0 0 0 -## Il2rg 0 0 0 0 0 0 -## Sell 0 0 0 0 0 0 -## Itga4 0 0 0 0 0 0 -## Selplg 0 0 0 0 0 0 -## Tap1 0 0 0 0 0 0 +## H2.T24 H2.T23 H2.T22 H2.T10 H2.T.ps H2.Q7 +## Il6ra 0 0 0 0 0 0 +## Itgb1 0 0 0 0 0 0 +## Notch1 0 0 0 0 0 0 +## Ptprc 0 0 0 0 0 0 +## Spn 0 0 0 0 0 0 +## Cd47 0 0 0 0 0 0 +## Cd69 0 0 0 0 0 0 +## Ccr7 0 0 0 0 0 0 +## Dpp4 0 0 0 0 0 0 +## Cd247 0 0 0 0 0 0 ``` ``` r nichenet_output$ligand_receptor_df # weight column accords to number of data sources that document this interaction -## # A tibble: 53 × 3 +## # A tibble: 54 × 3 ## ligand receptor weight ## -## 1 Adam17 Notch1 1.05 -## 2 App Cd74 0.670 -## 3 B2m Klrd1 0.733 -## 4 B2m Tap1 0.782 -## 5 B2m Tap2 0.834 -## 6 Btla Cd247 0.333 -## 7 Ccl22 Ccr7 0.679 -## 8 Ccl22 Dpp4 0.717 -## 9 Cd48 Cd2 0.964 -## 10 Cd72 Cd5 0.786 -## # ℹ 43 more rows +## 1 Adam17 Il6ra 0.447 +## 2 Adam17 Itgb1 0.454 +## 3 Adam17 Notch1 1.05 +## 4 App Cd74 0.670 +## 5 App Sorl1 0.922 +## 6 Ccl22 Ccr7 0.679 +## 7 Ccl22 Dpp4 0.717 +## 8 Ccl5 Cxcr3 0.848 +## 9 Cd320 Jaml 0.507 +## 10 Cd320 Tmem167 0.432 +## # ℹ 44 more rows ``` -To get a list of the receptors of the 20 top-ranked ligands: run the -following command +To get a list of the receptors of the 30 top-ranked ligands: ``` r nichenet_output$top_receptors -## [1] "Notch1" "Cd74" "Klrd1" "Tap1" "Tap2" "Cd247" "Ccr7" "Dpp4" "Cd2" "Cd5" "Il27ra" "Cd8a" "Itgb2" "Ezr" "Il2rg" "Itgal" "Msn" "Spn" "Cd82" "Thy1" "Sell" "Cd47" "Cd69" "Tgfbr2" "Itga4" -## [26] "Selplg" +## [1] "Il6ra" "Itgb1" "Notch1" "Cd74" "Sorl1" "Ccr7" "Dpp4" "Cxcr3" "Jaml" "Tmem167" "Cd2" "Il6st" "Il27ra" +## [14] "Cd8a" "Klrd1" "Cd4" "Cd247" "Cd47" "Ptprc" "Spn" "Cd69" "Tnfrsf1b" ``` You can visualize the expression of these as well. Because we only focus @@ -536,99 +572,59 @@ on CD8 T cells as receiver cells, we will only show expression in these cells. ``` r -DotPlot(seuratObj %>% subset(idents = "CD8 T"), features = nichenet_output$top_receptors %>% rev(), split.by = "aggregate") + RotatedAxis() +DotPlot(seuratObj %>% subset(idents = "CD8 T"), + features = nichenet_output$top_receptors %>% rev(), split.by = "aggregate") + + coord_flip() ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png) +![](seurat_wrapper_files/figure-gfm/dotplot-receptors-1.png) If you are interested in checking which geneset (and background set of genes) was used during the ligand activity analysis: ``` r nichenet_output$geneset_oi -## [1] "Ifi27l2b" "Irf7" "Ly6a" "Stat1" "Ly6c2" "Ifit3" "Ifit1" "Ly6c1" "Bst2" "B2m" "Rnf213" "Ifit1bl1" "Plac8" "Slfn1" -## [15] "Ifi209" "Isg15" "Igtp" "Ifi206" "Shisa5" "Ms4a4c" "H2-K1" "Zbp1" "Oasl2" "Isg20" "Samhd1" "Ifi208" "Ms4a6b" "Trim30a" -## [29] "Usp18" "Mndal" "H2-T23" "Slfn8" "Gbp2" "Ifi203" "Iigp1" "Tmsb4x" "H2-T22" "Rsad2" "Ly6e" "Rtp4" "Ifit3b" "Zfas1" -## [43] "Ifit2" "Phf11b" "Xaf1" "Smchd1" "Daxx" "Alb" "Samd9l" "Actb" "Parp9" "Gbp4" "Lgals3bp" "Mx1" "Ifi213" "Irgm1" -## [57] "2410006H16Rik" "Gbp7" "Cmpk2" "Dtx3l" "Slfn5" "H2-D1" "Oasl1" "Herc6" "Ifih1" "Rpsa" "P2ry13" "Irgm2" "Tapbp" "Rps8" -## [71] "Stat2" "Ifi44" "Phf11c" "Rpl8" "Psmb8" "Gm12250" "Igfbp4" "Rplp2-ps1" "Ddx58" "Rac2" "Trafd1" "Sp100" "Gbp9" "Pml" -## [85] "Oas2" "Slfn2" "Psme1" "Apoe" "Gas5" "H2-Q7" "Basp1" "Ms4a4b" "Rps27a" "Cd52" "Znfx1" "Rpl13" "Oas3" "Nt5c3" -## [99] "Rnf114" "Tap1" "Rps28" "Oas1a" "Rplp0" "Ddx60" "Vim" "Gbp6" "Ifi35" "Itm2b" "Ctss" "Tgtp1" "Pabpc1" "H2-Q6" -## [113] "Parp14" "Hspa8" "Tor3a" "Rpl23" "Mx2" "Tmbim6" "Thy1" "Ncoa7" "Dhx58" "Rps10" "Rps19" "Psmb9" "Il2rg" "Etnk1" -## [127] "Irf9" "Rps3a1" "Gbp10" "1600014C10Rik" "Parp12" "Trim30d" "Eif2ak2" "Eef1b2" "Eef2" "Npc2" "Rps2" "Rps3" "Sp110" "Ube2l6" -## [141] "Nmi" "Uba7" "Psmb10" "Cxcl10" "Rpl13a" "Trim30c" "Nhp2" "Tbrg1" "Jaml" "Usp25" "Tor1aip2" "Adar" "Gzma" "Gm2000" -## [155] "Rps18-ps5" "Cd53" "Phf11" "Hspa5" "Cfl1" "Crip1" "Slco3a1" "Tlr7" "Trim21" "Gbp8" "Rpl10" "Mycbp2" "Rps16" "Nlrc5" -## [169] "Rplp2" "Acadl" "Trim12c" "Rps4x" "Irf1" "Psma2" "Nme2" "Tut4" "Apobec3" "Snord12" "Phip" "Ifitm3" "Sp140" "Dusp2" -## [183] "Mrpl30" "Malat1" "H2-M3" "Gbp3" "Tmsb10" "Dtx1" "Eef1g" "Rbl1" "Epb41l4aos" "Xpo1" "Rgcc" "Gm9844" "Rpl35" "Rps26" -## [197] "Cxcr4" "Eif3m" "Treml2" "Rpl35a" "Pdcd4" "Arrb2" "Ubc" "Clic4" "H2-T10" "Rpl10a" "Lcp1" "Cd274" "Ddit4" "Cnn2" -## [211] "Nampt" "Ascc3" "Cd47" "Snord49b" "Ilrun" "Calhm6" "Psme2b" "Hcst" "Myh9" "Rps27" "Mov10" "Gm15772" "Arf4" "Arhgdib" -## [225] "Ppib" "Ubb" "Trim25" "Tspo" "Id3" "Snord35a" "Rnf8" "Casp8" "Ptpn7" "Itk" "Rps27rt" "Cd69" "H3f3b" "Nop10" -## [239] "Anxa6" "Hk1" "Prkcb" "Iqgap1" "Keap1" "Rpl7" "Parp10" +## [1] "Ifi27l2b" "Irf7" "Ly6a" "Stat1" "Ly6c2" "Ifit3" "Ifit1" "Ly6c1" "Bst2" +## [10] "B2m" "Rnf213" "Ifit1bl1" "Plac8" "Slfn1" "Ifi209" "Isg15" "Igtp" "Ifi206" +## [19] "Shisa5" "Ms4a4c" "H2-K1" "Zbp1" "Oasl2" "Isg20" "Samhd1" "Ifi208" "Ms4a6b" +## [28] "Trim30a" "Usp18" "Mndal" "H2-T23" "Slfn8" "Gbp2" "Ifi203" "Iigp1" "Tmsb4x" +## [37] "H2-T22" "Rsad2" "Ly6e" "Rtp4" "Ifit3b" "Zfas1" "Ifit2" "Phf11b" "Xaf1" +## [46] "Smchd1" "Daxx" "Alb" "Samd9l" "Actb" "Parp9" "Gbp4" "Lgals3bp" "Mx1" +## [55] "Ifi213" "Irgm1" "2410006H16Rik" "Gbp7" "Cmpk2" "Dtx3l" "Slfn5" "H2-D1" "Oasl1" +## [64] "Herc6" "Ifih1" "Rpsa" "P2ry13" "Apoa2" "Irgm2" "Tapbp" "Rps8" "Stat2" +## [73] "Ifi44" "Phf11c" "Rpl8" "Psmb8" "Gm12250" "Igfbp4" "Rplp2-ps1" "Ddx58" "Rac2" +## [82] "Trafd1" "Sp100" "Gbp9" "Pml" "Oas2" "Slfn2" "Psme1" "Apoe" "Gas5" +## [91] "H2-Q7" "Basp1" "Ms4a4b" "Rps27a" "Cd52" "Znfx1" "Rpl13" "Ahsg" "Oas3" +## [100] "Nt5c3" "Rnf114" "Tap1" "Rps28" "Oas1a" "Rplp0" "Ddx60" "Vim" "Gbp6" +## [109] "Ifi35" "Itm2b" "Ctss" "Tgtp1" "Trf" "Pabpc1" "H2-Q6" "Parp14" "Hspa8" +## [118] "Tor3a" "Rpl23" "Mx2" "Tmbim6" "Thy1" "Ncoa7" "Dhx58" "Rps10" "Rps19" +## [127] "Psmb9" "Il2rg" "Etnk1" "Irf9" "Rps3a1" "Gbp10" "1600014C10Rik" "Parp12" "Trim30d" +## [136] "Eif2ak2" "Eef1b2" "Eef2" "Ncf2" "Npc2" "Rps2" "Rps3" "Sp110" "Ube2l6" +## [145] "Nmi" "Uba7" "Psmb10" "Cxcl10" "Rpl13a" "Trim30c" "Nhp2" "Tbrg1" "Jaml" +## [154] "Usp25" "Tor1aip2" "Adar" "Gzma" "Gm2000" "Rps18-ps5" "Cd53" "Phf11" "Hspa5" +## [163] "Cfl1" "Crip1" "Slco3a1" "Tlr7" "Trim21" "Gbp8" "Rpl10" "Mycbp2" "Rps16" +## [172] "Nlrc5" "Rplp2" "Acadl" "Trim12c" "Rps4x" "Irf1" "Psma2" "Nme2" "Tut4" +## [181] "Apobec3" "Snord12" "Phip" "Gzmb" "Ifitm3" "Sp140" "Dusp2" "Mrpl30" "Malat1" +## [190] "H2-M3" "Gbp3" "Tmsb10" "Dtx1" "Tmem184b" "Eef1g" "Rbl1" "Epb41l4aos" "Xpo1" +## [199] "Rgcc" "Gm9844" "Rpl35" "Rps26" "Il18bp" "Sdc3" "Cxcr4" "Eif3m" "Treml2" +## [208] "Rpl35a" "Lgals8" "Pdcd4" "Arrb2" "Ubc" "Clic4" "H2-T10" "Rpl10a" "Lcp1" +## [217] "Cd274" "Ddit4" "Cnn2" "Nampt" "Ascc3" "Ms4a6d" "Cd47" "Ogfrl1" "Snord49b" +## [226] "Ilrun" "Calhm6" "Psme2b" "Hcst" "Myh9" "Rps27" "Mov10" "Gm15772" "Arf4" +## [235] "Arhgdib" "Ppib" "Ubb" "Trim25" "Tspo" "Id3" "Snord35a" "Zup1" "Oas1g" +## [244] "Ms4a6c" "Rnf8" "Casp8" "Tnfsf10" "Ptpn7" "Itk" "Rps27rt" "Cd69" "H3f3b" +## [253] "Nop10" "Anxa6" "Hk1" "Prkcb" "Iqgap1" "Keap1" "Rpl7" "Parp10" nichenet_output$background_expressed_genes %>% length() -## [1] 1662 -``` - -### Rerun the NicheNet analysis with different sender cell definition - -Instead of focusing on multiple sender cell types, it is possible that -you are only interested in doing the analyis for one sender cell type, -such as dendritic cells in this case. - -``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "DC", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -## [1] "Read in and process NicheNet's networks" -## [1] "Define expressed ligands and receptors in receiver and sender cells" -## [1] "Perform DE analysis in receiver cell" -## [1] "Perform NicheNet ligand activity analysis" -## [1] "Infer active target genes of the prioritized ligands" -## [1] "Infer receptors of the prioritized ligands" -## [1] "Perform DE analysis in sender cells" - -nichenet_output$ligand_activity_target_heatmap -``` - -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png) - -Instead of focusing on one or multiple predefined sender cell types, it -is also possible that you want to consider all cell types present as -possible sender cell. This also includes the receiver cell type, making -that you can look at autocrine signaling as well. - -``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "all", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -## [1] "Read in and process NicheNet's networks" -## [1] "Define expressed ligands and receptors in receiver and sender cells" -## [1] "Perform DE analysis in receiver cell" -## [1] "Perform NicheNet ligand activity analysis" -## [1] "Infer active target genes of the prioritized ligands" -## [1] "Infer receptors of the prioritized ligands" -## [1] "Perform DE analysis in sender cells" - -nichenet_output$ligand_activity_target_heatmap +## [1] 3476 ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png) - -In some cases, it could be possible that you don’t have data of -potential sender cells. If you still want to predict possible upstream -ligands that could have been responsible for the observed differential -expression in your cell type, you can do this by following command. This -will consider all possible ligands in the NicheNet databases for which a -receptor is expressed by the receiver cell of interest. +### Results of the sender-agnostic approach ``` r -nichenet_output = nichenet_seuratobj_aggregate(seurat_obj = seuratObj, receiver = "CD8 T", condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = "undefined", ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) -## [1] "Read in and process NicheNet's networks" -## [1] "Define expressed ligands and receptors in receiver and sender cells" -## [1] "Perform DE analysis in receiver cell" -## [1] "Perform NicheNet ligand activity analysis" -## [1] "Infer active target genes of the prioritized ligands" -## [1] "Infer receptors of the prioritized ligands" - -nichenet_output$ligand_activity_target_heatmap +# There is no log-fold change or expression plot because we did not define cell types +nichenet_output_agnostic$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png) +![](seurat_wrapper_files/figure-gfm/summary-vis-agnostic-1.png) As you can see in this analysis result, many genes DE in CD8 T cells after LCMV infection are strongly predicted type I interferon targets. @@ -640,7 +636,7 @@ The latter could make sense, because there always is a time delay between expression of a ligand-encoding gene and the effect of the ligand on a target/receiver cell (i.e. expression of target genes). -### Run multiple NicheNet analyses on different receiver cell populations +#### Running multiple NicheNet analyses on different receiver cell populations In some cases, you might be interested in multiple target/receiver cell populations. You can decide to run this for every cell type separately, @@ -649,10 +645,20 @@ example, we could have been interested in explaining DE between steady-state and LCMV infection in both CD8 and CD4 T cells. ``` r -receiver_celltypes_oi = c("CD4 T", "CD8 T") -# receiver_celltypes_oi = seuratObj %>% Idents() %>% unique() # for all celltypes in the dataset: use only when this would make sense biologically +# To run with all celltypes in the dataset (only when this would make sense biologically!) +# receiver_celltypes_oi <- seuratObj %>% Idents() %>% unique() -nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, seurat_obj = seuratObj, condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) +receiver_celltypes_oi <- c("CD4 T", "CD8 T") + +nichenet_output <- receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, + seurat_obj = seuratObj, + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis in receiver cell" @@ -668,69 +674,65 @@ nichenet_output = receiver_celltypes_oi %>% lapply(nichenet_seuratobj_aggregate, ## [1] "Infer receptors of the prioritized ligands" ## [1] "Perform DE analysis in sender cells" -names(nichenet_output) = receiver_celltypes_oi +names(nichenet_output) <- receiver_celltypes_oi ``` Check which ligands were top-ranked for both CD8T and CD4T and which ligands were more cell-type specific ``` r -common_ligands = intersect(nichenet_output$`CD4 T`$top_ligands, nichenet_output$`CD8 T`$top_ligands) -print("common ligands are: ") -## [1] "common ligands are: " +common_ligands <- intersect(nichenet_output$`CD4 T`$top_ligands, nichenet_output$`CD8 T`$top_ligands) +print("Common ligands:") +## [1] "Common ligands:" print(common_ligands) -## [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "Sirpa" "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Ccl22" "Cd48" "App" "Tgfb1" "Selplg" "Icam1" "Btla" "Cd72" "B2m" "Hp" "Itgb2" +## [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "Sirpa" "H2-K1" "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Ccl22" "Cd48" "App" +## [17] "Tgfb1" "Selplg" "Icam1" "Btla" "Cd72" "B2m" "Hp" "Itgb2" -cd4_ligands = nichenet_output$`CD4 T`$top_ligands %>% setdiff(nichenet_output$`CD8 T`$top_ligands) -cd8_ligands = nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`CD4 T`$top_ligands) +cd4_ligands <- nichenet_output$`CD4 T`$top_ligands %>% setdiff(nichenet_output$`CD8 T`$top_ligands) +cd8_ligands <- nichenet_output$`CD8 T`$top_ligands %>% setdiff(nichenet_output$`CD4 T`$top_ligands) -print("Ligands specifically regulating DE in CD4T: ") -## [1] "Ligands specifically regulating DE in CD4T: " +print("Ligands specifically regulating DE in CD4T:") +## [1] "Ligands specifically regulating DE in CD4T:" print(cd4_ligands) ## [1] "H2-Eb1" "H2-Oa" "Il16" "Fn1" "H2-DMb1" "H2-DMb2" -print("Ligands specifically regulating DE in CD8T: ") -## [1] "Ligands specifically regulating DE in CD8T: " +print("Ligands specifically regulating DE in CD8T:") +## [1] "Ligands specifically regulating DE in CD8T:" print(cd8_ligands) ## [1] "Cxcl10" "Adam17" "Cxcl11" "Tgm2" "Cxcl9" "Vcan" ``` -## NicheNet analysis on Seurat object: explain differential expression between two cell populations +## `nichenet_seuratobj_cluster_de`: explain differential expression between two cell types -Previously, we demonstrated the use of a wrapper function for applying -NicheNet to explain differential expression between two conditions in -one cell type. However, also differential expression between two cell -populations might sometimes be (partially) caused by communication with -cells in the neighborhood. For example, differentiation from a -progenitor cell to the differentiated cell might be induced by niche -cells. A concrete example is discussed in this paper: [Stellate Cells, -Hepatocytes, and Endothelial Cells Imprint the Kupffer Cell Identity on -Monocytes Colonizing the Liver Macrophage +Unlike the case above where we applied NicheNet to explain differential +expression between two conditions in one cell type, here we try to +explain differential expression between two cell populations. DE between +cell populations are sometimes (partially) caused by communication with +cells in the neighborhood, e.g., the differentiation from a progenitor +cell to a differentiated cell might be induced by niche cells. A +concrete example is discussed in the paper by Bonnardel et al. (2019): +[Stellate Cells, Hepatocytes, and Endothelial Cells Imprint the Kupffer +Cell Identity on Monocytes Colonizing the Liver Macrophage Niche](https://www.cell.com/immunity/fulltext/S1074-7613(19)30368-1). -Therefore, we will now also demonstrate the use of another Seurat -wrapper function that can be used in the case of explaining differential -expression between cell populations. But keep in mind that the -comparison that you make should be biologically relevant. It is possible -to use NicheNet to explain differential expression between any two cell -populations in your dataset, but in most cases, differential expression -between cell populations will be a result of cell-intrinsic properties +However, keep in mind that the comparison that you make should be +biologically relevant. as in most cases, differential expression between +cell populations will be a result of cell-intrinsic properties (i.e. different cell types have a different gene expression profile) and -not of intercellular communication processes. In such a case, it does +not of an intercellular communication processes. In such a case, it does not make any sense to use NicheNet. -For demonstration purposes, we will here first change the seuratObject -of the data described above, such that it can be used in this setting. +For demonstration purposes, we will change the Seurat object of the same +dataset such that it can be used in this setting. ``` r -seuratObj@meta.data$celltype = paste(seuratObj@meta.data$celltype,seuratObj@meta.data$aggregate, sep = "_") - -seuratObj@meta.data$celltype %>% table() +seuratObj <- SetIdent(seuratObj, value = paste(seuratObj$celltype, seuratObj$aggregate, sep = "_")) +Idents(seuratObj) %>% table() ## . -## B_LCMV B_SS CD4 T_LCMV CD4 T_SS CD8 T_LCMV CD8 T_SS DC_LCMV DC_SS Mono_LCMV Mono_SS NK_LCMV NK_SS Treg_LCMV Treg_SS -## 344 38 1961 601 1252 393 14 4 75 15 94 37 146 53 - -seuratObj = SetIdent(seuratObj,value = "celltype") +## CD8 T_SS CD4 T_SS Treg_SS B_SS NK_SS Mono_SS DC_SS CD8 T_LCMV CD4 T_LCMV B_LCMV Treg_LCMV NK_LCMV Mono_LCMV +## 393 601 53 38 37 15 4 1252 1961 344 146 94 75 +## DC_LCMV +## 14 ``` Now perform the NicheNet analysis to explain differential expression @@ -740,11 +742,14 @@ steady-state’ by ligands expressed by monocytes and DCs after LCMV infection. ``` r -nichenet_output = nichenet_seuratobj_cluster_de( +nichenet_output <- nichenet_seuratobj_cluster_de( seurat_obj = seuratObj, - receiver_reference = "CD8 T_SS", receiver_affected = "CD8 T_LCMV", - sender = c("DC_LCMV","Mono_LCMV"), - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks) + receiver_reference = "CD8 T_SS", + receiver_affected = "CD8 T_LCMV", + sender = c("DC_LCMV", "Mono_LCMV"), + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" ## [1] "Perform DE analysis between two receiver cell clusters" @@ -753,30 +758,33 @@ nichenet_output = nichenet_seuratobj_cluster_de( ## [1] "Infer receptors of the prioritized ligands" ``` -Check the top-ranked ligands and their target genes +Check the top-ranked ligands and their target genes: ``` r nichenet_output$ligand_activity_target_heatmap ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png) +![](seurat_wrapper_files/figure-gfm/summary-vis-cluster-de-1.png) -Check the expression of the top-ranked ligands +Check the expression of the top-ranked ligands: ``` r -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() +DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + + RotatedAxis() ``` -![](seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png) +![](seurat_wrapper_files/figure-gfm/dotplot-cluster-de-1.png) -It could be interested to check which top-ranked ligands are -differentially expressed in monocytes after LCMV infection +It could be interesting to check which top-ranked ligands are +differentially expressed in monocytes after LCMV infection: ``` r -Mono_upregulated_ligands = FindMarkers(seuratObj, ident.1 = "Mono_LCMV", ident.2 = "Mono_SS") %>% rownames_to_column("gene") %>% filter(avg_log2FC > 0.25 & p_val_adj <= 0.05) %>% pull(gene) %>% intersect(nichenet_output$top_ligands) +Mono_upregulated_ligands <- FindMarkers(seuratObj, ident.1 = "Mono_LCMV", ident.2 = "Mono_SS") %>% + rownames_to_column("gene") %>% filter(avg_log2FC > 0.25 & p_val_adj <= 0.05) %>% + pull(gene) %>% intersect(nichenet_output$top_ligands) -print("Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-StSt and CD8T-LCMV are: ") -## [1] "Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-StSt and CD8T-LCMV are: " +print("Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-SS and CD8T-LCMV are: ") +## [1] "Monocyte ligands upregulated after LCMV infection and explaining DE between CD8T-SS and CD8T-LCMV are: " print(Mono_upregulated_ligands) ## [1] "B2m" "H2-D1" "Cxcl10" ``` @@ -791,10 +799,66 @@ print(Mono_upregulated_ligands) it to other clusters. This is not optimal if you would have repeated samples for your conditions. In such a case, we recommend to follow the vignette [Perform NicheNet analysis starting from a Seurat - object: step-by-step - analysis](seurat_steps.md):`vignette("seurat_steps", package="nichenetr")` - and tweak the differential expression step there (and perform the - analysis e.g. as discussed in ). + object: step-by-step analysis](seurat_steps.md) and tweak the + differential expression step there (and perform the analysis e.g., + as discussed in ). + +``` r +sessionInfo() +## R version 4.3.2 (2023-10-31) +## Platform: x86_64-redhat-linux-gnu (64-bit) +## Running under: CentOS Stream 8 +## +## Matrix products: default +## BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so; LAPACK version 3.9.0 +## +## locale: +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 +## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## +## time zone: Asia/Bangkok +## tzcode source: system (glibc) +## +## attached base packages: +## [1] stats graphics grDevices utils datasets methods base +## +## other attached packages: +## [1] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 tibble_3.2.1 +## [8] ggplot2_3.4.4 tidyverse_1.3.1 SeuratObject_5.0.1 Seurat_4.4.0 nichenetr_2.0.4 +## +## loaded via a namespace (and not attached): +## [1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 lubridate_1.9.3 httr_1.4.7 +## [7] RColorBrewer_1.1-3 doParallel_1.0.17 tools_4.3.2 sctransform_0.4.0 backports_1.4.1 utf8_1.2.4 +## [13] R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.2 sp_2.1-2 +## [19] gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.2 spatstat.explore_3.2-1 labeling_0.4.3 +## [25] spatstat.data_3.0-3 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.5 pbapply_1.7-2 foreign_0.8-85 +## [31] parallelly_1.36.0 limma_3.56.2 readxl_1.4.3 rstudioapi_0.15.0 visNetwork_2.1.2 generics_0.1.3 +## [37] shape_1.4.6 ica_1.0-3 spatstat.random_3.2-2 car_3.1-2 Matrix_1.6-4 ggbeeswarm_0.7.2 +## [43] fansi_1.0.6 S4Vectors_0.38.1 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.8 carData_3.0-5 +## [49] recipes_1.0.7 Rtsne_0.17 grid_4.3.2 promises_1.2.1 crayon_1.5.2 miniUI_0.1.1.1 +## [55] lattice_0.21-9 haven_2.4.3 cowplot_1.1.2 pillar_1.9.0 knitr_1.45 ComplexHeatmap_2.16.0 +## [61] rjson_0.2.21 future.apply_1.11.0 codetools_0.2-19 leiden_0.3.9 glue_1.6.2 data.table_1.14.10 +## [67] vctrs_0.6.5 png_0.1-8 spam_2.10-0 cellranger_1.1.0 gtable_0.3.4 assertthat_0.2.1 +## [73] gower_1.0.1 xfun_0.41 mime_0.12 prodlim_2023.08.28 survival_3.5-7 timeDate_4032.109 +## [79] iterators_1.0.14 hardhat_1.3.0 lava_1.7.3 DiagrammeR_1.0.10 ellipsis_0.3.2 fitdistrplus_1.1-11 +## [85] ROCR_1.0-11 ipred_0.9-14 nlme_3.1-163 RcppAnnoy_0.0.21 irlba_2.3.5.1 vipor_0.4.5 +## [91] KernSmooth_2.23-22 rpart_4.1.21 colorspace_2.1-0 BiocGenerics_0.46.0 DBI_1.1.3 Hmisc_5.1-0 +## [97] nnet_7.3-19 ggrastr_1.0.2 tidyselect_1.2.0 compiler_4.3.2 rvest_1.0.2 htmlTable_2.4.1 +## [103] xml2_1.3.6 plotly_4.10.0 shadowtext_0.1.2 checkmate_2.3.1 scales_1.3.0 caTools_1.18.2 +## [109] lmtest_0.9-40 digest_0.6.33 goftest_1.2-3 spatstat.utils_3.0-4 rmarkdown_2.11 htmltools_0.5.7 +## [115] pkgconfig_2.0.3 base64enc_0.1-3 highr_0.10 dbplyr_2.1.1 fastmap_1.1.1 rlang_1.1.2 +## [121] GlobalOptions_0.1.2 htmlwidgets_1.6.2 shiny_1.7.1 farver_2.1.1 zoo_1.8-12 jsonlite_1.8.8 +## [127] ModelMetrics_1.2.2.2 magrittr_2.0.3 Formula_1.2-5 dotCall64_1.1-1 patchwork_1.1.3 munsell_0.5.0 +## [133] Rcpp_1.0.11 ggnewscale_0.4.9 reticulate_1.34.0 stringi_1.7.6 pROC_1.18.5 MASS_7.3-60 +## [139] plyr_1.8.9 parallel_4.3.2 listenv_0.9.0 ggrepel_0.9.4 deldir_2.0-2 splines_4.3.2 +## [145] tensor_1.5 hms_1.1.3 circlize_0.4.15 igraph_1.2.11 ggpubr_0.6.0 spatstat.geom_3.2-7 +## [151] ggsignif_0.6.4 reshape2_1.4.4 stats4_4.3.2 reprex_2.0.1 evaluate_0.23 modelr_0.1.8 +## [157] tzdb_0.4.0 foreach_1.5.2 tweenr_2.0.2 httpuv_1.6.13 RANN_2.6.1 polyclip_1.10-6 +## [163] future_1.33.0 clue_0.3-64 scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 +## [169] e1071_1.7-14 rstatix_0.7.2 later_1.3.2 viridisLite_0.4.2 class_7.3-22 beeswarm_0.4.0 +## [175] IRanges_2.34.1 cluster_2.1.4 timechange_0.2.0 globals_0.16.2 caret_6.0-94 +``` # References diff --git a/vignettes/seurat_wrapper_circos.Rmd b/vignettes/seurat_wrapper_circos.Rmd index c5cf4f3..5f090b4 100644 --- a/vignettes/seurat_wrapper_circos.Rmd +++ b/vignettes/seurat_wrapper_circos.Rmd @@ -10,7 +10,6 @@ vignette: > bibliography: library.bib --- - @@ -24,21 +23,16 @@ knitr::opts_chunk$set( ) ``` -In this vignette, you can learn how to perform a basic NicheNet analysis on a Seurat v3 object - and how to visualize the output in a circos plot. This vignette demonstrates the same workflow as shown in [Perform NicheNet analysis starting from a Seurat object](seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")`, but adds a circos plot visualization as shown in [Circos plot visualization to show active ligand-target links between interacting cells](circos.md):`vignette("circos", package="nichenetr")`. For more detailed information about the NicheNet workflow, check those vignettes. -This vignette was made upon popular request to demonstrate how those two vignettes can be combined into one analysis workflow. Note that we as developers of NicheNet generally recommend a visualization of the output by combining several heatmaps (ligand activity, ligand-target links, ligand-receptor links, ligand expression, ligand LFC,...) over using a circos plot visualization. Certainly for cases with many sender cell types and ligands that are expressed by more than one sender cell type. Because in those cases, the circos plot is much less informative and could lead to wrong interpretation of the results. - -As example expression data of interacting cells, we will use mouse NICHE-seq data from Medaglia et al. to explore intercellular communication in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection [See @medaglia_spatial_2017]. We will NicheNet to explore immune cell crosstalk in response to this LCMV infection. +In this vignette, you can learn how to visualize the output of a NicheNet analysis in a circos plot (also called a chord diagram) via the `circlize` package. This vignette follows the same workflow as shown in [Perform NicheNet analysis starting from a Seurat object](seurat_wrapper.md). -In this dataset, differential expression is observed between CD8 T cells in steady-state and CD8 T cells after LCMV infection. NicheNet can be applied to look at how several immune cell populations in the lymph node (i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can regulate and induce these observed gene expression changes. NicheNet will specifically prioritize ligands from these immune cells and their target genes that change in expression upon LCMV infection. +This vignette was made upon popular request to demonstrate how those two vignettes can be combined into one analysis workflow. Note that we as developers of NicheNet generally recommend a visualization of the output by combining several heatmaps (ligand activity, ligand-target links, ligand-receptor links, ligand expression, ligand LFC,...) over using a circos plot visualization. This is especially true for cases with many sender cell types and ligands that are expressed by more than one sender cell type. Because in those cases, the circos plot is much less informative and could lead to wrong interpretation of the results. -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from Zenodo. +We will again use the NICHE-seq data from Medaglia et al. (2017), which profiles several immune cell types in the T cell area in the inguinal lymph node before and 72 hours after lymphocytic choriomeningitis virus (LCMV) infection. +You can download the [NicheNet networks](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object of the processed NICHE-seq single-cell data](https://doi.org/10.5281/zenodo.3531889) from Zenodo. # Prepare NicheNet analysis -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks. - - -### Load Packages: +### Load packages ```{r} library(nichenetr) # Please update to v2.0.4 @@ -48,68 +42,51 @@ library(tidyverse) library(circlize) ``` -If you would use and load other packages, we recommend to load these 3 packages after the others. - -### Read in NicheNet's ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet's networks ```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns - -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) head(lr_network) -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -head(weighted_networks$gr) # interactions and their weights in the gene regulatory network ``` ### Read in the expression data of interacting cells ```{r} -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) + # For newer Seurat versions, you may need to run the following seuratObj <- UpdateSeuratObject(seuratObj) -seuratObj@meta.data %>% head() -``` - -Visualize which cell populations are present: CD4 T cells (including regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells (DCs) and inflammatory monocytes -```{r} -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application -DimPlot(seuratObj, reduction = "tsne") +# Convert gene names +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") ``` -```{r} -seuratObj@meta.data$aggregate %>% table() -DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") -``` - # Perform the NicheNet analysis -## NicheNet analysis on Seurat object: explain differential expression between two conditions - -In this case study, the receiver cell population is the 'CD8 T' cell population, whereas the sender cell populations are 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. The above described functions will consider a gene to be expressed when it is expressed in at least a predefined fraction of cells in one cluster (default: 10%). +For this analysis, we define the receiver cell population as the 'CD8 T' cell population, and the sender cell populations as 'CD4 T', 'Treg', 'Mono', 'NK', 'B' and 'DC'. We consider a gene to be expressed when it is expressed in at least 10% of cells in one cluster (default). The gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus 'LCMV', whereas the reference/steady-state condition is 'SS'. The notion of conditions can be extracted from the metadata column 'aggregate', the method to calculate the differential expression is the standard Seurat Wilcoxon test. The number of top-ranked ligands that are further used to predict active target genes and construct an active ligand-receptor network is 30 by default, but we will only choose the top 20 to not overcrowd the circos plot. -To perform the NicheNet analysis with these specifications, run the following: +**Note:** Cell types should be the identities of the seurat object (check using `table(Idents(seuratObj))`) ```{r} -# indicated cell types should be cell class identities -# check via: -# seuratObj %>% Idents() %>% table() -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") -nichenet_output = nichenet_seuratobj_aggregate( +sender_celltypes <- c("CD4 T","Treg", "Mono", "NK", "B", "DC") +nichenet_output <- nichenet_seuratobj_aggregate( seurat_obj = seuratObj, receiver = "CD8 T", - condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", + condition_colname = "aggregate", + condition_oi = "LCMV", condition_reference = "SS", sender = sender_celltypes, - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks, top_n_ligands = 20) ``` @@ -125,27 +102,27 @@ nichenet_output$ligand_activities These ligands are expressed by one or more of the input sender cells. To see which cell population expresses which of these top-ranked ligands, you can run the following: -```{r, fig.width=12} -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() +```{r dotplot, fig.width=12} +nichenet_output$ligand_expression_dotplot ``` -As you can see, most op the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. +As you can see, most of the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. It could also be interesting to see whether some of these ligands are differentially expressed after LCMV infection. -```{r, fig.width=12} -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), split.by = "aggregate") + RotatedAxis() +```{r lfc-heatmap, fig.width=12} +nichenet_output$ligand_differential_expression_heatmap ``` -```{r} -VlnPlot(seuratObj, features = c("Il15", "Cxcl10","Cxcl16"), split.by = "aggregate", pt.size = 0, combine = FALSE) +```{r violin-plot} +VlnPlot(seuratObj, features = c("Ptprc", "H2-M3", "Cxcl10"), split.by = "aggregate", pt.size = 0, combine = TRUE) ``` #### Inferred active ligand-target links NicheNet also infers active target genes of these top-ranked ligands. To see which top-ranked ligands are predicted to have regulated the expression of which differentially expressed genes, you can run following command for a heatmap visualization: -```{r} +```{r ligand-target-heatmap} nichenet_output$ligand_target_heatmap ``` @@ -153,283 +130,134 @@ nichenet_output$ligand_target_heatmap This visualization groups the top predicted active ligands according to the strongest expressing cell type. Therefore we need to determine per cell type which ligands they express more strongly than the other cell types. -### Calculate average ligand expression in sender cells - -```{r} -# avg_expression_ligands = AverageExpression(seuratObj %>% subset(subset = aggregate == "LCMV"),features = nichenet_output$top_ligands) # if want to look specifically in LCMV-only cells -avg_expression_ligands = AverageExpression(seuratObj, features = nichenet_output$top_ligands) -``` - ### Assign ligands to sender cells -To assign ligands to sender cell type, we can e.g. look for which sender cell types show an expression that is higher than the average + SD. +To assign ligands to sender cell type, we can look for which sender cell types show a mean expression that is higher than the mean + one standard deviation. You can change the functions to aggregate the counts (`func.agg`, default is the mean) and function to assign the ligands (`func.assign`, default is mean + SD). Ligands that are expressed higher than `func.assign` in more than one cell type and ligands that are not assigned to any cell type are assigned to "General". ```{r} -sender_ligand_assignment = avg_expression_ligands$RNA %>% apply(1, function(ligand_expression){ - ligand_expression > (ligand_expression %>% mean() + ligand_expression %>% sd()) - }) %>% t() -sender_ligand_assignment = sender_ligand_assignment %>% apply(2, function(x){x[x == TRUE]}) %>% purrr::keep(function(x){length(x) > 0}) -names(sender_ligand_assignment) +ligand_type_indication_df <- assign_ligands_to_celltype(seuratObj, + nichenet_output$top_ligands, + celltype_col = "celltype") + +ligand_type_indication_df %>% head() +ligand_type_indication_df$ligand_type %>% table() ``` -The top ligands seem to be most strongly expressed by B cells, NK cells, monocytes and DCs. We will know also look at which ligands are common across multiple cell types (= those that are specific to > 1 cell type, or those that were not assigned to a cell type in the previous block of code) +### Define the ligand-target links of interest -Determine now which prioritized ligands are expressed by CAFs and or endothelial cells +We will need the ligand-target links from the NicheNet output. To avoid making a circos plots with too many ligand-target links, we will show only links with a weight higher than a predefined cutoff: links belonging to the 40% of lowest scores were removed. Not that this cutoffs and other cutoffs used for this visualization can be changed according to the user's needs. ```{r} +head(nichenet_output$ligand_target_df) -all_assigned_ligands = sender_ligand_assignment %>% lapply(function(x){names(x)}) %>% unlist() -unique_ligands = all_assigned_ligands %>% table() %>% .[. == 1] %>% names() -general_ligands = nichenet_output$top_ligands %>% setdiff(unique_ligands) - -B_specific_ligands = sender_ligand_assignment$B %>% names() %>% setdiff(general_ligands) -NK_specific_ligands = sender_ligand_assignment$NK %>% names() %>% setdiff(general_ligands) -Mono_specific_ligands = sender_ligand_assignment$Mono %>% names() %>% setdiff(general_ligands) -DC_specific_ligands = sender_ligand_assignment$DC %>% names() %>% setdiff(general_ligands) +active_ligand_target_links_df <- nichenet_output$ligand_target_df +active_ligand_target_links_df$target_type <- "LCMV-DE" # needed for joining tables +circos_links <- get_ligand_target_links_oi(ligand_type_indication_df, + active_ligand_target_links_df, + cutoff = 0.40) -ligand_type_indication_df = tibble( - ligand_type = c(rep("B-specific", times = B_specific_ligands %>% length()), - rep("NK-specific", times = NK_specific_ligands %>% length()), - rep("Mono-specific", times = Mono_specific_ligands %>% length()), - rep("DC-specific", times = DC_specific_ligands %>% length()), - rep("General", times = general_ligands %>% length())), - ligand = c(B_specific_ligands, NK_specific_ligands, Mono_specific_ligands, DC_specific_ligands, general_ligands)) +head(circos_links) ``` -### Define the ligand-target links of interest - -To avoid making a circos plots with too many ligand-target links, we will show only links with a weight higher than a predefined cutoff: links belonging to the 40% of lowest scores were removed. Not that this cutoffs and other cutoffs used for this visualization can be changed according to the user's needs. +Prepare the circos visualization by giving each segment of ligands and targets a specific color and order, as well as gaps between different cell types. By default, cell types are ordered alphabetically, followed by "General" (then they are drawn counter-clockwise). Users can give a specific order to the cell types by providing a vector of cell types to the argument `celltype_order`. The gaps between the different segments can also be defined by providing a named list to the argument `widths`. ```{r} -active_ligand_target_links_df = nichenet_output$ligand_target_df %>% mutate(target_type = "LCMV-DE") %>% inner_join(ligand_type_indication_df) # if you want ot make circos plots for multiple gene sets, combine the different data frames and differentiate which target belongs to which gene set via the target type - -cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.40) +ligand_colors <- c("General" = "#377EB8", "NK" = "#4DAF4A", "B" = "#984EA3", + "Mono" = "#FF7F00", "DC" = "#FFFF33", "Treg" = "#F781BF", + "CD8 T"= "#E41A1C") +target_colors <- c("LCMV-DE" = "#999999") + +vis_circos_obj <- prepare_circos_visualization(circos_links, + ligand_colors = ligand_colors, + target_colors = target_colors, + celltype_order = NULL) +``` -active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands) +Render the circos plot where all links have the same transparency. Here, only the widths of the blocks that indicate each target gene is proportional the ligand-target regulatory potential (~prior knowledge supporting the regulatory interaction). -ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique()) -targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique()) - -circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) -``` +```{r ligand-target-circos, fig.width=8, fig.height=8} -Prepare the circos visualization: give each segment of ligands and targets a specific color and order +make_circos_plot(vis_circos_obj, transparency = FALSE, args.circos.text = list(cex = 0.5)) -```{r} -grid_col_ligand =c("General" = "lawngreen", - "NK-specific" = "royalblue", - "B-specific" = "darkgreen", - "Mono-specific" = "violet", - "DC-specific" = "steelblue2") -grid_col_target =c( - "LCMV-DE" = "tomato") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) - -circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) -links_circle = circos_links %>% select(ligand,target, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -target_color = circos_links %>% distinct(target,color_target_type) -grid_target_color = target_color$color_target_type %>% set_names(target_color$target) - -grid_col =c(grid_ligand_color,grid_target_color) - -# give the option that links in the circos plot will be transparant ~ ligand-target potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency ``` -Prepare the circos visualization: order ligands and targets +Render the circos plot where the degree of transparency determined by the regulatory potential value of a ligand-target interaction. -```{r} -target_order = circos_links$target %>% unique() -ligand_order = c(Mono_specific_ligands, DC_specific_ligands, NK_specific_ligands,B_specific_ligands, general_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,target_order) +```{r ligand-target-circos-transparent, fig.width=8, fig.height=8} +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) ``` -Prepare the circos visualization: define the gaps between the different segments +To create a legend for the circos plot, we can use the `ComplexHeatmap::Legend` function and creating a gTree object from it with `grid::grid.grabExpr`. As the circos plot is drawn on base R graphics (i.e., it is not a ggplot object), we will get the plot using `recordPlot()`. -```{r} -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_target = 15 -width_same_cell_same_target_type = 0.5 - -gaps = c( - # width_ligand_target, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Mono-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "DC-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "NK-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "B-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_target, - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_type == "LCMV-DE") %>% distinct(target) %>% nrow() -1)), - width_ligand_target - ) +```{r ligand-target-circos-unused, fig.show='hide'} +par(bg = "transparent") -``` +# Default celltype order +celltype_order <- unique(circos_links$ligand_type) %>% sort() %>% .[. != "General"] %>% c(., "General") +# Create legend +circos_legend <- ComplexHeatmap::Legend( + labels = celltype_order, + background = ligand_colors[celltype_order], + type = "point", + grid_height = unit(3, "mm"), + grid_width = unit(3, "mm"), + labels_gp = grid::gpar(fontsize = 8) + ) -Render the circos plot (all links same transparancy). Only the widths of the blocks that indicate each target gene is proportional the ligand-target regulatory potential (~prior knowledge supporting the regulatory interaction). +circos_legend_grob <- grid::grid.grabExpr(ComplexHeatmap::draw(circos_legend)) -```{r, fig.width=8, fig.height=8} -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # -circos.clear() +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) +p_circos_no_legend <- recordPlot() ``` -Render the circos plot (degree of transparancy determined by the regulatory potential value of a ligand-target interaction) - -```{r, fig.width=8, fig.height=8} -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # -circos.clear() +We can combine the circos plot and the legend using `cowplot::plot_grid`. +```{r ligand-target-circos-with-legend, fig.width=8, fig.height=8} +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) ``` -Save circos plot to an svg file -```{r} +We can save this plot to an svg file. + +```{r eval=FALSE} svg("ligand_target_circos.svg", width = 10, height = 10) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # -circos.clear() +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) dev.off() ``` ### Visualize ligand-receptor interactions of the prioritized ligands in a circos plot -```{r} -lr_network_top_df = nichenet_output$ligand_receptor_df %>% mutate(receptor_type = "LCMV_CD8T_receptor") %>% inner_join(ligand_type_indication_df) -``` +To create a ligand-receptor chord diagram, we can perform similar steps as above using the weighted ligand-receptor dataframe instead. However, as as `prepare_circos_visualization` accesses "target" and "target_type" columns, it is necessary to rename the columns accordingly even though the dataframe contains receptor and not target gene information. ```{r} -grid_col_ligand =c("General" = "lawngreen", - "NK-specific" = "royalblue", - "B-specific" = "darkgreen", - "Mono-specific" = "violet", - "DC-specific" = "steelblue2") -grid_col_receptor =c( - "LCMV_CD8T_receptor" = "darkred") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_receptor = tibble(receptor_type = grid_col_receptor %>% names(), color_receptor_type = grid_col_receptor) - -circos_links = lr_network_top_df %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as receptor! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_receptor) -links_circle = circos_links %>% select(ligand,receptor, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -receptor_color = circos_links %>% distinct(receptor,color_receptor_type) -grid_receptor_color = receptor_color$color_receptor_type %>% set_names(receptor_color$receptor) - -grid_col =c(grid_ligand_color,grid_receptor_color) - -# give the option that links in the circos plot will be transparant ~ ligand-receptor potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency -``` +lr_network_top_df <- nichenet_output$ligand_receptor_df %>% + mutate(target_type = "LCMV_CD8T_receptor") %>% + rename(target=receptor) %>% + inner_join(ligand_type_indication_df) -Prepare the circos visualization: order ligands and receptors +receptor_colors <- c("LCMV_CD8T_receptor" = "#E41A1C") -```{r} -receptor_order = circos_links$receptor %>% unique() -ligand_order = c(Mono_specific_ligands, DC_specific_ligands, NK_specific_ligands,B_specific_ligands, general_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,receptor_order) +vis_circos_receptor_obj <- prepare_circos_visualization(lr_network_top_df, + ligand_colors = ligand_colors, + target_colors = receptor_colors) ``` -Prepare the circos visualization: define the gaps between the different segments +When drawing the plot, the argument `link.visible` = TRUE is also necessary for making all links visible, since no cutoff is used to filter out ligand-receptor interactions. -```{r} -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_receptor = 15 -width_same_cell_same_receptor_type = 0.5 - -gaps = c( - # width_ligand_target, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Mono-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "DC-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "NK-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "B-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_receptor, - rep(width_same_cell_same_receptor_type, times = (circos_links %>% filter(receptor_type == "LCMV_CD8T_receptor") %>% distinct(receptor) %>% nrow() -1)), - width_ligand_receptor - ) +```{r ligand-receptor-circos} +make_circos_plot(vis_circos_receptor_obj, transparency = FALSE, + link.visible = TRUE, args.circos.text = list(cex = 0.8)) ``` -Render the circos plot (all links same transparancy). Only the widths of the blocks that indicate each receptor is proportional the ligand-receptor interaction weight (~prior knowledge supporting the interaction). - -```{r, fig.width=8, fig.height=8} -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -circos.clear() -``` +Just as above, if `transparency = TRUE`, the degree of transparency is determined by the prior interaction weight of the ligand-receptor interaction. -Render the circos plot (degree of transparancy determined by the prior interaction weight of the ligand-receptor interaction - just as the widths of the blocks indicating each receptor) - -```{r, fig.width=8, fig.height=8} -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -circos.clear() -``` +### FAQ: How to draw a double circos plot of ligand-receptor-target links? + +Please check the [HNSCC case study + double circos visualization](circos_plot.md) for the demonstration. -Save circos plot to an svg file ```{r} -svg("ligand_receptor_circos.svg", width = 15, height = 15) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -circos.clear() -dev.off() +sessionInfo() ``` -### References diff --git a/vignettes/seurat_wrapper_circos.md b/vignettes/seurat_wrapper_circos.md index 36351ac..d64da05 100644 --- a/vignettes/seurat_wrapper_circos.md +++ b/vignettes/seurat_wrapper_circos.md @@ -7,50 +7,33 @@ Robin Browaeys rmarkdown::render("vignettes/seurat_wrapper_circos.Rmd", output_format = "github_document") --> -In this vignette, you can learn how to perform a basic NicheNet analysis -on a Seurat v3 object - and how to visualize the output in a circos -plot. This vignette demonstrates the same workflow as shown in [Perform -NicheNet analysis starting from a Seurat -object](seurat_wrapper.md):`vignette("seurat_wrapper", package="nichenetr")`, -but adds a circos plot visualization as shown in [Circos plot -visualization to show active ligand-target links between interacting -cells](circos.md):`vignette("circos", package="nichenetr")`. For more -detailed information about the NicheNet workflow, check those vignettes. +In this vignette, you can learn how to visualize the output of a +NicheNet analysis in a circos plot (also called a chord diagram) via the +`circlize` package. This vignette follows the same workflow as shown in +[Perform NicheNet analysis starting from a Seurat +object](seurat_wrapper.md). + This vignette was made upon popular request to demonstrate how those two vignettes can be combined into one analysis workflow. Note that we as developers of NicheNet generally recommend a visualization of the output by combining several heatmaps (ligand activity, ligand-target links, ligand-receptor links, ligand expression, ligand LFC,…) over using a -circos plot visualization. Certainly for cases with many sender cell -types and ligands that are expressed by more than one sender cell type. -Because in those cases, the circos plot is much less informative and -could lead to wrong interpretation of the results. - -As example expression data of interacting cells, we will use mouse -NICHE-seq data from Medaglia et al. to explore intercellular -communication in the T cell area in the inguinal lymph node before and -72 hours after lymphocytic choriomeningitis virus (LCMV) infection (See -Medaglia et al. 2017). We will NicheNet to explore immune cell crosstalk -in response to this LCMV infection. - -In this dataset, differential expression is observed between CD8 T cells -in steady-state and CD8 T cells after LCMV infection. NicheNet can be -applied to look at how several immune cell populations in the lymph node -(i.e., monocytes, dendritic cells, NK cells, B cells, CD4 T cells) can -regulate and induce these observed gene expression changes. NicheNet -will specifically prioritize ligands from these immune cells and their -target genes that change in expression upon LCMV infection. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) -and the [Seurat object of the processed NICHE-seq single-cell -data](https://doi.org/10.5281/zenodo.3531889) can be downloaded from -Zenodo. +circos plot visualization. This is especially true for cases with many +sender cell types and ligands that are expressed by more than one sender +cell type. Because in those cases, the circos plot is much less +informative and could lead to wrong interpretation of the results. + +We will again use the NICHE-seq data from Medaglia et al. (2017), which +profiles several immune cell types in the T cell area in the inguinal +lymph node before and 72 hours after lymphocytic choriomeningitis virus +(LCMV) infection. You can download the [NicheNet +networks](https://doi.org/10.5281/zenodo.7074290) and the [Seurat object +of the processed NICHE-seq single-cell +data](https://doi.org/10.5281/zenodo.3531889) from Zenodo. # Prepare NicheNet analysis -## Load required packages, read in the Seurat object with processed expression data of interacting cells and NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks. - -### Load Packages: +### Load packages ``` r library(nichenetr) # Please update to v2.0.4 @@ -60,22 +43,12 @@ library(tidyverse) library(circlize) ``` -If you would use and load other packages, we recommend to load these 3 -packages after the others. - -### Read in NicheNet’s ligand-target prior model, ligand-receptor network and weighted integrated networks: +### Read in NicheNet’s networks ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## 2300002M23Rik 2610528A11Rik 9530003J23Rik a A2m -## 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 1.390053e-05 -## 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 1.345536e-05 -## 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 9.802264e-05 -## 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 2.585820e-03 -## 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 2.632082e-03 - -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) head(lr_network) ## # A tibble: 6 × 4 ## from to database source @@ -86,80 +59,26 @@ head(lr_network) ## 4 a Atrn omnipath omnipath ## 5 a F11r omnipath omnipath ## 6 a Mc1r omnipath omnipath - -weighted_networks = readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -head(weighted_networks$lr_sig) # interactions and their weights in the ligand-receptor + signaling network -## # A tibble: 6 × 3 -## from to weight -## -## 1 0610010F05Rik App 0.110 -## 2 0610010F05Rik Cat 0.0673 -## 3 0610010F05Rik H1f2 0.0660 -## 4 0610010F05Rik Lrrc49 0.0829 -## 5 0610010F05Rik Nicn1 0.0864 -## 6 0610010F05Rik Srpk1 0.123 -head(weighted_networks$gr) # interactions and their weights in the gene regulatory network -## # A tibble: 6 × 3 -## from to weight -## -## 1 0610010K14Rik 0610010K14Rik 0.121 -## 2 0610010K14Rik 2510039O18Rik 0.121 -## 3 0610010K14Rik 2610021A01Rik 0.0256 -## 4 0610010K14Rik 9130401M01Rik 0.0263 -## 5 0610010K14Rik Alg1 0.127 -## 6 0610010K14Rik Alox12 0.128 ``` ### Read in the expression data of interacting cells ``` r -seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) + # For newer Seurat versions, you may need to run the following seuratObj <- UpdateSeuratObject(seuratObj) -seuratObj@meta.data %>% head() -## nGene nUMI orig.ident aggregate res.0.6 celltype nCount_RNA nFeature_RNA -## W380370 880 1611 LN_SS SS 1 CD8 T 1607 876 -## W380372 541 891 LN_SS SS 0 CD4 T 885 536 -## W380374 742 1229 LN_SS SS 0 CD4 T 1223 737 -## W380378 847 1546 LN_SS SS 1 CD8 T 1537 838 -## W380379 839 1606 LN_SS SS 0 CD4 T 1603 836 -## W380381 517 844 LN_SS SS 0 CD4 T 840 513 +# Convert gene names +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") ``` -Visualize which cell populations are present: CD4 T cells (including -regulatory T cells), CD8 T cells, B cells, NK cells, dendritic cells -(DCs) and inflammatory monocytes - -``` r -seuratObj@meta.data$celltype %>% table() # note that the number of cells of some cell types is very low and should preferably be higher for a real application -## . -## B CD4 T CD8 T DC Mono NK Treg -## 382 2562 1645 18 90 131 199 -DimPlot(seuratObj, reduction = "tsne") -``` - -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png) - -``` r -seuratObj@meta.data$aggregate %>% table() -## . -## LCMV SS -## 3886 1141 -DimPlot(seuratObj, reduction = "tsne", group.by = "aggregate") -``` - -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png) - # Perform the NicheNet analysis -## NicheNet analysis on Seurat object: explain differential expression between two conditions - -In this case study, the receiver cell population is the ‘CD8 T’ cell -population, whereas the sender cell populations are ‘CD4 T’, ‘Treg’, -‘Mono’, ‘NK’, ‘B’ and ‘DC’. The above described functions will consider -a gene to be expressed when it is expressed in at least a predefined -fraction of cells in one cluster (default: 10%). +For this analysis, we define the receiver cell population as the ‘CD8 T’ +cell population, and the sender cell populations as ‘CD4 T’, ‘Treg’, +‘Mono’, ‘NK’, ‘B’ and ‘DC’. We consider a gene to be expressed when it +is expressed in at least 10% of cells in one cluster (default). The gene set of interest are the genes differentially expressed in CD8 T cells after LCMV infection. The condition of interest is thus ‘LCMV’, @@ -173,20 +92,20 @@ target genes and construct an active ligand-receptor network is 30 by default, but we will only choose the top 20 to not overcrowd the circos plot. -To perform the NicheNet analysis with these specifications, run the -following: +**Note:** Cell types should be the identities of the seurat object +(check using `table(Idents(seuratObj))`) ``` r -# indicated cell types should be cell class identities -# check via: -# seuratObj %>% Idents() %>% table() -sender_celltypes = c("CD4 T","Treg", "Mono", "NK", "B", "DC") -nichenet_output = nichenet_seuratobj_aggregate( +sender_celltypes <- c("CD4 T","Treg", "Mono", "NK", "B", "DC") +nichenet_output <- nichenet_seuratobj_aggregate( seurat_obj = seuratObj, receiver = "CD8 T", - condition_colname = "aggregate", condition_oi = "LCMV", condition_reference = "SS", + condition_colname = "aggregate", + condition_oi = "LCMV", condition_reference = "SS", sender = sender_celltypes, - ligand_target_matrix = ligand_target_matrix, lr_network = lr_network, weighted_networks = weighted_networks, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks, top_n_ligands = 20) ## [1] "Read in and process NicheNet's networks" ## [1] "Define expressed ligands and receptors in receiver and sender cells" @@ -207,20 +126,20 @@ command: ``` r nichenet_output$ligand_activities -## # A tibble: 70 × 6 +## # A tibble: 73 × 6 ## test_ligand auroc aupr aupr_corrected pearson rank ## -## 1 Ebi3 0.658 0.381 0.235 0.293 1 -## 2 Ptprc 0.642 0.305 0.159 0.161 2 -## 3 H2-M3 0.610 0.287 0.142 0.181 3 -## 4 H2-M2 0.614 0.272 0.126 0.147 5 -## 5 H2-T10 0.614 0.272 0.126 0.147 5 -## 6 H2-T22 0.614 0.272 0.126 0.147 5 -## 7 H2-T23 0.614 0.271 0.126 0.147 7 -## 8 H2-K1 0.607 0.258 0.113 0.132 8 -## 9 H2-Q4 0.606 0.258 0.112 0.131 10 -## 10 H2-Q6 0.606 0.258 0.112 0.131 10 -## # ℹ 60 more rows +## 1 Ebi3 0.663 0.390 0.244 0.301 1 +## 2 Ptprc 0.642 0.310 0.165 0.167 2 +## 3 H2-M3 0.608 0.292 0.146 0.179 3 +## 4 H2-M2 0.611 0.279 0.133 0.153 5 +## 5 H2-T10 0.611 0.279 0.133 0.153 5 +## 6 H2-T22 0.611 0.279 0.133 0.153 5 +## 7 H2-T23 0.611 0.278 0.132 0.153 7 +## 8 H2-K1 0.605 0.268 0.122 0.142 8 +## 9 H2-Q4 0.605 0.268 0.122 0.141 10 +## 10 H2-Q6 0.605 0.268 0.122 0.141 10 +## # ℹ 63 more rows ``` These ligands are expressed by one or more of the input sender cells. To @@ -228,39 +147,28 @@ see which cell population expresses which of these top-ranked ligands, you can run the following: ``` r -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), cols = "RdYlBu") + RotatedAxis() +nichenet_output$ligand_expression_dotplot ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png) +![](seurat_wrapper_circos_files/figure-gfm/dotplot-1.png) -As you can see, most op the top-ranked ligands seem to be mainly +As you can see, most of the top-ranked ligands seem to be mainly expressed by dendritic cells and monocytes. It could also be interesting to see whether some of these ligands are differentially expressed after LCMV infection. ``` r -DotPlot(seuratObj, features = nichenet_output$top_ligands %>% rev(), split.by = "aggregate") + RotatedAxis() +nichenet_output$ligand_differential_expression_heatmap ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png) +![](seurat_wrapper_circos_files/figure-gfm/lfc-heatmap-1.png) ``` r -VlnPlot(seuratObj, features = c("Il15", "Cxcl10","Cxcl16"), split.by = "aggregate", pt.size = 0, combine = FALSE) -## [[1]] +VlnPlot(seuratObj, features = c("Ptprc", "H2-M3", "Cxcl10"), split.by = "aggregate", pt.size = 0, combine = TRUE) ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png) - - ## - ## [[2]] - -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png) - - ## - ## [[3]] - -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png) +![](seurat_wrapper_circos_files/figure-gfm/violin-plot-1.png) #### Inferred active ligand-target links @@ -273,7 +181,7 @@ following command for a heatmap visualization: nichenet_output$ligand_target_heatmap ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png) +![](seurat_wrapper_circos_files/figure-gfm/ligand-target-heatmap-1.png) ## Circos plots to visualize ligand-target and ligand-receptor interactions @@ -282,341 +190,253 @@ the strongest expressing cell type. Therefore we need to determine per cell type which ligands they express more strongly than the other cell types. -### Calculate average ligand expression in sender cells - -``` r -# avg_expression_ligands = AverageExpression(seuratObj %>% subset(subset = aggregate == "LCMV"),features = nichenet_output$top_ligands) # if want to look specifically in LCMV-only cells -avg_expression_ligands = AverageExpression(seuratObj, features = nichenet_output$top_ligands) -``` - ### Assign ligands to sender cells -To assign ligands to sender cell type, we can e.g. look for which sender -cell types show an expression that is higher than the average + SD. - -``` r -sender_ligand_assignment = avg_expression_ligands$RNA %>% apply(1, function(ligand_expression){ - ligand_expression > (ligand_expression %>% mean() + ligand_expression %>% sd()) - }) %>% t() -sender_ligand_assignment = sender_ligand_assignment %>% apply(2, function(x){x[x == TRUE]}) %>% purrr::keep(function(x){length(x) > 0}) -names(sender_ligand_assignment) -## [1] "B" "NK" "Mono" "DC" -``` - -The top ligands seem to be most strongly expressed by B cells, NK cells, -monocytes and DCs. We will know also look at which ligands are common -across multiple cell types (= those that are specific to \> 1 cell type, -or those that were not assigned to a cell type in the previous block of -code) - -Determine now which prioritized ligands are expressed by CAFs and or -endothelial cells - -``` r - -all_assigned_ligands = sender_ligand_assignment %>% lapply(function(x){names(x)}) %>% unlist() -unique_ligands = all_assigned_ligands %>% table() %>% .[. == 1] %>% names() -general_ligands = nichenet_output$top_ligands %>% setdiff(unique_ligands) - -B_specific_ligands = sender_ligand_assignment$B %>% names() %>% setdiff(general_ligands) -NK_specific_ligands = sender_ligand_assignment$NK %>% names() %>% setdiff(general_ligands) -Mono_specific_ligands = sender_ligand_assignment$Mono %>% names() %>% setdiff(general_ligands) -DC_specific_ligands = sender_ligand_assignment$DC %>% names() %>% setdiff(general_ligands) - -ligand_type_indication_df = tibble( - ligand_type = c(rep("B-specific", times = B_specific_ligands %>% length()), - rep("NK-specific", times = NK_specific_ligands %>% length()), - rep("Mono-specific", times = Mono_specific_ligands %>% length()), - rep("DC-specific", times = DC_specific_ligands %>% length()), - rep("General", times = general_ligands %>% length())), - ligand = c(B_specific_ligands, NK_specific_ligands, Mono_specific_ligands, DC_specific_ligands, general_ligands)) +To assign ligands to sender cell type, we can look for which sender cell +types show a mean expression that is higher than the mean + one standard +deviation. You can change the functions to aggregate the counts +(`func.agg`, default is the mean) and function to assign the ligands +(`func.assign`, default is mean + SD). Ligands that are expressed higher +than `func.assign` in more than one cell type and ligands that are not +assigned to any cell type are assigned to “General”. + +``` r +ligand_type_indication_df <- assign_ligands_to_celltype(seuratObj, + nichenet_output$top_ligands, + celltype_col = "celltype") + +ligand_type_indication_df %>% head() +## ligand_type ligand +## 1 B H2-M3 +## 2 B Btla +## 3 NK Ptprc +## 4 NK H2-Q7 +## 5 NK Cd48 +## 6 Mono Ebi3 +ligand_type_indication_df$ligand_type %>% table() +## . +## B DC General Mono NK +## 2 8 1 6 3 ``` ### Define the ligand-target links of interest -To avoid making a circos plots with too many ligand-target links, we -will show only links with a weight higher than a predefined cutoff: -links belonging to the 40% of lowest scores were removed. Not that this +We will need the ligand-target links from the NicheNet output. To avoid +making a circos plots with too many ligand-target links, we will show +only links with a weight higher than a predefined cutoff: links +belonging to the 40% of lowest scores were removed. Not that this cutoffs and other cutoffs used for this visualization can be changed according to the user’s needs. ``` r -active_ligand_target_links_df = nichenet_output$ligand_target_df %>% mutate(target_type = "LCMV-DE") %>% inner_join(ligand_type_indication_df) # if you want ot make circos plots for multiple gene sets, combine the different data frames and differentiate which target belongs to which gene set via the target type - -cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.40) - -active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands) - -ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique()) -targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique()) - -circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove) -``` - -Prepare the circos visualization: give each segment of ligands and -targets a specific color and order - -``` r -grid_col_ligand =c("General" = "lawngreen", - "NK-specific" = "royalblue", - "B-specific" = "darkgreen", - "Mono-specific" = "violet", - "DC-specific" = "steelblue2") -grid_col_target =c( - "LCMV-DE" = "tomato") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_target = tibble(target_type = grid_col_target %>% names(), color_target_type = grid_col_target) - -circos_links = circos_links %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as target! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_target) -links_circle = circos_links %>% select(ligand,target, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -target_color = circos_links %>% distinct(target,color_target_type) -grid_target_color = target_color$color_target_type %>% set_names(target_color$target) - -grid_col =c(grid_ligand_color,grid_target_color) - -# give the option that links in the circos plot will be transparant ~ ligand-target potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency -``` +head(nichenet_output$ligand_target_df) +## # A tibble: 6 × 3 +## ligand target weight +## +## 1 Ebi3 Bst2 0.0500 +## 2 Ebi3 Cd274 0.0504 +## 3 Ebi3 Cxcl10 0.0570 +## 4 Ebi3 Cxcr4 0.0430 +## 5 Ebi3 Ddit4 0.0485 +## 6 Ebi3 Ddx58 0.0402 -Prepare the circos visualization: order ligands and targets +active_ligand_target_links_df <- nichenet_output$ligand_target_df +active_ligand_target_links_df$target_type <- "LCMV-DE" # needed for joining tables +circos_links <- get_ligand_target_links_oi(ligand_type_indication_df, + active_ligand_target_links_df, + cutoff = 0.40) -``` r -target_order = circos_links$target %>% unique() -ligand_order = c(Mono_specific_ligands, DC_specific_ligands, NK_specific_ligands,B_specific_ligands, general_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,target_order) +head(circos_links) +## # A tibble: 6 × 5 +## ligand target weight target_type ligand_type +## +## 1 Ebi3 Bst2 0.0500 LCMV-DE Mono +## 2 Ebi3 Cd274 0.0504 LCMV-DE Mono +## 3 Ebi3 Cxcl10 0.0570 LCMV-DE Mono +## 4 Ebi3 Cxcr4 0.0430 LCMV-DE Mono +## 5 Ebi3 Ddit4 0.0485 LCMV-DE Mono +## 6 Ebi3 Ddx58 0.0402 LCMV-DE Mono ``` -Prepare the circos visualization: define the gaps between the different -segments +Prepare the circos visualization by giving each segment of ligands and +targets a specific color and order, as well as gaps between different +cell types. By default, cell types are ordered alphabetically, followed +by “General” (then they are drawn counter-clockwise). Users can give a +specific order to the cell types by providing a vector of cell types to +the argument `celltype_order`. The gaps between the different segments +can also be defined by providing a named list to the argument `widths`. ``` r -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_target = 15 -width_same_cell_same_target_type = 0.5 - -gaps = c( - # width_ligand_target, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Mono-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "DC-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "NK-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "B-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_target, - rep(width_same_cell_same_target_type, times = (circos_links %>% filter(target_type == "LCMV-DE") %>% distinct(target) %>% nrow() -1)), - width_ligand_target - ) -``` +ligand_colors <- c("General" = "#377EB8", "NK" = "#4DAF4A", "B" = "#984EA3", + "Mono" = "#FF7F00", "DC" = "#FFFF33", "Treg" = "#F781BF", + "CD8 T"= "#E41A1C") +target_colors <- c("LCMV-DE" = "#999999") -Render the circos plot (all links same transparancy). Only the widths of -the blocks that indicate each target gene is proportional the -ligand-target regulatory potential (~prior knowledge supporting the -regulatory interaction). - -``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # +vis_circos_obj <- prepare_circos_visualization(circos_links, + ligand_colors = ligand_colors, + target_colors = target_colors, + celltype_order = NULL) ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png) +Render the circos plot where all links have the same transparency. Here, +only the widths of the blocks that indicate each target gene is +proportional the ligand-target regulatory potential (~prior knowledge +supporting the regulatory interaction). ``` r -circos.clear() -``` -Render the circos plot (degree of transparancy determined by the -regulatory potential value of a ligand-target interaction) - -``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # +make_circos_plot(vis_circos_obj, transparency = FALSE, args.circos.text = list(cex = 0.5)) ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png) +![](seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-1.png) -``` r -circos.clear() -``` - -Save circos plot to an svg file +Render the circos plot where the degree of transparency determined by +the regulatory potential value of a ligand-target interaction. ``` r -svg("ligand_target_circos.svg", width = 10, height = 10) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1) -}, bg.border = NA) # -circos.clear() -dev.off() -## png -## 2 +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) ``` -### Visualize ligand-receptor interactions of the prioritized ligands in a circos plot +![](seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-transparent-1.png) -``` r -lr_network_top_df = nichenet_output$ligand_receptor_df %>% mutate(receptor_type = "LCMV_CD8T_receptor") %>% inner_join(ligand_type_indication_df) -``` +To create a legend for the circos plot, we can use the +`ComplexHeatmap::Legend` function and creating a gTree object from it +with `grid::grid.grabExpr`. As the circos plot is drawn on base R +graphics (i.e., it is not a ggplot object), we will get the plot using +`recordPlot()`. ``` r -grid_col_ligand =c("General" = "lawngreen", - "NK-specific" = "royalblue", - "B-specific" = "darkgreen", - "Mono-specific" = "violet", - "DC-specific" = "steelblue2") -grid_col_receptor =c( - "LCMV_CD8T_receptor" = "darkred") - -grid_col_tbl_ligand = tibble(ligand_type = grid_col_ligand %>% names(), color_ligand_type = grid_col_ligand) -grid_col_tbl_receptor = tibble(receptor_type = grid_col_receptor %>% names(), color_receptor_type = grid_col_receptor) - -circos_links = lr_network_top_df %>% mutate(ligand = paste(ligand," ")) # extra space: make a difference between a gene as ligand and a gene as receptor! -circos_links = circos_links %>% inner_join(grid_col_tbl_ligand) %>% inner_join(grid_col_tbl_receptor) -links_circle = circos_links %>% select(ligand,receptor, weight) - -ligand_color = circos_links %>% distinct(ligand,color_ligand_type) -grid_ligand_color = ligand_color$color_ligand_type %>% set_names(ligand_color$ligand) -receptor_color = circos_links %>% distinct(receptor,color_receptor_type) -grid_receptor_color = receptor_color$color_receptor_type %>% set_names(receptor_color$receptor) - -grid_col =c(grid_ligand_color,grid_receptor_color) - -# give the option that links in the circos plot will be transparant ~ ligand-receptor potential score -transparency = circos_links %>% mutate(weight =(weight-min(weight))/(max(weight)-min(weight))) %>% mutate(transparency = 1-weight) %>% .$transparency -``` +par(bg = "transparent") -Prepare the circos visualization: order ligands and receptors +# Default celltype order +celltype_order <- unique(circos_links$ligand_type) %>% sort() %>% .[. != "General"] %>% c(., "General") -``` r -receptor_order = circos_links$receptor %>% unique() -ligand_order = c(Mono_specific_ligands, DC_specific_ligands, NK_specific_ligands,B_specific_ligands, general_ligands) %>% c(paste(.," ")) %>% intersect(circos_links$ligand) -order = c(ligand_order,receptor_order) -``` - -Prepare the circos visualization: define the gaps between the different -segments - -``` r -width_same_cell_same_ligand_type = 0.5 -width_different_cell = 6 -width_ligand_receptor = 15 -width_same_cell_same_receptor_type = 0.5 - -gaps = c( - # width_ligand_target, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "Mono-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "DC-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "NK-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "B-specific") %>% distinct(ligand) %>% nrow() -1)), - width_different_cell, - rep(width_same_cell_same_ligand_type, times = (circos_links %>% filter(ligand_type == "General") %>% distinct(ligand) %>% nrow() -1)), - width_ligand_receptor, - rep(width_same_cell_same_receptor_type, times = (circos_links %>% filter(receptor_type == "LCMV_CD8T_receptor") %>% distinct(receptor) %>% nrow() -1)), - width_ligand_receptor +# Create legend +circos_legend <- ComplexHeatmap::Legend( + labels = celltype_order, + background = ligand_colors[celltype_order], + type = "point", + grid_height = unit(3, "mm"), + grid_width = unit(3, "mm"), + labels_gp = grid::gpar(fontsize = 8) ) -``` -Render the circos plot (all links same transparancy). Only the widths of -the blocks that indicate each receptor is proportional the -ligand-receptor interaction weight (~prior knowledge supporting the -interaction). +circos_legend_grob <- grid::grid.grabExpr(ComplexHeatmap::draw(circos_legend)) -``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = 0, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -``` - -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png) - -``` r -circos.clear() +make_circos_plot(vis_circos_obj, transparency = TRUE, args.circos.text = list(cex = 0.5)) +p_circos_no_legend <- recordPlot() ``` -Render the circos plot (degree of transparancy determined by the prior -interaction weight of the ligand-receptor interaction - just as the -widths of the blocks indicating each receptor) +We can combine the circos plot and the legend using +`cowplot::plot_grid`. ``` r -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) ``` -![](seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png) +![](seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-with-legend-1.png) -``` r -circos.clear() -``` - -Save circos plot to an svg file +We can save this plot to an svg file. ``` r -svg("ligand_receptor_circos.svg", width = 15, height = 15) -circos.par(gap.degree = gaps) -chordDiagram(links_circle, directional = 1,order=order,link.sort = TRUE, link.decreasing = FALSE, grid.col = grid_col,transparency = transparency, diffHeight = 0.005, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow", link.visible = links_circle$weight >= cutoff_include_all_ligands,annotationTrack = "grid", - preAllocateTracks = list(track.height = 0.075)) -# we go back to the first track and customize sector labels -circos.track(track.index = 1, panel.fun = function(x, y) { - circos.text(CELL_META$xcenter, CELL_META$ylim[1], CELL_META$sector.index, - facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 0.8) -}, bg.border = NA) # -circos.clear() +svg("ligand_target_circos.svg", width = 10, height = 10) +cowplot::plot_grid(p_circos_no_legend, circos_legend_grob, rel_widths = c(1, 0.1)) dev.off() -## png -## 2 ``` -### References - -
- -
- -Medaglia, Chiara, Amir Giladi, Liat Stoler-Barak, Marco De Giovanni, -Tomer Meir Salame, Adi Biram, Eyal David, et al. 2017. “Spatial -Reconstruction of Immune Niches by Combining Photoactivatable Reporters -and scRNA-Seq.” *Science*, December, -eaao4277. . - -
+### Visualize ligand-receptor interactions of the prioritized ligands in a circos plot -
+To create a ligand-receptor chord diagram, we can perform similar steps +as above using the weighted ligand-receptor dataframe instead. However, +as as `prepare_circos_visualization` accesses “target” and “target_type” +columns, it is necessary to rename the columns accordingly even though +the dataframe contains receptor and not target gene information. + +``` r +lr_network_top_df <- nichenet_output$ligand_receptor_df %>% + mutate(target_type = "LCMV_CD8T_receptor") %>% + rename(target=receptor) %>% + inner_join(ligand_type_indication_df) + +receptor_colors <- c("LCMV_CD8T_receptor" = "#E41A1C") + +vis_circos_receptor_obj <- prepare_circos_visualization(lr_network_top_df, + ligand_colors = ligand_colors, + target_colors = receptor_colors) +``` + +When drawing the plot, the argument `link.visible` = TRUE is also +necessary for making all links visible, since no cutoff is used to +filter out ligand-receptor interactions. + +``` r +make_circos_plot(vis_circos_receptor_obj, transparency = FALSE, + link.visible = TRUE, args.circos.text = list(cex = 0.8)) +``` + +![](seurat_wrapper_circos_files/figure-gfm/ligand-receptor-circos-1.png) + +Just as above, if `transparency = TRUE`, the degree of transparency is +determined by the prior interaction weight of the ligand-receptor +interaction. + +### FAQ: How to draw a double circos plot of ligand-receptor-target links? + +Please check the [HNSCC case study + double circos visualization](circos_plot.md) for the demonstration. + +``` r +sessionInfo() +## R version 4.3.2 (2023-10-31) +## Platform: x86_64-redhat-linux-gnu (64-bit) +## Running under: CentOS Stream 8 +## +## Matrix products: default +## BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so; LAPACK version 3.9.0 +## +## locale: +## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 +## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C +## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C +## +## time zone: Asia/Bangkok +## tzcode source: system (glibc) +## +## attached base packages: +## [1] stats graphics grDevices utils datasets methods base +## +## other attached packages: +## [1] circlize_0.4.15 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.4 purrr_1.0.2 readr_2.1.2 tidyr_1.3.0 +## [8] tibble_3.2.1 ggplot2_3.4.4 tidyverse_1.3.1 SeuratObject_5.0.1 Seurat_4.4.0 nichenetr_2.0.4 +## +## loaded via a namespace (and not attached): +## [1] fs_1.6.3 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 lubridate_1.9.3 httr_1.4.7 +## [7] RColorBrewer_1.1-3 doParallel_1.0.17 tools_4.3.2 sctransform_0.4.0 backports_1.4.1 utf8_1.2.4 +## [13] R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 GetoptLong_1.0.5 withr_2.5.2 sp_2.1-2 +## [19] gridExtra_2.3 fdrtool_1.2.17 progressr_0.14.0 cli_3.6.2 spatstat.explore_3.2-1 labeling_0.4.3 +## [25] spatstat.data_3.0-3 randomForest_4.7-1.1 proxy_0.4-27 ggridges_0.5.5 pbapply_1.7-2 foreign_0.8-85 +## [31] parallelly_1.36.0 limma_3.56.2 readxl_1.4.3 rstudioapi_0.15.0 gridGraphics_0.5-1 visNetwork_2.1.2 +## [37] generics_0.1.3 shape_1.4.6 ica_1.0-3 spatstat.random_3.2-2 car_3.1-2 Matrix_1.6-4 +## [43] ggbeeswarm_0.7.2 fansi_1.0.6 S4Vectors_0.38.1 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.8 +## [49] carData_3.0-5 recipes_1.0.7 Rtsne_0.17 grid_4.3.2 promises_1.2.1 crayon_1.5.2 +## [55] miniUI_0.1.1.1 lattice_0.21-9 haven_2.4.3 cowplot_1.1.2 pillar_1.9.0 knitr_1.45 +## [61] ComplexHeatmap_2.16.0 rjson_0.2.21 future.apply_1.11.0 codetools_0.2-19 leiden_0.3.9 glue_1.6.2 +## [67] data.table_1.14.10 vctrs_0.6.5 png_0.1-8 spam_2.10-0 cellranger_1.1.0 gtable_0.3.4 +## [73] assertthat_0.2.1 gower_1.0.1 xfun_0.41 mime_0.12 prodlim_2023.08.28 survival_3.5-7 +## [79] timeDate_4032.109 iterators_1.0.14 hardhat_1.3.0 lava_1.7.3 DiagrammeR_1.0.10 ellipsis_0.3.2 +## [85] fitdistrplus_1.1-11 ROCR_1.0-11 ipred_0.9-14 nlme_3.1-163 RcppAnnoy_0.0.21 irlba_2.3.5.1 +## [91] vipor_0.4.5 KernSmooth_2.23-22 rpart_4.1.21 colorspace_2.1-0 BiocGenerics_0.46.0 DBI_1.1.3 +## [97] Hmisc_5.1-0 nnet_7.3-19 ggrastr_1.0.2 tidyselect_1.2.0 compiler_4.3.2 rvest_1.0.2 +## [103] htmlTable_2.4.1 xml2_1.3.6 plotly_4.10.0 shadowtext_0.1.2 checkmate_2.3.1 scales_1.3.0 +## [109] caTools_1.18.2 lmtest_0.9-40 digest_0.6.33 goftest_1.2-3 spatstat.utils_3.0-4 rmarkdown_2.11 +## [115] htmltools_0.5.7 pkgconfig_2.0.3 base64enc_0.1-3 highr_0.10 dbplyr_2.1.1 fastmap_1.1.1 +## [121] rlang_1.1.2 GlobalOptions_0.1.2 htmlwidgets_1.6.2 shiny_1.7.1 farver_2.1.1 zoo_1.8-12 +## [127] jsonlite_1.8.8 ModelMetrics_1.2.2.2 magrittr_2.0.3 Formula_1.2-5 dotCall64_1.1-1 patchwork_1.1.3 +## [133] munsell_0.5.0 Rcpp_1.0.11 ggnewscale_0.4.9 reticulate_1.34.0 stringi_1.7.6 pROC_1.18.5 +## [139] MASS_7.3-60 plyr_1.8.9 parallel_4.3.2 listenv_0.9.0 ggrepel_0.9.4 deldir_2.0-2 +## [145] splines_4.3.2 tensor_1.5 hms_1.1.3 igraph_1.2.11 ggpubr_0.6.0 spatstat.geom_3.2-7 +## [151] ggsignif_0.6.4 reshape2_1.4.4 stats4_4.3.2 reprex_2.0.1 evaluate_0.23 modelr_0.1.8 +## [157] tzdb_0.4.0 foreach_1.5.2 tweenr_2.0.2 httpuv_1.6.13 RANN_2.6.1 polyclip_1.10-6 +## [163] future_1.33.0 clue_0.3-64 scattermore_1.2 ggforce_0.4.1 broom_0.7.12 xtable_1.8-4 +## [169] e1071_1.7-14 rstatix_0.7.2 later_1.3.2 viridisLite_0.4.2 class_7.3-22 beeswarm_0.4.0 +## [175] IRanges_2.34.1 cluster_2.1.4 timechange_0.2.0 globals_0.16.2 caret_6.0-94 +``` diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/dotplot-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/dotplot-1.png new file mode 100644 index 0000000..d9a074f Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/dotplot-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/lfc-heatmap-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/lfc-heatmap-1.png new file mode 100644 index 0000000..fc44564 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/lfc-heatmap-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-receptor-circos-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-receptor-circos-1.png new file mode 100644 index 0000000..301717b Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-receptor-circos-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-1.png new file mode 100644 index 0000000..6897100 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-transparent-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-transparent-1.png new file mode 100644 index 0000000..ce16299 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-transparent-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-unused-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-unused-1.png new file mode 100644 index 0000000..3a6d7b5 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-unused-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-with-legend-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-with-legend-1.png new file mode 100644 index 0000000..23cd753 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-circos-with-legend-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-heatmap-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-heatmap-1.png new file mode 100644 index 0000000..0b54340 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/ligand-target-heatmap-1.png differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png deleted file mode 100644 index e3f743f..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png deleted file mode 100644 index 93b1dd9..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png deleted file mode 100644 index 6d776ab..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-10-3.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png deleted file mode 100644 index 4f3e89c..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-11-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png deleted file mode 100644 index a3ef603..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-19-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png deleted file mode 100644 index 5c17c48..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-20-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png deleted file mode 100644 index ed9b2ed..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-26-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png deleted file mode 100644 index 6c35b13..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-27-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png deleted file mode 100644 index 0d7cf9c..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-8-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index e14ecfa..0000000 Binary files a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/violin-plot-1.png b/vignettes/seurat_wrapper_circos_files/figure-gfm/violin-plot-1.png new file mode 100644 index 0000000..4a5fd64 Binary files /dev/null and b/vignettes/seurat_wrapper_circos_files/figure-gfm/violin-plot-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/dotplot-cluster-de-1.png b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-cluster-de-1.png new file mode 100644 index 0000000..9c7443b Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-cluster-de-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/dotplot-condition-1.png b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-condition-1.png new file mode 100644 index 0000000..c7c98f8 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-condition-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/dotplot-receptors-1.png b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-receptors-1.png new file mode 100644 index 0000000..6380ac3 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-receptors-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/dotplot-sender-1.png b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-sender-1.png new file mode 100644 index 0000000..2e26798 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/dotplot-sender-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/lfc-heatmap-1.png b/vignettes/seurat_wrapper_files/figure-gfm/lfc-heatmap-1.png new file mode 100644 index 0000000..b31d601 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/lfc-heatmap-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/ligand-receptor-heatmap-1.png b/vignettes/seurat_wrapper_files/figure-gfm/ligand-receptor-heatmap-1.png new file mode 100644 index 0000000..3a322eb Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/ligand-receptor-heatmap-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-1.png b/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-1.png new file mode 100644 index 0000000..4784198 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-adapted-1.png b/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-adapted-1.png new file mode 100644 index 0000000..8164b90 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/ligand-target-heatmap-adapted-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-1.png b/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-1.png new file mode 100644 index 0000000..be25655 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-agnostic-1.png b/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-agnostic-1.png new file mode 100644 index 0000000..ca313f2 Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-agnostic-1.png differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png b/vignettes/seurat_wrapper_files/figure-gfm/summary-vis-cluster-de-1.png similarity index 100% rename from vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-32-1.png rename to vignettes/seurat_wrapper_files/figure-gfm/summary-vis-cluster-de-1.png diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_wrapper_files/figure-gfm/umap-1-1.png similarity index 100% rename from vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-4-1.png rename to vignettes/seurat_wrapper_files/figure-gfm/umap-1-1.png diff --git a/vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/seurat_wrapper_files/figure-gfm/umap-2-1.png similarity index 100% rename from vignettes/seurat_wrapper_circos_files/figure-gfm/unnamed-chunk-5-1.png rename to vignettes/seurat_wrapper_files/figure-gfm/umap-2-1.png diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png deleted file mode 100644 index 76a806d..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-10-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png deleted file mode 100644 index a4c9b67..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-11-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png deleted file mode 100644 index 3365c86..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-12-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png deleted file mode 100644 index 1148682..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-16-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png deleted file mode 100644 index f1ce0c7..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png deleted file mode 100644 index 8e029cf..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-2.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png deleted file mode 100644 index bc2cc82..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-17-3.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png deleted file mode 100644 index b4f34fa..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-18-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png deleted file mode 100644 index fcd8523..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-19-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png deleted file mode 100644 index 1a3fb2f..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-23-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png deleted file mode 100644 index 82e9c74..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-25-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png deleted file mode 100644 index c3bac70..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-26-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png deleted file mode 100644 index d39fe19..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-27-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png deleted file mode 100644 index 28fde41..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-33-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png deleted file mode 100644 index b5db526..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-4-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png deleted file mode 100644 index 12ca0a9..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-5-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png b/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index e926cfe..0000000 Binary files a/vignettes/seurat_wrapper_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git a/vignettes/seurat_wrapper_files/figure-gfm/violin-plot-1.png b/vignettes/seurat_wrapper_files/figure-gfm/violin-plot-1.png new file mode 100644 index 0000000..973803f Binary files /dev/null and b/vignettes/seurat_wrapper_files/figure-gfm/violin-plot-1.png differ diff --git a/vignettes/target_prediction_evaluation_geneset.Rmd b/vignettes/target_prediction_evaluation_geneset.Rmd index 4f71b40..9844afc 100644 --- a/vignettes/target_prediction_evaluation_geneset.Rmd +++ b/vignettes/target_prediction_evaluation_geneset.Rmd @@ -23,116 +23,72 @@ knitr::opts_chunk$set( ) ``` -This vignette shows how NicheNet can be used to to predict which ligands might regulate a given set of genes and how well they do this prediction. For this analysis, you need to define: +This vignette assesses the ligands prioritized by NicheNet in their ability to predict a gene set of interest. We will first follow the steps of [Perform NicheNet analysis starting from a Seurat object](seurat_wrapper.md)) to obtain ligands rankings. Make sure you understand the steps and output of a basic NicheNet analysis (more information in [Perform NicheNet analysis starting from a Seurat object: step-by-step analysis](seurat_steps.md). You can also apply this vignette to the [NicheNet’s ligand activity analysis on a gene set of interest](ligand_activity_geneset.md) vignette. -* a set of genes of which expression in a "receiver cell" is possibly affected by extracellular protein signals (ligands) (e.g. genes differentially expressed upon cell-cell interaction ) -* a set of potentially active ligands (e.g. ligands expressed by interacting "sender cells") - -Therefore, you often first need to process expression data of interacting cells to define both. - -In this example, we will use data from Puram et al. to explore intercellular communication in the tumor microenvironment in head and neck squamous cell carcinoma (HNSCC) [See @puram_single-cell_2017]. More specifically, we will look at which ligands expressed by cancer-associated fibroblasts (CAFs) can induce a specific gene program in neighboring malignant cells. This program, a partial epithelial-mesenschymal transition (p-EMT) program, could be linked by Puram et al. to metastasis. - -For this analysis, we will first assess the ligand activity of each ligand, or in other words, we will assess how well each CAF-ligand can predict the p-EMT gene set compared to the background of expressed genes. This allows us to prioritize p-EMT-regulating ligands. Then, we will assess how well the prioritized ligands together can predict whether genes belong to the gene set of interest or not. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) and example [expression data](https://doi.org/10.5281/zenodo.3260758) of interacting cells can be downloaded from Zenodo. - -### Load packages required for this vignette +## Run NicheNet ```{r} -library(nichenetr) +library(nichenetr) # Please update to v2.0.4 +library(Seurat) +library(SeuratObject) library(tidyverse) ``` -### Read in expression data of interacting cells - -First, we will read in the publicly available single-cell data from CAF and malignant cells from HNSCC tumors. ```{r} -hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) -expression = hnscc_expression$expression -sample_info = hnscc_expression$sample_info # contains meta-information about the cells -``` +# Read in networks +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -Secondly, we will determine which genes are expressed in CAFs and malignant cells from high quality primary tumors. Therefore, we wil not consider cells from tumor samples of less quality or from lymph node metastases. To determine expressed genes, we use the definition used by of Puram et al. +lr_network <- lr_network %>% distinct(from, to) -```{r} -tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") +# Read in expression data and update +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") -CAF_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "CAF") %>% pull(cell) -malignant_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `classified as cancer cell` == 1) %>% pull(cell) +# Run NicheNet +nichenet_output <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + receiver = "CD8 T", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks + ) -expressed_genes_CAFs = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() +best_upstream_ligands <- nichenet_output$ligand_activities %>% + top_n(30, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) ``` -### Load the ligand-target model we want to use - -```{r} -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -``` - - -### Load the gene set of interest and background of genes - -As gene set of interest, we consider the genes of which the expression is possibly affected due to communication with other cells. - -Because we here want to investigate how CAF regulate the expression of p-EMT genes in malignant cells, we will use the p-EMT gene set defined by Puram et al. as gene set of interset and use all genes expressed in malignant cells as background of genes. +## Assess how well top-ranked ligands can predict a gene set of interest - -```{r} -pemt_geneset = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. -head(pemt_geneset) -background_expressed_genes = expressed_genes_malignant %>% .[. %in% rownames(ligand_target_matrix)] -head(background_expressed_genes) -``` - -### Perform NicheNet's ligand activity analysis on the gene set of interest - -In a first step, we will define a set of potentially active ligands. As potentially active ligands, we will use ligands that are 1) expressed by CAFs and 2) can bind a (putative) receptor expressed by malignant cells. Putative ligand-receptor links were gathered from NicheNet's ligand-receptor data sources. - -```{r} -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - -ligands = lr_network %>% pull(from) %>% unique() -expressed_ligands = intersect(ligands,expressed_genes_CAFs) - -receptors = lr_network %>% pull(to) %>% unique() -expressed_receptors = intersect(receptors,expressed_genes_malignant) - -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() -head(potential_ligands) -``` - -Now perform the ligand activity analysis: infer how well NicheNet's ligand-target potential scores can predict whether a gene belongs to the p-EMT program or not. - -```{r} -ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -``` - -Now, we want to rank the ligands based on their ligand activity. In our validation study, we showed that the AUPR between a ligand's target predictions and the observed transcriptional response was the most informative measure to define ligand activity. Therefore, we will rank the ligands based on their AUPR. - -```{r} -ligand_activities %>% arrange(-aupr_corrected) -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) -head(best_upstream_ligands) -``` - -For the top 30 ligands, we will now build a multi-ligand model that uses all top-ranked ligands to predict whether a gene belongs to the p-EMT program of not. This classification model will be trained via cross-validation and returns a probability for every gene. +For the top 30 ligands, we will now build a multi-ligand model that uses all top-ranked ligands to predict whether a gene belongs to the gene set of interest (differentially expressed genes in CD8 T cells after LCMV infection) or not. This classification model will be trained via cross-validation and returns a probability for every gene. ```{r} # change rounds and folds here, to two rounds to reduce time: normally: do multiple rounds -k = 3 # 3-fold -n = 2 # 2 rounds +k <- 3 # 3-fold +n <- 2 # 2 rounds -pemt_gene_predictions_top30_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) +gene_predictions_top30_list <- lapply(1:n, assess_rf_class_probabilities, + folds = k, + geneset = nichenet_output$geneset_oi, + background_expressed_genes = nichenet_output$background_expressed_genes, + ligands_oi = best_upstream_ligands, + ligand_target_matrix = ligand_target_matrix) ``` -Evaluate now how well the target gene probabilies accord to the gene set assignments +Evaluate now how well the target gene probabilities accord to the gene set assignments. ```{r} # get performance: auroc-aupr-pearson -target_prediction_performances_cv = pemt_gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) +target_prediction_performances_cv <- gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% + bind_rows() %>% mutate(round=seq(1:nrow(.))) ``` What is the AUROC, AUPR and PCC of this model (averaged over cross-validation rounds)? @@ -146,24 +102,30 @@ target_prediction_performances_cv$pearson %>% mean() Evaluate now whether genes belonging to the gene set are more likely to be top-predicted. We will look at the top 5% of predicted targets here. ```{r} -# get performance: how many p-EMT genes and non-p-EMT-genes among top 5% predicted targets -target_prediction_performances_discrete_cv = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top30_list), each = 2)) +# get performance: how many viral response genes and non-viral response-genes among top 5% predicted targets +target_prediction_performances_discrete_cv <- gene_predictions_top30_list %>% + lapply(calculate_fraction_top_predicted, + quantile_cutoff = 0.95) %>% + bind_rows(.id = "round") ``` -What is the fraction of p-EMT genes that belongs to the top 5% predicted targets? +What is the fraction of viral response genes that belongs to the top 5% predicted targets? ```{r} target_prediction_performances_discrete_cv %>% filter(true_target) %>% .$fraction_positive_predicted %>% mean() ``` -What is the fraction of non-p-EMT genes that belongs to the top 5% predicted targets? +What is the fraction of non-viral-response genes that belongs to the top 5% predicted targets? ```{r} target_prediction_performances_discrete_cv %>% filter(!true_target) %>% .$fraction_positive_predicted %>% mean() ``` -We see that the p-EMT genes are enriched in the top-predicted target genes. To test this, we will now apply a Fisher's exact test for every cross-validation round and report the average p-value. +We see that the viral response genes are enriched in the top-predicted target genes. To test this, we will now apply a Fisher's exact test for every cross-validation round and report the average p-value. ```{r} -target_prediction_performances_discrete_fisher = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) +target_prediction_performances_discrete_fisher <- gene_predictions_top30_list %>% + lapply(calculate_fraction_top_predicted_fisher, + quantile_cutoff = 0.95) + target_prediction_performances_discrete_fisher %>% unlist() %>% mean() ``` @@ -171,8 +133,10 @@ Finally, we will look at which p-EMT genes are well-predicted in every cross-val ```{r} # get top predicted genes -top_predicted_genes = seq(length(pemt_gene_predictions_top30_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top30_list) %>% reduce(full_join, by = c("gene","true_target")) +top_predicted_genes <- lapply(1:n, get_top_predicted_genes, + gene_predictions_top30_list) %>% + reduce(full_join, by = c("gene","true_target")) + top_predicted_genes %>% filter(true_target) ``` -### References diff --git a/vignettes/target_prediction_evaluation_geneset.md b/vignettes/target_prediction_evaluation_geneset.md index a9e58ea..61e5ba4 100644 --- a/vignettes/target_prediction_evaluation_geneset.md +++ b/vignettes/target_prediction_evaluation_geneset.md @@ -7,181 +7,91 @@ Robin Browaeys rmarkdown::render("vignettes/target_prediction_evaluation_geneset.Rmd", output_format = "github_document") --> -This vignette shows how NicheNet can be used to to predict which ligands -might regulate a given set of genes and how well they do this -prediction. For this analysis, you need to define: - -- a set of genes of which expression in a “receiver cell” is possibly - affected by extracellular protein signals (ligands) (e.g. genes - differentially expressed upon cell-cell interaction ) -- a set of potentially active ligands (e.g. ligands expressed by - interacting “sender cells”) - -Therefore, you often first need to process expression data of -interacting cells to define both. - -In this example, we will use data from Puram et al. to explore -intercellular communication in the tumor microenvironment in head and -neck squamous cell carcinoma (HNSCC) (See Puram et al. 2017). More -specifically, we will look at which ligands expressed by -cancer-associated fibroblasts (CAFs) can induce a specific gene program -in neighboring malignant cells. This program, a partial -epithelial-mesenschymal transition (p-EMT) program, could be linked by -Puram et al. to metastasis. - -For this analysis, we will first assess the ligand activity of each -ligand, or in other words, we will assess how well each CAF-ligand can -predict the p-EMT gene set compared to the background of expressed -genes. This allows us to prioritize p-EMT-regulating ligands. Then, we -will assess how well the prioritized ligands together can predict -whether genes belong to the gene set of interest or not. - -The used [ligand-target matrix](https://doi.org/10.5281/zenodo.7074290) -and example [expression data](https://doi.org/10.5281/zenodo.3260758) of -interacting cells can be downloaded from Zenodo. - -### Load packages required for this vignette - -``` r -library(nichenetr) +This vignette assesses the ligands prioritized by NicheNet in their +ability to predict a gene set of interest. We will first follow the +steps of [Perform NicheNet analysis starting from a Seurat +object](seurat_wrapper.md)) to obtain ligands rankings. Make sure you +understand the steps and output of a basic NicheNet analysis (more +information in [Perform NicheNet analysis starting from a Seurat object: +step-by-step analysis](seurat_steps.md). You can also apply this +vignette to the [NicheNet’s ligand activity analysis on a gene set of +interest](ligand_activity_geneset.md) vignette. + +## Run NicheNet + +``` r +library(nichenetr) # Please update to v2.0.4 +library(Seurat) +library(SeuratObject) library(tidyverse) ``` -### Read in expression data of interacting cells - -First, we will read in the publicly available single-cell data from CAF -and malignant cells from HNSCC tumors. - -``` r -hnscc_expression = readRDS(url("https://zenodo.org/record/3260758/files/hnscc_expression.rds")) -expression = hnscc_expression$expression -sample_info = hnscc_expression$sample_info # contains meta-information about the cells -``` - -Secondly, we will determine which genes are expressed in CAFs and -malignant cells from high quality primary tumors. Therefore, we wil not -consider cells from tumor samples of less quality or from lymph node -metastases. To determine expressed genes, we use the definition used by -of Puram et al. - -``` r -tumors_remove = c("HN10","HN","HN12", "HN13", "HN24", "HN7", "HN8","HN23") - -CAF_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `non-cancer cell type` == "CAF") %>% pull(cell) -malignant_ids = sample_info %>% filter(`Lymph node` == 0 & !(tumor %in% tumors_remove) & `classified as cancer cell` == 1) %>% pull(cell) - -expressed_genes_CAFs = expression[CAF_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -expressed_genes_malignant = expression[malignant_ids,] %>% apply(2,function(x){10*(2**x - 1)}) %>% apply(2,function(x){log2(mean(x) + 1)}) %>% .[. >= 4] %>% names() -``` - -### Load the ligand-target model we want to use - ``` r -ligand_target_matrix = readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds")) -ligand_target_matrix[1:5,1:5] # target genes in rows, ligands in columns -## A2M AANAT ABCA1 ACE ACE2 -## A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 -## A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 -## A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 -## A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 -## A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 -``` +# Read in networks +lr_network <- readRDS(url("https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds")) +ligand_target_matrix <- readRDS(url("https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds")) +weighted_networks <- readRDS(url("https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds")) -### Load the gene set of interest and background of genes +lr_network <- lr_network %>% distinct(from, to) -As gene set of interest, we consider the genes of which the expression -is possibly affected due to communication with other cells. +# Read in expression data and update +seuratObj <- readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) +seuratObj <- UpdateSeuratObject(seuratObj) +seuratObj <- alias_to_symbol_seurat(seuratObj, "mouse") -Because we here want to investigate how CAF regulate the expression of -p-EMT genes in malignant cells, we will use the p-EMT gene set defined -by Puram et al. as gene set of interset and use all genes expressed in -malignant cells as background of genes. +# Run NicheNet +nichenet_output <- nichenet_seuratobj_aggregate( + seurat_obj = seuratObj, + sender = c("CD4 T","Treg", "Mono", "NK", "B", "DC"), + receiver = "CD8 T", + condition_colname = "aggregate", + condition_oi = "LCMV", + condition_reference = "SS", + expression_pct = 0.05, + ligand_target_matrix = ligand_target_matrix, + lr_network = lr_network, + weighted_networks = weighted_networks + ) +## [1] "Read in and process NicheNet's networks" +## [1] "Define expressed ligands and receptors in receiver and sender cells" +## [1] "Perform DE analysis in receiver cell" +## [1] "Perform NicheNet ligand activity analysis" +## [1] "Infer active target genes of the prioritized ligands" +## [1] "Infer receptors of the prioritized ligands" +## [1] "Perform DE analysis in sender cells" -``` r -pemt_geneset = readr::read_tsv(url("https://zenodo.org/record/3260758/files/pemt_signature.txt"), col_names = "gene") %>% pull(gene) %>% .[. %in% rownames(ligand_target_matrix)] # only consider genes also present in the NicheNet model - this excludes genes from the gene list for which the official HGNC symbol was not used by Puram et al. -head(pemt_geneset) -## [1] "SERPINE1" "TGFBI" "MMP10" "LAMC2" "P4HA2" "PDPN" -background_expressed_genes = expressed_genes_malignant %>% .[. %in% rownames(ligand_target_matrix)] -head(background_expressed_genes) -## [1] "RPS11" "ELMO2" "PNMA1" "MMP2" "TMEM216" "ERCC5" +best_upstream_ligands <- nichenet_output$ligand_activities %>% + top_n(30, aupr_corrected) %>% arrange(desc(aupr_corrected)) %>% pull(test_ligand) ``` -### Perform NicheNet’s ligand activity analysis on the gene set of interest - -In a first step, we will define a set of potentially active ligands. As -potentially active ligands, we will use ligands that are 1) expressed by -CAFs and 2) can bind a (putative) receptor expressed by malignant cells. -Putative ligand-receptor links were gathered from NicheNet’s -ligand-receptor data sources. - -``` r -lr_network = readRDS(url("https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds")) - -ligands = lr_network %>% pull(from) %>% unique() -expressed_ligands = intersect(ligands,expressed_genes_CAFs) - -receptors = lr_network %>% pull(to) %>% unique() -expressed_receptors = intersect(receptors,expressed_genes_malignant) - -potential_ligands = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors) %>% pull(from) %>% unique() -head(potential_ligands) -## [1] "A2M" "ADAM10" "ADAM12" "ADAM15" "ADAM17" "ADAM9" -``` - -Now perform the ligand activity analysis: infer how well NicheNet’s -ligand-target potential scores can predict whether a gene belongs to the -p-EMT program or not. - -``` r -ligand_activities = predict_ligand_activities(geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands) -``` - -Now, we want to rank the ligands based on their ligand activity. In our -validation study, we showed that the AUPR between a ligand’s target -predictions and the observed transcriptional response was the most -informative measure to define ligand activity. Therefore, we will rank -the ligands based on their AUPR. - -``` r -ligand_activities %>% arrange(-aupr_corrected) -## # A tibble: 203 × 5 -## test_ligand auroc aupr aupr_corrected pearson -## -## 1 TGFB2 0.768 0.123 0.107 0.199 -## 2 CXCL12 0.708 0.0884 0.0721 0.144 -## 3 BMP8A 0.770 0.0880 0.0718 0.177 -## 4 INHBA 0.773 0.0866 0.0703 0.124 -## 5 LTBP1 0.722 0.0785 0.0622 0.163 -## 6 TNXB 0.713 0.0737 0.0574 0.158 -## 7 ENG 0.759 0.0732 0.0569 0.157 -## 8 BMP5 0.745 0.0715 0.0552 0.150 -## 9 VCAN 0.715 0.0711 0.0548 0.142 -## 10 HGF 0.712 0.0711 0.0548 0.138 -## # … with 193 more rows -best_upstream_ligands = ligand_activities %>% top_n(30, aupr_corrected) %>% arrange(-aupr_corrected) %>% pull(test_ligand) -head(best_upstream_ligands) -## [1] "TGFB2" "CXCL12" "BMP8A" "INHBA" "LTBP1" "TNXB" -``` +## Assess how well top-ranked ligands can predict a gene set of interest For the top 30 ligands, we will now build a multi-ligand model that uses -all top-ranked ligands to predict whether a gene belongs to the p-EMT -program of not. This classification model will be trained via +all top-ranked ligands to predict whether a gene belongs to the gene set +of interest (differentially expressed genes in CD8 T cells after LCMV +infection) or not. This classification model will be trained via cross-validation and returns a probability for every gene. ``` r # change rounds and folds here, to two rounds to reduce time: normally: do multiple rounds -k = 3 # 3-fold -n = 2 # 2 rounds +k <- 3 # 3-fold +n <- 2 # 2 rounds -pemt_gene_predictions_top30_list = seq(n) %>% lapply(assess_rf_class_probabilities, folds = k, geneset = pemt_geneset, background_expressed_genes = background_expressed_genes, ligands_oi = best_upstream_ligands, ligand_target_matrix = ligand_target_matrix) +gene_predictions_top30_list <- lapply(1:n, assess_rf_class_probabilities, + folds = k, + geneset = nichenet_output$geneset_oi, + background_expressed_genes = nichenet_output$background_expressed_genes, + ligands_oi = best_upstream_ligands, + ligand_target_matrix = ligand_target_matrix) ``` -Evaluate now how well the target gene probabilies accord to the gene set -assignments +Evaluate now how well the target gene probabilities accord to the gene +set assignments. ``` r # get performance: auroc-aupr-pearson -target_prediction_performances_cv = pemt_gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% bind_rows() %>% mutate(round=seq(1:nrow(.))) +target_prediction_performances_cv <- gene_predictions_top30_list %>% lapply(classification_evaluation_continuous_pred_wrapper) %>% + bind_rows() %>% mutate(round=seq(1:nrow(.))) ``` What is the AUROC, AUPR and PCC of this model (averaged over @@ -189,45 +99,51 @@ cross-validation rounds)? ``` r target_prediction_performances_cv$auroc %>% mean() -## [1] 0.7606117 +## [1] 0.8044756 target_prediction_performances_cv$aupr %>% mean() -## [1] 0.09281456 +## [1] 0.4975771 target_prediction_performances_cv$pearson %>% mean() -## [1] 0.1911942 +## [1] 0.541401 ``` Evaluate now whether genes belonging to the gene set are more likely to be top-predicted. We will look at the top 5% of predicted targets here. ``` r -# get performance: how many p-EMT genes and non-p-EMT-genes among top 5% predicted targets -target_prediction_performances_discrete_cv = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted, quantile_cutoff = 0.95) %>% bind_rows() %>% ungroup() %>% mutate(round=rep(1:length(pemt_gene_predictions_top30_list), each = 2)) +# get performance: how many viral response genes and non-viral response-genes among top 5% predicted targets +target_prediction_performances_discrete_cv <- gene_predictions_top30_list %>% + lapply(calculate_fraction_top_predicted, + quantile_cutoff = 0.95) %>% + bind_rows(.id = "round") ``` -What is the fraction of p-EMT genes that belongs to the top 5% predicted -targets? +What is the fraction of viral response genes that belongs to the top 5% +predicted targets? ``` r target_prediction_performances_discrete_cv %>% filter(true_target) %>% .$fraction_positive_predicted %>% mean() -## [1] 0.3489583 +## [1] 0.45 ``` -What is the fraction of non-p-EMT genes that belongs to the top 5% -predicted targets? +What is the fraction of non-viral-response genes that belongs to the top +5% predicted targets? ``` r target_prediction_performances_discrete_cv %>% filter(!true_target) %>% .$fraction_positive_predicted %>% mean() -## [1] 0.04529767 +## [1] 0.0179845 ``` -We see that the p-EMT genes are enriched in the top-predicted target -genes. To test this, we will now apply a Fisher’s exact test for every -cross-validation round and report the average p-value. +We see that the viral response genes are enriched in the top-predicted +target genes. To test this, we will now apply a Fisher’s exact test for +every cross-validation round and report the average p-value. ``` r -target_prediction_performances_discrete_fisher = pemt_gene_predictions_top30_list %>% lapply(calculate_fraction_top_predicted_fisher, quantile_cutoff = 0.95) +target_prediction_performances_discrete_fisher <- gene_predictions_top30_list %>% + lapply(calculate_fraction_top_predicted_fisher, + quantile_cutoff = 0.95) + target_prediction_performances_discrete_fisher %>% unlist() %>% mean() -## [1] 4.529691e-18 +## [1] 2.332346e-96 ``` Finally, we will look at which p-EMT genes are well-predicted in every @@ -235,36 +151,23 @@ cross-validation round. ``` r # get top predicted genes -top_predicted_genes = seq(length(pemt_gene_predictions_top30_list)) %>% lapply(get_top_predicted_genes,pemt_gene_predictions_top30_list) %>% reduce(full_join, by = c("gene","true_target")) +top_predicted_genes <- lapply(1:n, get_top_predicted_genes, + gene_predictions_top30_list) %>% + reduce(full_join, by = c("gene","true_target")) + top_predicted_genes %>% filter(true_target) -## # A tibble: 41 × 4 -## gene true_target predicted_top_target_round1 predicted_top_target_round2 -## -## 1 SERPINE1 TRUE TRUE TRUE -## 2 MMP1 TRUE TRUE TRUE -## 3 TAGLN TRUE TRUE TRUE -## 4 COL1A1 TRUE TRUE TRUE -## 5 FSTL3 TRUE TRUE TRUE -## 6 MT2A TRUE TRUE TRUE -## 7 TNC TRUE TRUE TRUE -## 8 SEMA3C TRUE TRUE TRUE -## 9 THBS1 TRUE TRUE TRUE -## 10 LAMC2 TRUE TRUE TRUE -## # … with 31 more rows +## # A tibble: 125 × 4 +## gene true_target predicted_top_target_round1 predicted_top_target_round2 +## +## 1 Gbp4 TRUE TRUE TRUE +## 2 Gbp9 TRUE TRUE TRUE +## 3 Ifi203 TRUE TRUE TRUE +## 4 Ifi209 TRUE TRUE TRUE +## 5 Ifi213 TRUE TRUE TRUE +## 6 Ifi208 TRUE TRUE TRUE +## 7 Mndal TRUE TRUE TRUE +## 8 Ifi206 TRUE TRUE TRUE +## 9 Phf11 TRUE TRUE TRUE +## 10 Ifit3b TRUE TRUE TRUE +## # ℹ 115 more rows ``` - -### References - -
- -
- -Puram, Sidharth V., Itay Tirosh, Anuraag S. Parikh, Anoop P. Patel, -Keren Yizhak, Shawn Gillespie, Christopher Rodman, et al. 2017. -“Single-Cell Transcriptomic Analysis of Primary and Metastatic Tumor -Ecosystems in Head and Neck Cancer.” *Cell* 171 (7): 1611–1624.e24. -. - -
- -
diff --git a/vignettes/tgfb3_targets_signaling_path.jpg b/vignettes/tgfb3_targets_signaling_path.jpg deleted file mode 100644 index e34ee9a..0000000 Binary files a/vignettes/tgfb3_targets_signaling_path.jpg and /dev/null differ diff --git a/vignettes/tgfb3_targets_signaling_path.png b/vignettes/tgfb3_targets_signaling_path.png deleted file mode 100644 index 1e40cd8..0000000 Binary files a/vignettes/tgfb3_targets_signaling_path.png and /dev/null differ diff --git a/vignettes/workflow_model_construction.jpg b/vignettes/workflow_model_construction.jpg deleted file mode 100644 index 4d3dffa..0000000 Binary files a/vignettes/workflow_model_construction.jpg and /dev/null differ