From 0e55a0bf33a942913324b9a9dafb4cfb96e24f23 Mon Sep 17 00:00:00 2001 From: iheartfoosball Date: Mon, 16 Aug 2021 20:59:54 -0700 Subject: [PATCH 1/2] Update visualization.R Seems like there's an issue with removing pathways in netVisual_embeddingPairwise. Pathways to be removed are identified in the similarity matrix, where each pathway is represented multiple times (once for each dataset). Each pathway in pathway.remove is labeled with the dataset in which it meets the removal condition (colSums(similarity)==1), e.g. "Pathway1--Dataset1". However, when removing these from each individual dataset's probability matrix, the "--Dataset1" is dropped and "Pathway1" is subsequently removed from all datasets. As a result, the filtered similarity matrix (datasets combined) has a different number of rows compared to the sum of the rows of each dataset's filtered probability matrix. This will happen anytime a pathway is found in multiple datasets but does not meet the removal condition in all of those datasets. I can think of two ways to resolve this: A) remove each pathway in pathway.remove from all datasets (in the similarity matrix and in each individual probability matrix) B) only remove a pathway from an individual dataset's probability matrix if the pathway met the removal condition in that particular dataset. I am not too familiar with the theory here, so I'm not sure what is best. 
For now I've edited the function to do option B, and I labeled the edited portions "edit_1" and "edit_2" --- R/visualization.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/visualization.R b/R/visualization.R index 2009f5b..04ce164 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -3100,13 +3100,19 @@ netVisual_embeddingPairwise <- function(object, slot.name = "netP", type = c("fu if (is.null(pathway.remove)) { similarity <- methods::slot(object, slot.name)$similarity[[type]]$matrix[[comparison.name]] pathway.remove <- rownames(similarity)[which(colSums(similarity) == 1)] - pathway.remove <- sub("--.*", "", pathway.remove) - } + + #***edit_1: keep dataset name as suffix, then later remove pathways from their corresponding datasets + #***note: other option is to remove these pathways from all datasets, aka grepl and remove relevant parts of 'Y' and 'clusters' + #pathway.remove <- sub("--.*", "", pathway.remove) } if (length(pathway.remove) > 0) { for (i in 1:length(prob)) { probi <- prob[[i]] - pathway.remove.idx <- which(dimnames(probi)[[3]] %in% pathway.remove) + + #***edit_2: only remove pathway if it was problematic in this particular dataset (based on suffix after "--") + #pathway.remove.idx <- which(dimnames(probi)[[3]] %in% pathway.remove) + pathway.remove.idx <- which(paste0(dimnames(probi)[[3]],"--",object.names[i]) %in% pathway.remove) + if (length(pathway.remove.idx) > 0) { probi <- probi[ , , -pathway.remove.idx] } From 5678392f0655ba17afbdabd7db6e6e0fb4267ee7 Mon Sep 17 00:00:00 2001 From: iheartfoosball Date: Mon, 16 Aug 2021 21:20:27 -0700 Subject: [PATCH 2/2] Update visualization.R --- R/visualization.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/visualization.R b/R/visualization.R index 04ce164..faa5cb1 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -3103,7 +3103,8 @@ netVisual_embeddingPairwise <- function(object, slot.name = "netP", type = c("fu #***edit_1: keep 
dataset name as suffix, then later remove pathways from their corresponding datasets #***note: other option is to remove these pathways from all datasets, aka grepl and remove relevant parts of 'Y' and 'clusters' - #pathway.remove <- sub("--.*", "", pathway.remove) } + #pathway.remove <- sub("--.*", "", pathway.remove) + } if (length(pathway.remove) > 0) { for (i in 1:length(prob)) {