-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathutil.R
130 lines (119 loc) · 6.53 KB
/
util.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
##############################################################################################################################
# Additional parsers
##############################################################################################################################
#' Helper function to read in the DP algorithm files
#' @param indir The directory in which the files were stored
#' @return A GS.data object with fields S.i, V.h, pi.h and alpha
#' @author sd11
read_gsdata_object = function(indir, samplename, no.iters, conc_param, cluster_conc) {
GS.data = list()
filename_prefix = file.path(indir, paste(samplename, "_2D_iters", no.iters, "_concParam", conc_param, "_clusterWidth", 1/cluster_conc, sep=""))
GS.data$S.i = as.matrix(read.csv(paste(filename_prefix, "_states.csv", sep=""), row.names=1))
GS.data$V.h = as.matrix(read.csv(paste(filename_prefix, "_stickbreaking_weights.csv", sep=""), row.names=1))
GS.data$pi.h = as.matrix(read.csv(paste(filename_prefix, "_discreteMutationCopyNumbers.csv", sep=""), row.names=1))
GS.data$alpha = read.csv(paste(filename_prefix, "_alpha.csv", sep=""), row.names=1)
# Fix the dimensions of this array
pi.h = array(NA, c(nrow(GS.data$pi.h), ncol(GS.data$pi.h), 1))
pi.h[1:nrow(GS.data$pi.h), 1:ncol(GS.data$pi.h), 1] = GS.data$pi.h
GS.data$pi.h = pi.h
return(GS.data)
}
##############################################################################################################################
# Functions to build mutation coassignment probability matrices
##############################################################################################################################
#' Helper function that builds a mutation to mutation coassignment probability matrix out of mutation to cluster assignments
#' @param GS.data Main clustering output directly from the chain
#' @param density Posterior density through the MCMC output
#' @param no.muts The number of mutations in the data set
#' @param sampledIters Iterations to consider
#' @return A no.muts x no.muts matrix where each cell contains the fraction of iterations the pair of mutations was assigned to the same cluster
build_coassignment_prob_matrix_preferences = function(GS.data, density, no.muts, sampledIters) {
res = getLocalOptima(density, hypercube.size=5)
localOptima = res$localOptima
peak.indices = res$peak.indices
no.optima = length(localOptima)
boundary = array(NA,no.optima-1)
mutation.preferences = array(0,c(no.muts,no.optima))
for(i in 1:(no.optima-1)){
min.density = min(density$median.density[(peak.indices[i]+1):(peak.indices[i+1]-1)])
min.indices = intersect(which(density$median.density == min.density),(peak.indices[i]+1):(peak.indices[i+1]-1))
#what distance along the line between a pair of optima do we have to go to reach the minimum density
boundary[i] = (density$fraction.of.tumour.cells[max(min.indices)] + density$fraction.of.tumour.cells[min(min.indices)])/2
}
# Adapt this to make a table with the preferred cluster for each mutation in each iteration
S.i = data.matrix(GS.data$S.i)
pi.h = GS.data$pi.h[,,1]
mutation.preferences = array(0, c(no.muts, length(sampledIters)))
coassignments = array(0, c(no.muts, no.muts))
for(s in sampledIters) {
#temp.preferences = array(0,c(no.muts,no.optima))
for(c in unique(S.i[s,])){
bestOptimum = sum(pi.h[s,c]>boundary)+1
#temp.preferences[S.i[s,]==c,bestOptimum] = temp.preferences[S.i[s,]==c,bestOptimum] + 1
assigned.muts = which(S.i[s,]==c)
coassignments[assigned.muts, assigned.muts] = coassignments[assigned.muts, assigned.muts] + 1
}
#iter.preferences = t(apply(temp.preferences, 1, function(p) { as.integer(p==max(p)) / sum(p==max(p)) }))
#mutation.preferences = mutation.preferences + iter.preferences
}
coassignments = coassignments / length(sampledIters)
return(coassignments)
}
#' Fetch the CCFs of the clusters to which SNVs have been assigned during MCMC into a big table
#' @param pi.h The cluster CCFs
#' @param S.i The mutation assignments to cluster ids
#' @param no.muts Number of total SNVs
#' @param no.iters The total number of iterations
#' @param no.iters.burn.in The total number of iterations to use as burnin
#' @return A matrix with a column for each SNV and a row for each to consider iteration with the cell containing the CCF
#' @author sd11
get_snv_assignment_ccfs = function(pi.h, S.i, no.muts, no.timepoints, no.iters, no.iters.burn.in) {
#' Get assignment ccfs for each snv
no.iters.post.burnin = no.iters-no.iters.burn.in
snv_ccfs = array(NA, c(no.iters.post.burnin, no.muts, no.timepoints))
x = (no.iters.burn.in+1):no.iters
for (t in 1:no.timepoints) {
for (i in 1:no.muts) {
snv_ccfs[, i, t] = pi.h[cbind(x, S.i[-c(1:no.iters.burn.in), i], t)]
}
}
return(snv_ccfs)
}
#' Helper function that builds the a density over assignment CCFs for each mutation
get_snv_ccf_assignmnent_density = function(S.i, pi.h, no.iters.burn.in, ccf_max_value=10) {
snv_ccfs = get_snv_assignment_ccfs(pi.h, S.i, ncol(S.i), dim(pi.h)[3], nrow(S.i), no.iters.burn.in)
snv_ccfs = snv_ccfs[,,1]
snv_densities = lapply(1:ncol(snv_ccfs), function(i) {
if (all(snv_ccfs[,i] < ccf_max_value)) {
ggplot_build(ggplot(data.frame(ccf=snv_ccfs[,i])) + aes(x=ccf, y=..density..) + geom_density() + xlim(0, ccf_max_value))$data[[1]]$y
} else {
NA
}
})
snv_densities = lapply(snv_densities, function(dat) { dat/sum(dat) })
return(snv_densities)
}
#' Build a coassignment probability matrix using SNV specific densities over
#' the assigned CCFs during MCMC
#' @param S.i The mutation assignments to cluster ids
#' @param pi.h The cluster CCFs
#' @param no.iters.burn.in The number of iterations to discard as burn in
#' @return Square matrix with a row and column for every mutation, each cell contains the probability of the pair of mutations belonging to the same cluster
#' @author dw9, sd11
build_coassignment_prob_matrix_densities = function(S.i, pi.h, no.iters.burn.in) {
snv_densities = get_snv_ccf_assignmnent_density(S.i, pi.h, no.iters.burn.in)
no.muts = length(snv_densities)
identity.strengths = array(0,c(no.muts,no.muts))
for (i in 1:(no.muts-1)) {
identity.strengths[i,i] = 1
for(j in (i+1):no.muts) {
if (is.na(snv_densities[[i]]) | is.na(snv_densities[[j]])) {
identity.strengths[i,j] = identity.strengths[j,i] = 0
} else {
identity.strengths[i,j] = identity.strengths[j,i] = 1-(sum(abs(snv_densities[[i]]-snv_densities[[j]])) / 2)
}
}
}
identity.strengths[no.muts,no.muts] = 1
return(identity.strengths)
}