Skip to content

Commit 124f689

Browse files
committed
add function maf2variants
1 parent 37440e8 commit 124f689

11 files changed

+13928
-13298
lines changed

NAMESPACE

+6
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ export(calRoutines)
77
export(getClinSites)
88
export(getKaKs)
99
export(inferClonalTrees)
10+
export(maf2variants)
1011
export(plotCNA)
1112
export(plotCNAProfile)
1213
export(plotCNAtree)
1314
export(plotMutTree)
1415
export(plotVafCluster)
1516
export(readCNAProfile)
17+
export(readMaf)
1618
export(set.colors)
1719
export(splitSegment)
1820
export(tree2timescape)
@@ -25,6 +27,7 @@ import(ape)
2527
import(clonevol)
2628
import(ggrepel)
2729
import(ggtree)
30+
import(methods)
2831
import(phangorn)
2932
import(treeio)
3033
importFrom(data.table,":=")
@@ -36,11 +39,14 @@ importFrom(data.table,.N)
3639
importFrom(data.table,.NGRP)
3740
importFrom(data.table,.SD)
3841
importFrom(data.table,data.table)
42+
importFrom(data.table,fread)
43+
importFrom(data.table,setkey)
3944
importFrom(grDevices,dev.off)
4045
importFrom(grDevices,pdf)
4146
importFrom(magrittr,"%>%")
4247
importFrom(methods,new)
4348
importFrom(stats,as.dist)
49+
importFrom(stats,qnorm)
4450
importFrom(stats,rnorm)
4551
importFrom(stats,setNames)
4652
importFrom(utils,data)

R/maf2variants.R

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
2+
#' maf2variants
3+
#' @description Change the maf object into variants data frame for the subclonal structures analysis.
4+
5+
#' @param maf Maf or MafList object generated by `readMaf()` function
6+
#' @param patient.id Select the specific patients. Default `NULL`, all patients are included.
7+
#' @param ccf.cutoff Removing low-CCF mutations (default: 0.1).
8+
#' @param extract.VAF Whether extract the VAF information. Default `FALSE`: extract CCF rather than VAF.
9+
#'
10+
#' @details
11+
#'
12+
#' This function extracts the `Cluster` information from the CCF data. Therefore, the `Cluster` column is required in the ccfFile.
13+
#'
14+
#' For the output `variants`, the first five columns are Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster. The remaining columns indicate variant cellular prevalence for each sample.
15+
#'
16+
#'
17+
#' @examples
18+
#' #' data.type <- "split1"
19+
#'
20+
#' maf1 <- readMaf(
21+
#' mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
22+
#' ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
23+
#' clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
24+
#' refBuild = "hg19",
25+
#' ccf.conf.level = 0.95
26+
#' )
27+
#'
28+
#' ccfs = maf2variants(maf1, patient.id = "Met1")
29+
#'
30+
#' #extract VAF rather than CCF.
31+
#' vafs = maf2variants(maf1, patient.id = "Met1", extract.VAF = T)
32+
#'
33+
#' @export
34+
#'
35+
36+
maf2variants <- function(
37+
maf,
38+
patient.id = NULL,
39+
ccf.cutoff = 0.1,
40+
extract.VAF = FALSE
41+
){
42+
43+
processMaf2Vars = function(m) {
44+
45+
maf_data <- MesKit::getMafData(m)
46+
patient <- MesKit::getMafPatient(m)
47+
48+
#Check whether the Cluster column exists in CCF data.
49+
if(!"Cluster" %in% colnames(maf_data)){
50+
stop("The Cluster column is missing in maf data, stop.")
51+
}
52+
53+
if(extract.VAF){
54+
55+
#VAF
56+
message("Extract VAF rather than CCF")
57+
mut_standardcol = c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification","Tumor_Sample_Barcode","Tumor_ID", "Patient_ID", "Tumor_Sample_Label", "VAF", "Cluster")
58+
59+
vars = maf_data %>%
60+
dplyr::select(dplyr::all_of(mut_standardcol)) %>%
61+
dplyr::mutate(Mutid = str_c(Chromosome, Start_Position, End_Position, Reference_Allele,Tumor_Seq_Allele2, sep = ":")) %>%
62+
dplyr::rowwise() %>%
63+
dplyr::mutate(VAF = ifelse(max(VAF)<=1, VAF*100, VAF) ) %>%
64+
dplyr::filter(VAF >= ccf.cutoff*100/2) %>%
65+
dplyr::filter(!is.na(Cluster) & Cluster >=1 ) %>%
66+
tidyr::pivot_wider(
67+
id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster),
68+
names_from = c(Tumor_Sample_Label),
69+
values_from = c(VAF),
70+
values_fill = 0
71+
)
72+
}else{
73+
74+
#CCF
75+
mut_standardcol = c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification","Tumor_Sample_Barcode","Tumor_ID", "Patient_ID", "Tumor_Sample_Label", "CCF", "Cluster")
76+
77+
vars = maf_data %>%
78+
dplyr::select(dplyr::all_of(mut_standardcol)) %>%
79+
dplyr::mutate(Mutid = str_c(Chromosome, Start_Position, End_Position, Reference_Allele,Tumor_Seq_Allele2, sep = ":")) %>%
80+
dplyr::rowwise() %>%
81+
dplyr::mutate(CCF = ifelse(max(CCF)<=1, CCF*100, CCF) ) %>%
82+
dplyr::filter(CCF >= ccf.cutoff*100) %>%
83+
dplyr::filter(!is.na(Cluster) & Cluster >=1 ) %>%
84+
tidyr::pivot_wider(
85+
id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster),
86+
names_from = c(Tumor_Sample_Label),
87+
values_from = c(CCF),
88+
values_fill = 0
89+
)
90+
91+
}
92+
93+
message(sprintf("Patient %s has the following cluster: %s", patient,
94+
str_c(sort(unique(vars$Cluster)), collapse = "; ") ))
95+
96+
vars
97+
}
98+
99+
100+
if(is.null(patient.id)){
101+
Vars <- lapply(maf, processMaf2Vars)
102+
names(Vars) <- names(maf)
103+
}else{
104+
Vars = processMaf2Vars(maf[[patient.id]])
105+
}
106+
107+
return(
108+
Vars
109+
)
110+
111+
}
112+
113+
114+
115+
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+
127+

R/plotMutTree.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ plotMutTree <- function(maf,
4848
hexpand_ratio = 0.3,
4949
...) {
5050
message("Building trees")
51-
phyloTree <- getPhyloTree(maf, # TODO unknown package
51+
phyloTree <- MesKit::getPhyloTree(maf,
5252
patient.id = patient.id,
5353
method = method, min.vaf = min.vaf,
5454
bootstrap.rep.num = bootstrap.rep.num,

R/readMaf.R

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#' readMaf
2+
#' @description Read tab delimited MAF (can be plain text or *.gz compressed) file along with sample information file.
3+
#'
4+
#' @param mafFile A tab delimited MAF file (plain text or *.gz compressed). Required.
5+
#' @param clinicalFile A clinical data file includes Tumor_Sample_Barcode, Tumor_ID, Patient_ID. Tumor_Sample_Label is optional. Default NULL.
6+
#' @param ccfFile A CCF file of somatic mutations. Default NULL.
7+
#' @param adjusted.VAF Whether adjusted VAF is included in mafFile. Default FALSE.
8+
#' @param nonSyn.vc List of Variant classifications which are considered as non-silent. Default NULL, use Variant Classifications with "Frame_Shift_Del","Frame_Shift_Ins","Splice_Site","Translation_Start_Site","Nonsense_Mutation","Nonstop_Mutation","In_Frame_Del","In_Frame_Ins","Missense_Mutation"
9+
#' @param use.indel.ccf Whether include indels in ccfFile. Default FALSE.
10+
#' @param ccf.conf.level The confidence level of CCF to identify clonal or subclonal.
11+
#' Only works when "CCF_std" or "CCF_CI_high" is provided in ccfFile. Default 0.95.
12+
#' @param remove.empty.VAF Whether removing the mutations with VAF=0. When making the comparison of pair-wide CCF, retained mutations with VAF=0.
13+
#' @param refBuild Human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.
14+
#'
15+
#' @examples
16+
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
17+
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
18+
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
19+
#' maf <- readMaf(mafFile=maf.File,clinicalFile = clin.File, refBuild="hg19")
20+
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
21+
#' @return an object of Maf or MafList.
22+
#' @import methods
23+
#' @importFrom data.table fread setkey
24+
#' @importFrom stats qnorm
25+
#' @export readMaf
26+
27+
## read.maf main function
28+
readMaf <- function(
29+
mafFile,
30+
clinicalFile,
31+
ccfFile = NULL,
32+
adjusted.VAF = FALSE,
33+
nonSyn.vc = NULL,
34+
use.indel.ccf = FALSE,
35+
ccf.conf.level = 0.95,
36+
remove.empty.VAF = TRUE,
37+
refBuild = "hg19"
38+
) {
39+
40+
refBuild <- match.arg(refBuild, choices = c('hg18', 'hg19', 'hg38'), several.ok = FALSE)
41+
42+
## get non-silent muation types
43+
if (is.null(nonSyn.vc)) {
44+
nonSyn.vc <- c(
45+
"Frame_Shift_Del",
46+
"Frame_Shift_Ins",
47+
"Splice_Site",
48+
"Translation_Start_Site",
49+
"Nonsense_Mutation",
50+
"Nonstop_Mutation",
51+
"In_Frame_Del",
52+
"In_Frame_Ins",
53+
"Missense_Mutation"
54+
)
55+
}
56+
57+
maf_data <- data.table::fread(
58+
file = mafFile,
59+
quote = "",
60+
header = TRUE,
61+
data.table = TRUE,
62+
fill = TRUE,
63+
sep = '\t',
64+
skip = "Hugo_Symbol",
65+
stringsAsFactors = FALSE
66+
)
67+
68+
clin_data <- data.table::fread(
69+
file = clinicalFile,
70+
quote = "",
71+
header = TRUE,
72+
data.table = TRUE,
73+
fill = TRUE,
74+
sep = '\t',
75+
stringsAsFactors = FALSE
76+
)
77+
78+
79+
## merge maf data and clinical data
80+
maf_col <- colnames(maf_data)
81+
clin_col <- colnames(clin_data)
82+
is_col <- intersect(maf_col, clin_col)
83+
is_col <- is_col[is_col!="Tumor_Sample_Barcode"]
84+
maf_data <- dplyr::select(maf_data, -all_of(is_col))
85+
maf_data <- dplyr::left_join(
86+
maf_data,
87+
clin_data,
88+
by = c(
89+
"Tumor_Sample_Barcode"
90+
)
91+
)
92+
93+
# check maf data
94+
maf_data <- validMaf(maf_data, remove.empty.VAF)
95+
96+
## calculate Total_allele_depth
97+
maf_data <- maf_data %>%
98+
dplyr::mutate(Total_allele_depth = .data$Ref_allele_depth + .data$Alt_allele_depth) %>%
99+
as.data.frame()
100+
101+
if(adjusted.VAF){
102+
maf_data$VAF_adj <- maf_data$VAF
103+
}
104+
105+
106+
## read ccf files
107+
if (!is.null(ccfFile)) {
108+
ccf_data <- suppressWarnings(data.table::fread(
109+
ccfFile,
110+
quote = "",
111+
header = TRUE,
112+
fill = TRUE,
113+
sep = '\t',
114+
stringsAsFactors = FALSE
115+
))
116+
## check ccf_data
117+
ccf_data <- validCCF(ccf_data, maf_data, use.indel.ccf = use.indel.ccf)
118+
## merge ccf_data to maf_data
119+
maf_data <- MesKit:::readCCF(maf_data, ccf_data, ccf.conf.level, sample.info, adjusted.VAF, use.indel.ccf = use.indel.ccf)
120+
}
121+
122+
## calculate average adjust VAF
123+
if("VAF_adj" %in% colnames(maf_data)){
124+
maf_data <- maf_data %>%
125+
dplyr::group_by(.data$Patient_ID, .data$Tumor_ID, .data$Chromosome,
126+
.data$Start_Position, .data$Reference_Allele,.data$Tumor_Seq_Allele2) %>%
127+
dplyr::mutate(Tumor_Average_VAF = round(
128+
sum(.data$VAF_adj * .data$Total_allele_depth)/
129+
sum(.data$Total_allele_depth)
130+
,3))
131+
132+
}
133+
134+
maf_data <- maf_data %>%
135+
dplyr::ungroup() %>%
136+
dplyr::select(-"Total_allele_depth") %>%
137+
as.data.frame()
138+
139+
data_list <- split(maf_data, maf_data$Patient_ID)
140+
maf_patient_list <- list()
141+
for(data in data_list){
142+
patient <- unique(data$Patient_ID)
143+
sample.info <- data %>%
144+
dplyr::select("Tumor_Sample_Barcode","Tumor_ID") %>%
145+
dplyr::distinct(.data$Tumor_Sample_Barcode, .keep_all = TRUE)
146+
if(nrow(sample.info) < 2){
147+
n <- nrow(sample.info)
148+
stop(paste0(patient," has only ",n," tumor samples.",
149+
"A minimum of two tumor samples are required for each patient."))
150+
}
151+
## set Maf
152+
maf <- MesKit:::Maf(
153+
data = data.table::setDT(data),
154+
sample.info = as.data.frame(sample.info),
155+
nonSyn.vc = nonSyn.vc,
156+
ref.build = refBuild
157+
)
158+
maf_patient_list[[patient]] <- maf
159+
}
160+
161+
if(length(data_list) > 1){
162+
## set MafList
163+
maf_list <- MesKit:::MafList(maf_patient_list)
164+
return(maf_list)
165+
}else{
166+
return(maf_patient_list[[1]])
167+
}
168+
}
169+
170+
171+

0 commit comments

Comments
 (0)