-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhomolgs.R
88 lines (76 loc) · 4.15 KB
/
homolgs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/R
# this script runs the snpeff on gnes and their homologs and pulls out the corresponding information for the strain(s) with an insertion
# unto the CDS of that gene (typically has lethal RNAi pehnotype)
# USE: homologs.R
library(cegwas)
library(dplyr)
library(tidyr)
setwd("/Users/kristen/Documents/transposon_figure_data/data")
homologs<-read.table("homologs.txt")
colnames(homologs)<-c("type_of","WB","transcript","gene_name","homolog","homolog_transcript")
hom_list<-list()
for (i in homologs$WB){
hom_list<-c(hom_list,i)
}
for (i in homologs$homolog){
hom_list<-c(hom_list,i)
}
str(hom_list) # list of homologs to run through SnpEff
hom_results<-snpeff(hom_list) # run SnpEff on the homologs
hom_results<-distinct(hom_results)
ins<-read.table("essentiality_nonredundant_cds.txt", sep = "\t")
colnames(ins)<-c( "chromosome","position","method","TE","CDS","transcript","gene_name","biotype","lethal_pheno","GO","strain_no","strains")
ins_info<-select(ins,strains,transcript)
temp1<-select(homologs,WB,transcript)
temp2<-select(homologs,homolog,homolog_transcript)
colnames(temp2)<-c("WB","transcript")
WB_trans_info<-rbind(temp1,temp2)
full_ins_info<-full_join(ins_info,WB_trans_info,by="transcript")
full_ins_info<-filter(full_ins_info,!is.na(strains)) # strains with insertions in CDS
unique(full_ins_info$transcript)
colnames(full_ins_info)<-c("strains","transcript","gene_id")
hom_results<-left_join(hom_results,full_ins_info, by="gene_id")
save(hom_results, file="hom_results.Rda")
unique(hom_results$strain)
#"strains" column is the strains with an insertion into the CDS of the corresponding gene
class(hom_results$strains)
strains_to_test<-unlist(strsplit(as.character(hom_results$strains), split=", "))
unique(hom_results$transcript)
# homolog pairs
#WBGene00002041 # WBGene00004969 hum-8
#WBGene00008296 # WBGene00008297 cdr-2
#WBGene00006925 # WBGene00006926 vit-1
#WBGene00014193 # WBGene00045515 nhr-247
#WBGene00009607 # WBGene00009609 oac-26
#WBGene00004178 # WBGene00004179 prg-1
hom_results_strains<-filter(hom_results, strain %in% strains_to_test)
hom_results_strains<-select(hom_results_strains,CHROM,POS,strain,REF,ALT,a1,a2,GT,FT,FILTER,query,allele,effect,impact,gene_name,gene_id,feature_type,transcript_biotype,exon_intron_rank,nt_change,aa_change,protein_position,distance_to_feature,feature_id,strains,transcript)
compare1<-filter(hom_results_strains,gene_id =="WBGene00002041" | gene_id =="WBGene00004969")
strains_to_test<-unlist(strsplit(as.character(compare1$strains), split=", "))
length(unique(compare1$strain))
compare1<-filter(compare1, strain %in% strains_to_test)
compare1<-filter(compare1, GT=="ALT")
compare2<-filter(hom_results_strains,gene_id =="WBGene00008296" | gene_id =="WBGene00008297")
strains_to_test<-unlist(strsplit(as.character(compare2$strains), split=", "))
compare2<-filter(compare2, strain %in% strains_to_test)
compare2<-filter(compare2, GT=="ALT")
compare3<-filter(hom_results_strains,gene_id =="WBGene00006925" | gene_id =="WBGene00006926")
strains_to_test<-unlist(strsplit(as.character(compare3$strains), split=", "))
compare3<-filter(compare3, strain %in% strains_to_test)
compare3<-filter(compare3, GT=="ALT")
compare4<-filter(hom_results_strains,gene_id =="WBGene00014193" | gene_id =="WBGene00045515")
strains_to_test<-unlist(strsplit(as.character(compare4$strains), split=", "))
compare4<-filter(compare4, strain %in% strains_to_test)
compare4<-filter(compare4, GT=="ALT")
compare5<-filter(hom_results_strains,gene_id =="WBGene00009607" | gene_id =="WBGene00009609")
strains_to_test<-unlist(strsplit(as.character(compare5$strains), split=", "))
compare5<-filter(compare5, strain %in% strains_to_test)
compare5<-filter(compare5, GT=="ALT")
#compare6<-filter(hom_results_strains,gene_id =="WBGene00004178" | gene_id =="WBGene00004179")
#strains_to_test<-unlist(strsplit(as.character(compare6$strains), split=", "))
#compare6<-filter(compare6, strain %in% strains_to_test)
#compare6<-filter(compare6, GT=="ALT")
#setwd("/Users/kristen/Dropbox/AndersenLab/LabFolders/Kristen/TEPaper/recent")
#load("homolog_comparisons.Rda")
save(compare1,compare2,compare3,compare4,compare5,compare6, file="homolog_comparisons.Rda")
load("homolog_comparisons.Rda")