-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathReferenceSequence.java
79 lines (63 loc) · 2.5 KB
/
ReferenceSequence.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package nhs.genetics.cardiff.framework;
import htsjdk.samtools.reference.FastaSequenceIndex;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
import nhs.genetics.cardiff.framework.GenomicLocation;
/**
 * Wrapper around htsjdk fasta functions. Extracts the bases of a target region,
 * optionally padded on both sides, from an indexed FASTA file.
 *
 * <p>Usage: construct the object, call {@link #populateReferenceSequence()} once, then
 * read the result through {@link #getReferenceSequence()}, {@link #getLength()} or
 * {@link #isRefAllNSites()}.
 *
 * @author Matt Lyon
 * @version 1.0
 * @since 2015-04-21
 */
public class ReferenceSequence {

    private static final Logger log = Logger.getLogger(ReferenceSequence.class.getName());

    /** Region whose bases are extracted. */
    private final GenomicLocation location;

    /** FASTA file holding the reference genome. */
    private final File fastaFilePath;

    /** Index file describing {@link #fastaFilePath}. */
    private final File indexFilePath;

    /** Number of extra bases requested on each side of the region. */
    private final int padding;

    /** Extracted bases; {@code null} until {@link #populateReferenceSequence()} succeeds. */
    private String referenceSequence;

    /**
     * Creates an extractor for the given region without padding.
     *
     * @param location region to extract
     * @param fastaFilePath reference genome FASTA file
     * @param indexFilePath index file for {@code fastaFilePath}
     */
    public ReferenceSequence(GenomicLocation location, File fastaFilePath, File indexFilePath) {
        this(location, fastaFilePath, indexFilePath, 0);
    }

    /**
     * Creates an extractor for the given region, widened by {@code padding} bases on each side.
     *
     * @param location region to extract
     * @param fastaFilePath reference genome FASTA file
     * @param indexFilePath index file for {@code fastaFilePath}
     * @param padding number of bases added before the start and after the end of the region
     */
    public ReferenceSequence(GenomicLocation location, File fastaFilePath, File indexFilePath, int padding) {
        this.location = location;
        this.fastaFilePath = fastaFilePath;
        this.indexFilePath = indexFilePath;
        this.padding = padding;
    }

    /**
     * Reads the padded region from the FASTA file and stores it as a string.
     *
     * <p>Coordinates are 1-based. When the padding would push the start below position 1,
     * the start is clamped to 1, so the stored sequence can be shorter than
     * region length plus twice the padding.
     *
     * <p>An {@link IOException} is logged at {@link Level#SEVERE} and not rethrown; in that
     * case the stored sequence is left unchanged. Unchecked exceptions raised by htsjdk are
     * not caught here.
     */
    public void populateReferenceSequence() {
        // read fasta index
        final FastaSequenceIndex refGenomeIndex = new FastaSequenceIndex(indexFilePath);

        // try-with-resources closes the reader; no explicit close() is needed
        try (IndexedFastaSequenceFile refGenomeFasta =
                new IndexedFastaSequenceFile(fastaFilePath, refGenomeIndex)) {

            // widen in long arithmetic so large paddings cannot overflow, and never ask for
            // a position before the first base of the contig
            final long start = Math.max(1L, (long) location.getStartPosition() - padding);
            final long end = (long) location.getEndPosition() + padding;

            final byte[] bytes =
                    refGenomeFasta.getSubsequenceAt(location.getContig(), start, end).getBases();

            // the Charset overload cannot throw UnsupportedEncodingException
            referenceSequence = new String(bytes, StandardCharsets.UTF_8);

        } catch (IOException e) {
            // pass the throwable as well so the stack trace reaches the log handlers
            log.log(Level.SEVERE, "Problem reading reference genome: " + e.toString(), e);
        }
    }

    /**
     * Tells whether every base of the extracted sequence is an upper-case {@code 'N'}.
     * Lower-case {@code 'n'} is not treated as an N site. An empty sequence yields
     * {@code true}.
     *
     * @return {@code true} if no base other than {@code 'N'} is present
     * @throws IllegalStateException if the sequence has not been populated
     */
    public boolean isRefAllNSites() {
        final String sequence = requirePopulated();
        for (int i = 0; i < sequence.length(); i++) {
            if (sequence.charAt(i) != 'N') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the extracted sequence.
     *
     * @return the bases as a string, or {@code null} if the sequence has not been populated
     */
    public String getReferenceSequence() {
        return referenceSequence;
    }

    /**
     * Returns the number of bases in the extracted sequence.
     *
     * @return length of the extracted sequence
     * @throws IllegalStateException if the sequence has not been populated
     */
    public int getLength() {
        return requirePopulated().length();
    }

    /** Returns the stored sequence, failing with a descriptive error when it is missing. */
    private String requirePopulated() {
        if (referenceSequence == null) {
            throw new IllegalStateException(
                    "Reference sequence is not populated; call populateReferenceSequence() first"
                            + " and check the log for read errors");
        }
        return referenceSequence;
    }
}