-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbarcodeCounter.jl
41 lines (35 loc) · 1.38 KB
/
barcodeCounter.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#File Name: barcodeCounter.jl
#Descr: Julia script that finds the barcodes of a fastq file based on a given spacer sequence
#Dependencies:
# Julia 1.4.2
# FASTX
# DelimitedFiles
#
# Arguments: barcodeDictionary.tsv *.fastq *.info
# 1) barcode dictionary in a tsv format where the first column holds barcodes and the second column holds names
# 2) a fastq file
# 3) a file that contains the spacer sequence (and only the spacer sequence)
include("barcodeCounterFuncts.jl");
using DelimitedFiles
using FASTX
# Script body: build per-barcode count/name dictionaries from the barcode file
# (ARGS[1]), fill them from the fastq file (ARGS[2]), sort the merged table and
# write it out. readBarcodeFile, fillDict, syncDictsToArray, createOutputFileName
# and writeResultsToFile are not defined here; they are expected to come from the
# included barcodeCounterFuncts.jl.

# Fail early with a usage message instead of an opaque BoundsError on ARGS[1] /
# ARGS[2]. Only the two arguments indexed in this script are required; any extra
# arguments are accepted and left untouched.
length(ARGS) >= 2 || throw(ArgumentError(
    "expected at least 2 arguments, got $(length(ARGS)); usage: " *
    "julia barcodeCounter.jl barcodeDictionary.tsv reads.fastq [spacer.info]"))

#read barcode (dictionary) file
grid = readBarcodeFile(ARGS[1])

#split the first two columns of the grid into barcodes and sequence names
barcodeArray = grid[:, 1]
# Deliberately not called `names`: that identifier is exported by Base, and a
# top-level assignment to it shadows Base.names (and errors outright if Main has
# already resolved `names` to Base's function).
seqNames = grid[:, 2]

#create a zero-initialised count for every entry of the name column
counts = fill(0, length(seqNames))

#create dictionaries keyed by barcode to align names, barcodes, and counts
countDict = Dict(barcodeArray .=> counts)
nameDict = Dict(barcodeArray .=> seqNames)

#fill dictionaries with data from fastq file
# The return value is ignored: the script relies on fillDict updating the
# dictionaries it is given in place.
fillDict(ARGS[2], countDict, nameDict)

#merge both dictionaries to prepare to output data to file
contentArray = syncDictsToArray(countDict, nameDict)

#sort the rows of the combined matrix by their third element, in descending order
# (presumably the third column holds the counts -- confirm against syncDictsToArray)
sortedContentArr = sortslices(contentArray, dims=1, by=row -> row[3], rev=true)

#create the output file name from the fastq file name
fileName = createOutputFileName(ARGS[2])

#write to file
writeResultsToFile(sortedContentArr, fileName)