1
1
import std/ [os, strformat, strutils, parseopt]
2
2
import ./ abif
3
3
4
- # # This module provides a command-line tool for converting ABIF files to FASTQ format
4
+ # # This module provides a command-line tool for converting ABIF files to FASTQ or FASTA format
5
5
# # with optional quality trimming.
6
6
# #
7
7
# # The abi2fq tool extracts sequence and quality data from ABIF files,
8
8
# # applies quality trimming to remove low-quality regions, and outputs
9
- # # in the standard FASTQ format.
9
+ # # in the standard FASTQ format or FASTA format (if --fasta is specified) .
10
10
# #
11
11
# # Command-line usage:
12
12
# #
@@ -20,6 +20,7 @@ import ./abif
20
20
# # -n, --no-trim Disable quality trimming
21
21
# # -v, --verbose Print additional information
22
22
# # --version Show version information
23
+ # # --fasta Output in FASTA format instead of FASTQ
23
24
# #
24
25
# # Examples:
25
26
# #
@@ -32,6 +33,9 @@ import ./abif
32
33
# #
33
34
# # # Convert with custom quality parameters
34
35
# # abi2fq -w 20 -q 30 input.ab1 output.fastq
36
+ # #
37
+ # # # Convert to FASTA format
38
+ # # abi2fq --fasta input.ab1 output.fasta
35
39
36
40
type
37
41
Config * = object
44
48
noTrim* : bool # # Whether to disable quality trimming
45
49
verbose* : bool # # Whether to show verbose output
46
50
showVersion* : bool # # Whether to show version information
51
+ fasta* : bool # # Whether to output in FASTA format instead of FASTQ
47
52
48
53
proc printHelp * () =
49
54
# # Displays the help message for the abi2fq tool.
@@ -61,6 +66,7 @@ Options:
61
66
-n, --no-trim Disable quality trimming
62
67
-v, --verbose Print additional information
63
68
--version Show version information
69
+ --fasta Output in FASTA format instead of FASTQ
64
70
65
71
If output file is not specified, FASTQ will be written to STDOUT.
66
72
"""
@@ -83,7 +89,8 @@ proc parseCommandLine*(): Config =
83
89
qualityThreshold: 20 ,
84
90
noTrim: false ,
85
91
verbose: false ,
86
- showVersion: false
92
+ showVersion: false ,
93
+ fasta: false
87
94
)
88
95
89
96
var fileArgs: seq [string ] = @ []
@@ -116,6 +123,8 @@ proc parseCommandLine*(): Config =
116
123
result .verbose = true
117
124
of " version" :
118
125
result .showVersion = true
126
+ of " fasta" :
127
+ result .fasta = true
119
128
else :
120
129
echo " Unknown option: " , key
121
130
printHelp ()
@@ -183,43 +192,54 @@ proc trimSequence*(sequence: string, qualities: seq[int],
183
192
result .seq = sequence[startPos ..< endPos]
184
193
result .qual = qualities[startPos ..< endPos]
185
194
186
proc writeFastq*(sequence: string, qualities: seq[int], name: string,
                 outFile: string = "", fasta: bool = false) =
  ## Writes a single record in FASTQ (default) or FASTA format.
  ##
  ## If outFile is empty, the record is written to stdout; otherwise the
  ## record is written to the file at outFile via `writeFile`.
  ## If fasta is true, only the header and the sequence are emitted and
  ## `qualities` is ignored.
  ##
  ## Quality scores are encoded as Phred+33. Scores outside 0..93 are
  ## clamped so that every emitted quality character stays within the
  ## printable range '!'..'~'; without clamping, out-of-range scores would
  ## produce non-printable bytes (or a range error) and a corrupt record.
  ##
  ## Parameters:
  ##   sequence: The DNA sequence to write
  ##   qualities: Quality scores for each base in the sequence
  ##   name: The sample name for the header
  ##   outFile: Path to the output file (empty string for stdout)
  ##   fasta: Whether to output in FASTA format instead of FASTQ
  const
    phredOffset = 33 # Phred+33: score 0 is encoded as '!'
    maxPhred = 93    # score 93 is encoded as '~', the last printable char

  var content: string
  if fasta:
    # FASTA record: header line followed by the sequence
    content = &">{name}\n{sequence}\n"
  else:
    # FASTQ record: header, sequence, separator, encoded qualities
    var qualityString = newStringOfCap(qualities.len)
    for qv in qualities:
      qualityString.add(chr(clamp(qv, 0, maxPhred) + phredOffset))
    content = &"@{name}\n{sequence}\n+\n{qualityString}\n"

  if outFile == "":
    # Write to stdout
    stdout.write(content)
  else:
    # Write to file
    writeFile(outFile, content)
209
225
210
226
proc main * () =
211
227
# # Main entry point for the abi2fq program.
212
228
# #
213
229
# # Handles command-line parsing, reads the input ABIF file,
214
230
# # performs quality trimming if enabled, and outputs the result
215
- # # in FASTQ format.
231
+ # # in FASTQ or FASTA format (depending on the --fasta option) .
216
232
let config = parseCommandLine ()
217
233
218
234
if config.verbose:
219
235
echo & " Processing file: { config.inFile} "
220
236
echo & " Window size: { config.windowSize} "
221
237
echo & " Quality threshold: { config.qualityThreshold} "
222
238
echo & " Trimming: { not config.noTrim} "
239
+ if config.fasta:
240
+ echo " Output format: FASTA"
241
+ else :
242
+ echo " Output format: FASTQ"
223
243
224
244
try :
225
245
let trace = newABIFTrace (config.inFile)
@@ -236,8 +256,54 @@ proc main*() =
236
256
quit (1 )
237
257
238
258
if config.noTrim:
239
- # No trimming, use original sequence
240
- writeFastq (sequence, qualities, sampleName, config.outFile)
259
+ # No trimming, but identify sections that would be trimmed and make them lowercase
260
+ let trimmed = trimSequence (sequence, qualities, config.windowSize, config.qualityThreshold)
261
+
262
+ if config.verbose:
263
+ if trimmed.seq .len == 0 :
264
+ echo " Warning: Entire sequence was below quality threshold"
265
+ elif trimmed.seq .len < sequence.len:
266
+ echo & " Sections that would be trimmed: { sequence.len - trimmed.seq .len} bases "
267
+
268
+ # Get indices for low quality regions
269
+ var modifiedSeq = " "
270
+ if trimmed.seq .len == 0 : # All sequence is below threshold
271
+ modifiedSeq = sequence.toLowerAscii ()
272
+ else :
273
+ # Find start position (same logic as in trimSequence)
274
+ var startPos = 0
275
+ for i in 0 .. (sequence.len - config.windowSize):
276
+ var windowSum = 0
277
+ for j in 0 ..< config.windowSize:
278
+ windowSum += qualities[i + j]
279
+
280
+ let windowAvg = windowSum / config.windowSize
281
+ if windowAvg >= config.qualityThreshold.float :
282
+ startPos = i
283
+ break
284
+
285
+ # Find end position (same logic as in trimSequence)
286
+ var endPos = sequence.len
287
+ for i in countdown (sequence.len - config.windowSize, 0 ):
288
+ var windowSum = 0
289
+ for j in 0 ..< config.windowSize:
290
+ windowSum += qualities[i + j]
291
+
292
+ let windowAvg = windowSum / config.windowSize
293
+ if windowAvg >= config.qualityThreshold.float :
294
+ endPos = i + config.windowSize
295
+ break
296
+
297
+ # Make trimmed regions lowercase
298
+ if startPos > 0 :
299
+ modifiedSeq.add (sequence[0 ..< startPos].toLowerAscii ())
300
+
301
+ modifiedSeq.add (sequence[startPos ..< endPos])
302
+
303
+ if endPos < sequence.len:
304
+ modifiedSeq.add (sequence[endPos ..< sequence.len].toLowerAscii ())
305
+
306
+ writeFastq (modifiedSeq, qualities, sampleName, config.outFile, config.fasta)
241
307
else :
242
308
# Trim low quality ends
243
309
let trimmed = trimSequence (sequence, qualities, config.windowSize, config.qualityThreshold)
@@ -247,7 +313,7 @@ proc main*() =
247
313
if trimmed.seq .len == 0 :
248
314
echo " Warning: Entire sequence was below quality threshold"
249
315
250
- writeFastq (trimmed.seq , trimmed.qual, sampleName, config.outFile)
316
+ writeFastq (trimmed.seq , trimmed.qual, sampleName, config.outFile, config.fasta )
251
317
252
318
trace.close ()
253
319
except :
0 commit comments