Skip to content

Commit

Permalink
Small update
Browse files Browse the repository at this point in the history
  • Loading branch information
V-Z committed Aug 10, 2015
1 parent 7202980 commit b824c5f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
6 changes: 6 additions & 0 deletions sondovac_functions
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,9 @@ function readinputfile {
fi
CHECKFILEREADOUT=$CHECKFILEREAD
}

# Function to convert FASTA to TAB
# $1 is input file in FASTA, $2 output file in TAB
#
# Every FASTA record becomes one output line: the header (without the
# leading ">" and any blanks directly after it), a TAB, and the sequence
# with all of its lines joined together. Runs of spaces/tabs inside the
# header or the sequence are collapsed to a single space so that the only
# TAB on a line is the column separator, and trailing blanks are removed.
# Windows (CRLF) line endings are accepted.
#
# Returns non-zero when an argument is missing, when the input cannot be
# read or the output cannot be written, or when awk itself fails, so that
# callers can rely on "fasta2tab in out || { ...error handling... }".
function fasta2tab {
  if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
    echo "fasta2tab: usage: fasta2tab <input.fasta> <output.tab>" >&2
    return 1
  fi
  # A single awk process is used on purpose: its exit status is the exit
  # status of the function (a multi-stage pipeline would only report the
  # status of its last stage), and it needs no GNU-only sed escapes.
  # The files are attached by redirection so that unusual file names
  # (leading "-", embedded "=" or spaces) are never interpreted by awk.
  awk '
    # Print the record collected so far (if any) as one TAB separated row.
    function emit(    row) {
      gsub(/[ \t]+/, " ", seq)
      if (seen_header) {
        row = header "\t" seq
      } else if (seq != "") {
        # Sequence data found before the first header: keep it as a row.
        row = seq
      } else {
        return
      }
      sub(/[ \t]+$/, "", row)
      print row
    }

    # Accept files with CRLF line endings.
    { sub(/\r$/, "") }

    # Header line: flush the previous record and start a new one.
    /^>/ {
      emit()
      header = $0
      gsub(/>[ \t]*/, ">", header)
      sub(/^>/, "", header)
      gsub(/[ \t]+/, " ", header)
      seen_header = 1
      seq = ""
      next
    }

    # Sequence line: append to the current record.
    { seq = seq $0 }

    END { emit() }
  ' < "$1" > "$2"
}
6 changes: 2 additions & 4 deletions sondovac_part_a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -852,8 +852,7 @@ echo
# In order to use the join command, the original transcriptome file has to be converted to TXT, the transcript numbers have to be adjusted, and the file has to be sorted
echo
echo "Converting original data into TXT for subsequent joining"
### REWRITE !!!
perl -e ' $count=0; $len=0; while(<>) { s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) { print "\n" } s/ |$/\t/; $count++; $_ .= "\t"; } else { s/ //g; $len += length($_) } print $_; } print "\n"; warn "\nConverted $count JOINEDFA records in $. lines to tabular format\nTotal sequence length: $len\n\n"; ' $INPUTFILE > $INPUTTAB || { echo && echo "${BOLD}Error!${NORM} Conversion of $INPUTFILE failed. Check if it is valid FASTA file. Aborting." && echo && exit 1; }
fasta2tab $INPUTFILE $INPUTTAB || { echo && echo "${BOLD}Error!${NORM} Conversion of $INPUTFILE failed. Check if it is valid FASTA file. Aborting." && echo && exit 1; }
echo
echo "Sorting unique transcripts"
{ awk '{$1=sprintf("%05d", $1); print $0}' $INPUTTAB | sort > $SORTEDINPUT; } || { echo && echo "${BOLD}Error!${NORM} Sorting of unique transcripts failed. Aborting. Check if $INPUTFILE is correct FASTA file and check file $INPUTTAB." && echo && exit 1; }
Expand Down Expand Up @@ -965,8 +964,7 @@ echo "Modified file saved as $BLATOUTFIN2 for possible later usage"
# Convert FASTA to TAB
echo
echo "Converting FASTA to TAB"
# REWRITE!!!
perl -e ' $count=0; $len=0; while(<>) { s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) { print "\n" } s/ |$/\t/; $count++; $_ .= "\t"; } else { s/ //g; $len += length($_) } print $_; } print "\n"; warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n"; ' $BLATOUTFIN2 > $TAB || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA to TAB failed. Aborting. Check if file $BLATOUTFIN2 is correct." && echo && exit 1; }
fasta2tab $BLATOUTFIN2 $TAB || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA to TAB failed. Aborting. Check if file $BLATOUTFIN2 is correct." && echo && exit 1; }
echo
{ awk '{print $1"\t"length($2)"\t"$2}' $TAB | awk '{sum+=$2}END{print sum}'; } || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA to TABConversion of FASTA to TAB failed. Aborting. Check if file $TAB is correct." && echo && exit 1; }

Expand Down
9 changes: 3 additions & 6 deletions sondovac_part_b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,7 @@ echo

# Convert FASTA to TSV
echo "Converting FASTA to TAB"
# REWRITE!!!
perl -e ' $count=0; $len=0; while(<>) { s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) { print "\n" } s/ |$/\t/; $count++; $_ .= "\t"; } else { s/ //g; $len += length($_) } print $_; } print "\n"; warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n"; ' $SEQUENCES > $SEQUENCESTAB || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
fasta2tab $SEQUENCES $SEQUENCESTAB || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
echo

# Separate the assembled sequences
Expand Down Expand Up @@ -473,8 +472,7 @@ cd-hit-est -i $PROBEPRELIM -o $PROBEPRELIMCDHIT -c $CDHITSIM
echo
# One of the three outfiles is a FASTA file, it has to be converted to TAB
echo "Converting FASTA to TAB"
# REWRITE!!!
perl -e ' $count=0; $len=0; while(<>) { s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) { print "\n" } s/ |$/\t/; $count++; $_ .= "\t"; } else { s/ //g; $len += length($_) } print $_; } print "\n"; warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n"; ' $PROBEPRELIMCDHIT > $PROBEPRELIMCDHIT.txt || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
fasta2tab $PROBEPRELIMCDHIT $PROBEPRELIMCDHIT.txt || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
echo
# Count all assemblies, comprised of putative exons ≥120 bp
echo "Counting all assemblies, comprised of putative exons ≥120 bp:"
Expand Down Expand Up @@ -507,8 +505,7 @@ echo
# Calculate the total number of base pairs
echo "Calculating of the total number of base pairs"
echo "Converting FASTA to TAB"
# REWRITE!!!
perl -e ' $count=0; $len=0; while(<>) { s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) { print "\n" } s/ |$/\t/; $count++; $_ .= "\t"; } else { s/ //g; $len += length($_) } print $_; } print "\n"; warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n"; ' $PROBESEQUENCES > $PROBESEQUENCESNUM || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
fasta2tab $PROBESEQUENCES $PROBESEQUENCESNUM || { echo && echo "${BOLD}Error!${NORM} Conversion of FASTA into TAB failed. Aborting." && echo && exit 1; }
echo
echo "Total number of base pairs:"
awk '{print $1"\t"length($2)}' $PROBESEQUENCESNUM | awk '{s+=$2;c++}END{print s}'
Expand Down

0 comments on commit b824c5f

Please sign in to comment.