Skip to content

Commit

Permalink
extract_*_genbank subs in lib/phyTools.pm now parse LOCUS when access…
Browse files Browse the repository at this point in the history
…ion is not available in GenBank files, such as those made with PROKKA
  • Loading branch information
eead-csic-compbio committed Aug 7, 2017
1 parent 4af96a9 commit 68c28ce
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,4 @@
30062017: temp blastdb file closed properly in annotate_clusters.pl
25072017: corrected intergenic clusters produced with get_homologues.pl -g when using prokka-annotated GenBank files (thanks Uriel Alonso!)
25072017: updated get_homologues.pl -g and checked this section in the manual
07082017: extract_*_genbank subs in lib/phyTools.pm now parse LOCUS when accession is not available in GenBank files, such as those made with PROKKA
14 changes: 11 additions & 3 deletions lib/phyTools.pm
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ sub add_labels2newick_tree
return join(";\n",split(/;/,$fully_labelled_tree));
}

# Updated Jun2016
# Updated Aug2017
sub extract_intergenic_from_genbank
{
# takes a genbank input file and creates a FNA file containing all intergenic sequences found
Expand Down Expand Up @@ -668,6 +668,8 @@ sub extract_intergenic_from_genbank
{
$seq->alphabet('dna');
my ($gbaccession,$sequence,$gen,@genes) = ( $seq->accession() );
if($gbaccession eq 'unknown'){ $gbaccession = $seq->display_id() } # prokka-compatible

$sequence = $seq->primary_seq()->seq() || 'empty, need a full genbank entry!';
$taxon = '';
for my $f ($seq->get_SeqFeatures)
Expand Down Expand Up @@ -760,6 +762,7 @@ sub extract_intergenic_from_genbank
return $n_of_intergenic;
}

# Updated Aug 2017
sub extract_features_from_genbank
{
# takes a genbank input file and creates a single FASTA nucleotide file containing all features
Expand Down Expand Up @@ -814,6 +817,8 @@ sub extract_features_from_genbank
{
$seq->alphabet('dna');
$gbaccession = $seq->accession();
if($gbaccession eq 'unknown'){ $gbaccession = $seq->display_id() } # prokka-compatible

$taxon = $coords = $genelength = $source = '';
for my $f ($seq->get_SeqFeatures)
{
Expand Down Expand Up @@ -932,7 +937,7 @@ sub extract_features_from_genbank
return \%already_seen;
}

# Updated Oct2016
# Updated Aug2017
sub extract_CDSs_from_genbank
{
# takes a genbank input file and creates two FASTA files containing all CDSs in
Expand Down Expand Up @@ -969,6 +974,8 @@ sub extract_CDSs_from_genbank
$seq->alphabet('dna');

$gbaccession = $seq->accession();
if($gbaccession eq 'unknown'){ $gbaccession = $seq->display_id() } # prokka-compatible

$source = '';
foreach my $f ($seq->get_SeqFeatures)
{
Expand Down Expand Up @@ -1123,7 +1130,7 @@ sub extract_CDSs_from_genbank
return $n_of_CDS;
}

# Updated Oct2016
# Updated Aug2017
# To be used when extract_CDSs fails, in cases such as FN869568.gbk
sub extract_genes_from_genbank
{
Expand Down Expand Up @@ -1158,6 +1165,7 @@ sub extract_genes_from_genbank
{
$seq->alphabet('dna');
$gbaccession = $seq->accession();
if($gbaccession eq 'unknown'){ $gbaccession = $seq->display_id() } # prokka-compatible
$source = '';
foreach my $f ($seq->get_SeqFeatures)
{
Expand Down

0 comments on commit 68c28ce

Please sign in to comment.