Skip to content

Commit

Permalink
Fix track dict (#5)
Browse files Browse the repository at this point in the history
* fixed the track dictionary (gene or CDS is used if gene is not present in gff3)

* fixed the track dictionary (gene or CDS is used if gene is not present in gff3)
  • Loading branch information
jonas-fuchs authored Jun 26, 2023
1 parent 2f92c4a commit 641264a
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 16 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ optional arguments:
length of the genome (needed if gff3 is not provided)
-g None, --gff3-path None
path to gff3 (needed if length is not provided)
-a gene, --gff3-annotations gene
annotations to display from gff3 file (standard: gene)
-t 0, --threshold 0 display frequencies above this threshold
--delete, --no-delete
delete mutations with frequencies present in all
Expand Down
2 changes: 1 addition & 1 deletion virheat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""plot vcf data as a heatmap mapped to a virus genome"""
_program = "virheat"
__version__ = "0.3"
__version__ = "0.4"
15 changes: 12 additions & 3 deletions virheat/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ def get_args(sysargs):
default=None,
help="path to gff3 (needed if length is not provided)"
)
parser.add_argument(
"-a",
"--gff3-annotations",
type=str,
metavar="gene",
default="gene",
help="annotations to display from gff3 file (standard: gene). Multiple possible (comma seperated)"
)
parser.add_argument(
"-t",
"--threshold",
Expand Down Expand Up @@ -117,7 +125,8 @@ def main(sysargs=sys.argv[1:]):
if gff3_ref_name not in reference_name and reference_name not in gff3_ref_name:
print("\033[31m\033[WARNING:\033[0m gff3 reference does not match the vcf reference!")
genome_end = data_prep.get_genome_end(gff3_info)
genes_with_mutations, n_tracks = data_prep.create_track_dict(unique_mutations, gff3_info)
annotation_list = args.gff3_annotations.split(",")
genes_with_mutations, n_tracks = data_prep.create_track_dict(unique_mutations, gff3_info, annotation_list)
# define space for the genome vis tracks
min_y_location = genome_y_location + genome_y_location/2 * (n_tracks+1)
elif args.genome_length is not None:
Expand All @@ -127,7 +136,7 @@ def main(sysargs=sys.argv[1:]):
sys.exit("\033[31m\033[1mERROR:\033[0m Provide either a gff3 file (-g) or the length (-l) of the genome which you used for mapping")

# define size of the plot
y_size = (n_mutations)*0.4
y_size = n_mutations*0.4
x_size = y_size*(n_samples+min_y_location)/n_mutations
x_size = x_size-x_size*0.15 # compensate of heatmap annotation

Expand All @@ -145,7 +154,7 @@ def main(sysargs=sys.argv[1:]):
# plot gene track
plotting.create_gene_vis(ax, genes_with_mutations, n_mutations, y_size, n_tracks, genome_end, min_y_location, genome_y_location, colors_genes)
plotting.create_axis(ax, n_mutations, min_y_location, n_samples, file_names, genome_end, genome_y_location, unique_mutations, reference_name)
plotting.create_colorbar(args.threshold, cmap_cells, min_y_location, n_samples, n_mutations)
plotting.create_colorbar(args.threshold, cmap_cells, min_y_location, n_samples)
plotting.create_mutation_legend(mutation_set, min_y_location, n_samples)

# create output folder
Expand Down
30 changes: 19 additions & 11 deletions virheat/scripts/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import re
import pathlib
import sys

# LIBS
import numpy as np
Expand Down Expand Up @@ -91,7 +92,7 @@ def read_vcf(vcf_file):
key, val = info.split("=")
vcf_dict[key].append(convert_string(val))
visited_keys.append(key)
# append none for ech none vistited key
# append none for ech none visited key
for key in [k for k in vcf_dict.keys() if k not in visited_keys]:
vcf_dict[key].append(None)

Expand Down Expand Up @@ -199,7 +200,6 @@ def parse_gff3(file):
# add start, stop and strand
gff3_dict[gff_values[2]][attribute_id]["start"] = int(gff_values[3])
gff3_dict[gff_values[2]][attribute_id]["stop"] = int(gff_values[4])
gff3_dict[gff_values[2]][attribute_id]["strand"] = gff_values[6]

gff3_file.close()

Expand All @@ -221,7 +221,7 @@ def get_genome_end(gff3_dict):
return genome_end


def create_track_dict(unique_mutations, gff3_info):
def create_track_dict(unique_mutations, gff3_info, annotation_type):
"""
create a dictionary of the genes that have mutations and assess in which
track these genes should go in case they overlap
Expand All @@ -233,14 +233,22 @@ def create_track_dict(unique_mutations, gff3_info):
for mutation in unique_mutations:
# get the mutation from string
mutation = int(mutation.split("_")[0])
for gene_name in gff3_info["gene"]:
if mutation in range(gff3_info["gene"][gene_name]["start"], gff3_info["gene"][gene_name]["stop"]):
genes_with_mutations.add(
(gff3_info["gene"][gene_name]["gene"],
gff3_info["gene"][gene_name]["start"],
gff3_info["gene"][gene_name]["stop"],
gff3_info["gene"][gene_name]["strand"])
)
for type in annotation_type:
if type not in gff3_info.keys():
continue
for annotation in gff3_info[type]:
if mutation in range(gff3_info[type][annotation]["start"], gff3_info[type][annotation]["stop"]):
if "Name" in gff3_info[type][annotation].keys():
attribute_name = gff3_info[type][annotation]["Name"]
else:
attribute_name = annotation
genes_with_mutations.add(
(attribute_name,
gff3_info[type][annotation]["start"],
gff3_info[type][annotation]["stop"])
)
if not genes_with_mutations:
sys.exit("none of the given annotation types were found in gff3.")

# create a dict and sort
gene_dict = {element[0]: [element[1:4]] for element in genes_with_mutations}
Expand Down
2 changes: 1 addition & 1 deletion virheat/scripts/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def create_genome_vis(ax, genome_y_location, n_mutations, unique_mutations, geno
return mutation_set


def create_colorbar(threshold, cmap, min_y_location, n_samples, n_mutations):
def create_colorbar(threshold, cmap, min_y_location, n_samples):
"""
creates a custom colorbar and annotates the threshold
"""
Expand Down

0 comments on commit 641264a

Please sign in to comment.