Skip to content

Commit

Permalink
Delete n (#10)
Browse files Browse the repository at this point in the history
* introduced min_mut def

* updated args

* updated docs
  • Loading branch information
jonas-fuchs committed Oct 26, 2023
1 parent 48ece89 commit aaebb31
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 1 deletion.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ options:
-t 0, --threshold 0 display frequencies above this threshold (0-1)
--delete, --no-delete
delete mutations that are present in all samples and their maximum frequency divergence is smaller than 0.5 (default: True)
-n None, --delete-n None
do not show mutations that occur n times or less (default: Do not delete)
--sort, --no-sort sort sample names alphanumerically (default: False)
--min-cov 20 display mutations covered at least x time (only if per base cov tsv files are provided)
-v, --version show program's version number and exit
Expand Down
2 changes: 1 addition & 1 deletion virheat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""plot vcf data as a heatmap mapped to a virus genome"""
_program = "virheat"
__version__ = "0.5.3"
__version__ = "0.5.4"
11 changes: 11 additions & 0 deletions virheat/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ def get_args(sysargs):
default=True,
help="delete mutations that are present in all samples and their maximum frequency divergence is smaller than 0.5"
)
parser.add_argument(
"-n",
"--delete-n",
type=int,
metavar='None',
default=None,
help="do not show mutations that occur n times or less (default: Do not delete)"
)
parser.add_argument(
"--sort",
action=argparse.BooleanOptionalAction,
Expand Down Expand Up @@ -114,8 +122,11 @@ def main(sysargs=sys.argv[1:]):
# extract vcf info
reference_name, frequency_lists, unique_mutations, file_names = data_prep.extract_vcf_data(vcf_files, threshold=args.threshold)
frequency_array = data_prep.create_freq_array(unique_mutations, frequency_lists)
# user specified delete options (removes mutations based on various rationales)
if args.delete:
frequency_array = data_prep.delete_common_mutations(frequency_array, unique_mutations)
if args.delete_n is not None:
frequency_array = data_prep.delete_n_mutations(frequency_array, unique_mutations, args.delete_n)
# annotate low coverage if per base coverage from qualimap was provided
data_prep.annotate_non_covered_regions(args.input[0], args.min_cov, frequency_array, file_names, unique_mutations)

Expand Down
20 changes: 20 additions & 0 deletions virheat/scripts/data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,26 @@ def delete_common_mutations(frequency_array, unique_mutations):

return np.delete(frequency_array, mut_to_del, axis=1)

def delete_n_mutations(frequency_array, unique_mutations, min_mut):
    """
    delete mutations that are not present in more than n samples

    A mutation counts as present in a sample when its frequency there is
    greater than 0. Every mutation (column) that is present in ``min_mut``
    samples or fewer is dropped.

    :param frequency_array: 2D array-like, one row per sample and one column
        per mutation (same column order as ``unique_mutations``)
    :param unique_mutations: list of mutations; the entries of the dropped
        columns are removed from this list IN PLACE
    :param min_mut: a mutation has to occur in more than this many samples
        to be kept
    :return: new array without the dropped mutation columns
    """
    freq = np.asarray(frequency_array)
    # completely empty input (no samples and no mutations): there is no
    # column to inspect, so hand back an empty array instead of failing on
    # the first-row lookup
    if freq.size == 0 and freq.ndim < 2:
        return freq.copy()

    # number of samples per mutation (column) with a frequency above zero
    n_mutations = np.count_nonzero(freq > 0, axis=0)
    # ascending column indices of mutations that did not exceed min_mut
    mut_to_del = np.flatnonzero(n_mutations <= min_mut)

    # delete from the back so the remaining indices stay valid
    for idx in reversed(mut_to_del.tolist()):
        del unique_mutations[idx]

    return np.delete(freq, mut_to_del, axis=1)


def parse_gff3(file):
"""
Expand Down

0 comments on commit aaebb31

Please sign in to comment.