Skip to content

Commit

Permalink
Merge branch 'release/0.9.9'
Browse files Browse the repository at this point in the history
  • Loading branch information
dputhier committed Nov 29, 2018
2 parents 02d98f3 + 95d6653 commit b68ede3
Show file tree
Hide file tree
Showing 137 changed files with 6,998 additions and 3,292 deletions.
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ pylintshort:
@find . -name "*.py" -exec pylint $(PYLINT_ARGS) {} \; 2>/dev/null |perl -ne "print if(/(Your code has been rated)|(\*\*\*\* Module)/)"

nose:
@cd /tmp; mkdir -p gtftk_test; cd gtftk_test; a=`python -c "import os,pygtftk; print(os.path.dirname(pygtftk.__file__))"`; cd $$a ; for i in `find . -name "*.py" | perl -ne 'print unless(/(setup)|(plugin)|(libgtftk.py)|(__)/)'`; do echo "================="; echo $$i; nosetests --with-doctest $$i; done
@cd /tmp; mkdir -p gtftk_test; cd gtftk_test; a=`python -c "import os,pygtftk; print(os.path.dirname(pygtftk.__file__))"`; cd $$a ; for i in `find . -name "*.py" | perl -ne 'print unless(/(setup)|(plugin)|(bwig)|(libgtftk.py)|(__)/)'`; do echo "================="; echo $$i; nosetests --with-doctest $$i; done

nose_travis:
@ source activate pygtftk_py3k; mkdir -p ~/tmp; cd ~/tmp ; mkdir -p gtftk_test; cd gtftk_test; a=`python -c "import os,pygtftk; print(os.path.dirname(pygtftk.__file__))"`; echo $$a; cd $$a ; for i in `find . -name "*.py" | perl -ne 'print unless(/(setup)|(plugin)|(libgtftk.py)|(__)/)'`; do echo "================="; echo $$i; nosetests --with-doctest $$i; done
# Run nosetests (with doctests) on each .py file of the installed pygtftk package from within the pygtftk_py3k environment, skipping paths that contain setup, plugin, bwig, libgtftk.py or '__'.
# The exclusion regex must not contain an empty alternative ('||'): an empty branch matches every path, so 'print unless' would discard all files and no test would run.
@ source activate pygtftk_py3k; mkdir -p ~/tmp; cd ~/tmp ; mkdir -p gtftk_test; cd gtftk_test; a=`python -c "import os,pygtftk; print(os.path.dirname(pygtftk.__file__))"`; echo $$a; cd $$a ; for i in `find . -name "*.py" | perl -ne 'print unless(/(setup)|(plugin)|(bwig)|(libgtftk.py)|(__)/)'`; do echo "================="; echo $$i; nosetests --with-doctest $$i; done


install:
Expand Down Expand Up @@ -94,10 +94,10 @@ test_cmd:
make bats_cmd CMD=$(CMD)

%.bats:
@gtftk -l > prgm_list.txt; gtftk -p > test_list.txt; for i in $$(cat prgm_list.txt); do cat test_list.txt | grep -E "@test \"$$i" -A 3 | grep -v "^\-\-$$" > $$i.bats; done
@gtftk -l |sort -r > prgm_list.txt; gtftk -p > test_list.txt; for i in $$(cat prgm_list.txt); do cat test_list.txt | grep -E "@test \"$$i" -A 3 | grep -v "^\-\-$$" > $$i.bats; done

%.bats_travis:
@gtftk -l | grep -v select_by_go | grep -v retrieve > prgm_list.txt; gtftk -p > test_list.txt; for i in $$(cat prgm_list.txt); do cat test_list.txt | grep -E "@test \"$$i" -A 3 | grep -v "^\-\-$$" > $$i.bats; done
@gtftk -l |sort -r | grep -v select_by_go | grep -v retrieve > prgm_list.txt; gtftk -p > test_list.txt; for i in $$(cat prgm_list.txt); do cat test_list.txt | grep -E "@test \"$$i" -A 3 | grep -v "^\-\-$$" > $$i.bats; done

%.completed : %.bats
@bats -t $<
Expand All @@ -107,21 +107,21 @@ test_cmd:
@bats -t $<
@echo "completed" > $@

OUTPUT = $(eval OUTPUT := $$(shell gtftk -l 2>/dev/null))$(OUTPUT)
# Lazily computed, cached output of `gtftk -l` (reverse-sorted): on first expansion the $(eval) redefines OUTPUT as a simply-expanded variable, so the shell pipeline runs at most once.
# stderr is silenced on gtftk itself; a redirection placed at the end of the pipeline would only apply to its last command (sort).
OUTPUT = $(eval OUTPUT := $$(shell gtftk -l 2>/dev/null | sort -r))$(OUTPUT)
OUTPUT2 = $(addsuffix .completed, $(OUTPUT))


test_para: $(OUTPUT2)

OUTPUT3 = $(eval OUTPUT3 := $$(shell gtftk -l | grep -v select_by_go | grep -v retrieve 2>/dev/null))$(OUTPUT3)
# Lazily computed, cached output of `gtftk -l` (reverse-sorted) with the select_by_go and retrieve entries filtered out: on first expansion the $(eval) redefines OUTPUT3 as a simply-expanded variable, so the shell pipeline runs at most once.
# stderr is silenced on gtftk itself; a redirection placed at the end of the pipeline would only apply to its last command (grep).
OUTPUT3 = $(eval OUTPUT3 := $$(shell gtftk -l 2>/dev/null | sort -r | grep -v select_by_go | grep -v retrieve))$(OUTPUT3)
OUTPUT4 = $(addsuffix .completed, $(OUTPUT3))

test_para_travis: $(OUTPUT4)


clean:
@make bats_cmd CMD=clean
@git checkout docs/source/conf.py pygtftk/version.py; rm -rf simple* control_list_reference.txt control_list_data.txt add_attr_to_pos.tab test.py pygtftk.egg-info build airway_love.txt* ENCFF630HEX_Total_RNAseq_K562_count_mini.txt STDIN.e* closest_1.tsv STDIN.o* dist cmd_list.txt example_list.txt tmp_list.txt simple.chromInfo prgm_list.txt test_list.txt *.bats *.completed *mini_real* heatmap_* tx_classes* *~ \#* hh profile_* toto tott; cd docs/; make clean; cd ..; find . -type f -name '*~' -exec rm -f '{}' \;
@git checkout docs/source/conf.py pygtftk/version.py; rm -rf expected_s* ids* diff_fasta.py chr1_hg38_10M.fa* observed_s* order_fasta.py simple* control_list_reference.txt control_list_data.txt add_attr_to_pos.tab test.py pygtftk.egg-info build airway_love.txt* ENCFF630HEX_Total_RNAseq_K562_count_mini.txt STDIN.e* closest_1.tsv STDIN.o* dist cmd_list.txt example_list.txt tmp_list.txt simple.chromInfo prgm_list.txt test_list.txt *.bats *.completed *mini_real* heatmap_* tx_classes* *~ \#* hh profile_* toto tott; cd docs/; make clean; cd ..; find . -type f -name '*~' -exec rm -f '{}' \;

check_cmd_has_example:
@for i in $$(gtftk -l); do if grep -q "^$$i" docs/source/presentation.rst; then echo "" >/dev/null; else echo $$i; fi; done
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,5 @@ Running unitary tests
Several unitary tests have been implemented using doctests. You can run them using nose through the following command line: ::

make nose


13 changes: 8 additions & 5 deletions bin/gtftk
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ from pygtftk.utils import flatten_list
from pygtftk.utils import message
from pygtftk.utils import silentremove
from pygtftk.version import __version__
from pygtftk.bwig.bw_coverage import TMP_FILE_POOL_MANAGER

# Avoid warning message emitted by numpy
# https://tinyurl.com/ybev6zrw
Expand Down Expand Up @@ -92,9 +93,9 @@ def main():

if __name__ == "__main__":

from signal import signal, SIGPIPE, SIG_DFL
#from signal import signal, SIGPIPE, SIG_DFL

signal(SIGPIPE, SIG_DFL)
#signal(SIGPIPE, SIG_DFL)

try:

Expand All @@ -104,19 +105,21 @@ if __name__ == "__main__":

# delete created temporary files

for i in flatten_list(TMP_FILE_LIST, outlist=[]):
for i in flatten_list(TMP_FILE_LIST + list(TMP_FILE_POOL_MANAGER), outlist=[]):

# If the user ask to keep temp files
if args.tmp_dir is not None:
message("Keeping temporary file :" + i)
message("Keeping temp file : " + i)

base_name_i = os.path.basename(i)
shutil.move(i, os.path.join(args.tmp_dir, base_name_i))
else:
message("Deleting temporary file :" + i, type="DEBUG")
message("Deleting temp file : " + i, type="DEBUG")
silentremove(i)

except KeyboardInterrupt:
message("Canceled on user request.")
sys.exit(0)
except BrokenPipeError:
pass
sys.exit(0)
29 changes: 29 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
# Changelog

## v0.9.9

### Bug Fixes

- Fix a critical bug in get_sequence that affected get_feat_seq and get_tx_seq.
- Select_by_key now throws an error when no key/val are available.
- No more function with mutable objects as default arguments.
- Fix temporary file deletion.

### API Changes

- Refactored arg_formatter by creating a single type (ranged_num) to test for numeric inputs.
- Refactored all plugins so that there is no more reference to unused arguments (tmp_dir, verbosity...).


### Code changes

- No more reference to PY2.
- Added several tests to get_tx_seq and get_feat_seq.
- Added several scripts to manipulate fasta files (see 'tools' folder). Intended for pygtftk developers.
- Added 'extra_require' slot in setup().
- The get-feature-seq program now relies on bedtools (not on internal C code). This may change in the future once a more flexible C interface is available.

### New Features

- Added --list-bigwigs to profile (to display the content of a coverage file).
- Added a novel dataset (mini_real_10M) derived from mini_real and containing 10 Mb of chr1.
- The configuration directory now supports several subdirectories named based on a hash string computed from path to the gtftk program.


## v0.9.8

Expand Down
Binary file modified docs/source/_static/example_01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_01b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_02.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_05.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_06.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_06b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_07.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_08.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/source/_static/example_13.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
149 changes: 149 additions & 0 deletions docs/source/annotation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
Commands from section 'annotation'
------------------------------------


closest_gn_to_feat
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Find the n closest genes/transcripts for each peak (or the opposite).

**Example:** Find the closest tss to a set of peaks.

.. command-output:: gtftk closest_gn_to_feat -t tss -r simple_peaks.bed6 -i simple.gtf -c simple.chromInfo -p 10 -K toto -n transcript_id,gene_id
:shell:

**Example:** Find the closest tss to a set of peaks. Use the gene-centric and uncollapsed output.

.. command-output:: gtftk closest_gn_to_feat -t tss -r simple_peaks.bed6 -i simple.gtf -c simple.chromInfo -p 10 -K toto -n transcript_id,gene_id -gu
:shell:


**Arguments:**

.. command-output:: gtftk closest_gn_to_feat -h
:shell:


closest_genes
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Find the n closest genes for each transcript.

**Example:**

.. command-output:: gtftk get_example | bedtools sort | gtftk closest_genes -f
:shell:


**Arguments:**

.. command-output:: gtftk closest_genes -h
:shell:


overlapping
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Find transcripts whose body/TSS/TTS region extended in 5' and 3' (-u/-d) overlaps with any transcript from another gene. Strandness is not considered by default. Use --invert-match to find those that do not overlap. If --annotate-gtf is used, all lines of the input GTF file will be printed and a new key containing the list of overlapping transcripts will be added to the transcript features/lines (key will be 'overlapping_*' with * one of body/TSS/TTS). The --annotate-gtf and --invert-match arguments are mutually exclusive.


**Example:** Find transcripts whose promoter overlaps transcripts from other genes.

.. command-output:: gtftk get_example -f chromInfo > simple_join_chromInfo.txt; gtftk get_example | gtftk overlapping -c simple_join_chromInfo.txt -t promoter -u 10 -d 10 -a | gtftk select_by_key -k feature -v transcript | gtftk tabulate -k transcript_id,overlap_promoter_u0.01k_d0.01k | head
:shell:


**Arguments:**

.. command-output:: gtftk overlapping -h
:shell:

------------------------------------------------------------------------------------------------------------------

divergent
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Find transcripts with divergent promoters. These transcripts will be defined here
as those whose promoter region (defined by -u/-d) overlaps with the tss of
another gene in reverse/antisense orientation. This may be useful to select
coding genes in head-to-head orientation or LUAT as described in "Divergent
transcription is associated with promoters of transcriptional regulators"
(Lepoivre C, BMC Genomics, 2013). The output is a GTF with an additional key
('divergent') whose value is set to '.' if the gene has no antisense transcript
in its promoter region. If the gene has an antisense transcript in its promoter
region the 'divergent' key is set to the identifier of the transcript whose tss
is the closest relative to the considered promoter. The tss to tss distance is
also provided as an additional key (dist_to_divergent).


**Example:** Flag divergent transcripts in the example dataset. Select them and produce a tabulated output.

.. command-output:: gtftk get_example -f chromInfo > simple_join_chromInfo.txt; gtftk get_example | gtftk divergent -c simple_join_chromInfo.txt -u 10 -d 10| gtftk select_by_key -k feature -v transcript | gtftk tabulate -k transcript_id,divergent,dist_to_divergent | head -n 7
:shell:

**Arguments:**

.. command-output:: gtftk divergent -h
:shell:

------------------------------------------------------------------------------------------------------------------

convergent
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Find transcripts with convergent tts. These transcripts will be defined here
as those whose tts region (defined by -u/-d) overlaps with the tts of
another gene in reverse/antisense orientation. The output is a GTF with an
additional key ('convergent') whose value is set to '.' if the gene has no
convergent transcript in its tts region. If the gene has an antisense transcript
in its tts region the 'convergent' key is set to the identifier of the
transcript whose tts is the closest relative to the considered tts.
The tts to tts distance is also provided as an additional key (dist_to_convergent).


**Example:** Flag convergent transcripts in the example dataset. Select them and produce a tabulated output.

.. command-output:: gtftk get_example -f chromInfo > simple_join_chromInfo.txt; gtftk get_example | gtftk convergent -c simple_join_chromInfo.txt -u 25 -d 25| gtftk select_by_key -k feature -v transcript | gtftk tabulate -k transcript_id,convergent,dist_to_convergent| head -n 4
:shell:

**Arguments:**

.. command-output:: gtftk convergent -h
:shell:

------------------------------------------------------------------------------------------------------------------

exon_sizes
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Add a new key to transcript features containing a comma separated list of exon sizes.


**Example:**

.. command-output:: gtftk get_example | gtftk exon_sizes | gtftk select_by_key -t
:shell:

**Arguments:**

.. command-output:: gtftk exon_sizes -h
:shell:

------------------------------------------------------------------------------------------------------------------


intron_sizes
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Add a new key to transcript features containing a comma separated list of intron sizes.


**Example:**

.. command-output:: gtftk get_example | gtftk intron_sizes | gtftk select_by_key -t
:shell:

**Arguments:**

.. command-output:: gtftk intron_sizes -h
:shell:
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@
# built documents.
#
# The short X.Y version.
version = u'0.9.8'
version = u'0.9.8.dev0+8f76'

# The full version, including alpha/beta/rc tags.
release = u'0.9.8'
release = u'0.9.8.dev0+8f76'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
85 changes: 85 additions & 0 deletions docs/source/convertion.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
Commands from section 'convertion'
-----------------------------------

convert
~~~~~~~~~~~~~~~~~~~~~~

**Description:** This command can be used to convert to various formats. Currently only a limited number is supported.

* **bed**: classical bed6 format.
* **bed6**: classical bed6 format.
* **bed3**: bed3 format.


**Example:** Get the gene features and convert them to bed6.

.. command-output:: gtftk get_example | gtftk select_by_key -k feature -v gene | gtftk convert -n gene_id -f bed6| head -n 3
:shell:


**Arguments:**

.. command-output:: gtftk convert -h
:shell:

------------------------------------------------------------------------------------------------------------------

tabulate
~~~~~~~~~~~~~~~~~~~~~~

**Description:** Extract key/values from the GTF and convert them to tabulated format. When requesting coordinates they will be provided in 1-based format.


**Example:** Simply get the list of transcripts and genes.

.. command-output:: gtftk get_example -f gtf | gtftk select_by_key -k feature -v transcript| gtftk tabulate -k gene_id,transcript_id -s "|"
:shell:

.. warning:: By default tabulate will discard any line for which one of the selected keys is not defined. Use -x (--accept-undef) to print them.


**Arguments:**

.. command-output:: gtftk tabulate -h
:shell:

------------------------------------------------------------------------------------------------------------------


bed_to_gtf
~~~~~~~~~~~~~~~~~~~~~~


**Description:** Convert a bed file to gtf-like format.

**Example:**

.. command-output:: gtftk get_example |gtftk convert| gtftk bed_to_gtf -t transcript | head -n 5
:shell:


**Arguments:**

.. command-output:: gtftk bed_to_gtf -h
:shell:


------------------------------------------------------------------------------------------------------------------


convert_ensembl
~~~~~~~~~~~~~~~~~~~~~~


**Description:** Convert the GTF file to ensembl format. Essentially add 'transcript'/'gene' features.

**Example:** Delete gene and transcript feature. Regenerate them.

.. command-output:: gtftk get_example | gtftk select_by_key -k feature -v gene,transcript -n| gtftk convert_ensembl | gtftk select_by_key -k gene_id -v G0001
:shell:


**Arguments:**

.. command-output:: gtftk convert_ensembl -h
:shell:
Loading

0 comments on commit b68ede3

Please sign in to comment.