diff --git a/bin/gtftk b/bin/gtftk index c9e0b683..5e6b57e0 100644 --- a/bin/gtftk +++ b/bin/gtftk @@ -1,5 +1,11 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- + + +# Avoid AttributeError: module '__main__' has no attribute '__spec__'. +# A weird bug which seems to be associated to the calling prgm (e.g. spider) +__spec__ = "" + import hashlib import os import shutil @@ -24,7 +30,6 @@ from pygtftk.utils import flatten_list from pygtftk.utils import message from pygtftk.utils import silentremove from pygtftk.version import __version__ -from pygtftk.bwig.bw_coverage import TMP_FILE_POOL_MANAGER # Avoid warning message emitted by numpy # https://tinyurl.com/ybev6zrw @@ -93,6 +98,7 @@ def main(): if __name__ == "__main__": + from pygtftk.bwig.bw_coverage import TMP_FILE_POOL_MANAGER from signal import signal, SIGPIPE, SIG_DFL signal(SIGPIPE, SIG_DFL) diff --git a/changelog.md b/changelog.md index d9dc925a..167be48a 100644 --- a/changelog.md +++ b/changelog.md @@ -1,11 +1,10 @@ # Changelog -## v1.1.5 - +## v1.2.1 ### Bug Fixes -* Fix #128 (merge_attr error when using same key as source and destination). +* Fix multiprocessing issue with py3.8. ### API/CLI Changes @@ -13,14 +12,13 @@ ### Code changes -* None. +* Updated requirements.txt according to python 3.8. ### New Features * None. - -## v1.1.5 +## v1.2.0 This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find intersections between multiple sets of genomic regions at once and then compute their enrichment with OLOGRAM. An optional algorithm (MODL) to find interesting combinations with sparse dictionary learning and greedy submodular optimisation has also been added. Furthermore, it also contains major speedups to OLOGRAM itself. @@ -34,6 +32,7 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse * fix #124 * fix BED to BED convertion in arg_formatted.FormattedFile(). BED6+ files were considered as BED6- files. 
* fix #136 although --show-group-number is no more supported with gtftk profile when plotnine > 0.6.0 is used. +* Fix #128 (merge_attr error when using same key as source and destination). ### API Changes @@ -58,8 +57,6 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse * Introduced a *treeify_ologram_modl* plugin to visualize n-wise enrichment results as a treee * Introduced a *ologram_merge_runs* command to merge several runs to save RAM, treating each as a superbatch. - - ## v1.1.4 ### Bug Fixes @@ -99,7 +96,6 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse ## v1.1.2 - ### Bug Fixes * None @@ -116,7 +112,6 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse * The --more-bed-labels is now facultative in OLOGRAM. - ## v1.1.1 ### Bug Fixes @@ -140,7 +135,6 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse ## v1.1.0 - ### Bug Fixes * None. @@ -158,10 +152,8 @@ This version introduces OLOGRAM-MODL, a new paradigm for OLOGRAM to find interse * Support for Python 3.7. * The tss_numbering command now allows to add the number of different TSSs to the gene feature. - ## v1.0.9 - ### Bug Fixes * None. @@ -187,7 +179,6 @@ Enrichment of Annotations Tool) documentation. We can not warrant that the proce for more details. * Added -y/--display-fit-quality to ologram - ## v1.0.8 This version introduces *ologram_merge_stats* command that can be used to produce a heatmap from multiple OLOGRAM results. @@ -228,7 +219,6 @@ This version contains some minor code refactoring. See 1.0.6 for recent major ch * None. 
- ## v1.0.6 ### Bug Fixes diff --git a/docs/_images/example_01.png b/docs/_images/example_01.png index 08a728f5..9d6fe4a4 100644 Binary files a/docs/_images/example_01.png and b/docs/_images/example_01.png differ diff --git a/docs/_images/example_05.png b/docs/_images/example_05.png index 3f6c33d6..2937ac10 100644 Binary files a/docs/_images/example_05.png and b/docs/_images/example_05.png differ diff --git a/docs/_images/example_06.png b/docs/_images/example_06.png index 48f30732..79f3e737 100644 Binary files a/docs/_images/example_06.png and b/docs/_images/example_06.png differ diff --git a/docs/_images/example_06b.png b/docs/_images/example_06b.png index eb7ed09e..3cbeb7c0 100644 Binary files a/docs/_images/example_06b.png and b/docs/_images/example_06b.png differ diff --git a/docs/_images/example_08.png b/docs/_images/example_08.png index a2a823dd..861d1066 100644 Binary files a/docs/_images/example_08.png and b/docs/_images/example_08.png differ diff --git a/docs/_images/example_13.png b/docs/_images/example_13.png index 67fc2186..1fde3047 100644 Binary files a/docs/_images/example_13.png and b/docs/_images/example_13.png differ diff --git a/docs/_sources/ologram.rst.txt b/docs/_sources/ologram.rst.txt index 67e17707..631dfbde 100644 --- a/docs/_sources/ologram.rst.txt +++ b/docs/_sources/ologram.rst.txt @@ -54,7 +54,7 @@ The program will return statistics for both the number of intersections and the - H1: The regions of the query (--peak-file) tend to overlap the reference (--inputfile or --more-bed). -.. warning:: The ologram examples below use 8 CPUs. Please adapt. +.. warning:: The ologram examples below use 8 CPUs. Please adapt the number of threads. @@ -167,33 +167,16 @@ The program will return statistics for both the number of intersections and the ologram (multiple overlaps) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -While previously we computed paiwise enrichment (ie. Query+A, Query+B ...) 
, It is also possible to use the **OLOGRAM-MODL** Multiple Overlap Dictionary Learning) plugin to find multiple overlaps (ie. between n>=2 sets) enrichment (ie. Query+A+B, Query+A+C, ...) in order to highlight combinations of genomic regions, such as Transcriptional Regulator complexes. +While previously we computed pairwise enrichment (ie. Query+A, Query+B, ...), it is also possible to use the **OLOGRAM-MODL** (Multiple Overlap Dictionary Learning) plugin to find multiple overlaps (ie. between n>=2 sets) enrichment (ie. Query+A+B, Query+A+C, ...) in order to highlight combinations of genomic regions, such as Transcriptional Regulator complexes. This is done only on custom regions supplied as BEDs supplied with the `--more-bed` argument. In most cases you may use the --no-gtf argument and only pass the regions of interest. - -For statistical reasons, we recommend shuffling across a relevant subsection of the genome only (ie. enhancers only) using --bed-excl or --bed-incl to ensure the longer combinations have a reasonable chance of being randomly encountered in the shuffles. - - -**MODL itemset mining algorithm:** By default, OLOGRAM-MODL will compute the enrichment of all n-wise combinations that are encountered in the real data it was passed. This however can add up to 2**N combinations and make the result hard to read. Furthermore, in biological data noise is a real problem and can obscure the relevant combinations. - -As such, we also give the option to use a custom itemset mining algorithm on the true overlaps to identify interesting combinations. - -In broad strokes, this custom algorithm MODL (Multiple Overlap Dictionary Learning) will perform many matrix factorizations on the matrix of true overlaps to identify relevant correlation groups of genomic regions. Then a greedy algorithm based on how much these words improve the reconstruction will select the utmost best words.
MODL is only used to filter the output of OLOGRAM : once it returns a list of interesting combination, OLOGRAM will compute their enrichment as usual, but for them only. Each combination is of the form [Query + A + B + C] where A, B and C are BED files given as --more-bed. You can also manually specify the combinations to be studied with the format defined in OLOGRAM notes (below). - -Unlike classical association rules mining algorithms, this focuses on mining relevant bio complexes/clusters and correlation groups (item sets), and you should not request more than 20-30 combinations. As a matrix factorization based algorithm, it is designed to be resistant -to noise which is a known problem in biological data. Its goal is to extract meaningful frequent combinations from noisy data. As a result however, it is biased in favor of the most abundant combinations in the data, and may return correlation groups if you ask for too few words (ie. if AB, BC and AC are complexes, ABC might be returned). - - -This itemset mining algorithm is a work-in-progress. Whether you use MODL will not change the results for each combination, it only changes which combinations are displayed. If you want the enrichment of all combinations, ignore it. To use MODL, use the --multiple-overlap-max-number-of-combinations argument. - +For statistical reasons, we recommend shuffling across a relevant subsection of the genome only (ie. enhancers only) using --bed-excl or --bed-incl to ensure the longer combinations have a reasonable chance of being randomly encountered in the shuffles. Conversely, if you do not filter the combinations, keep in mind that the longer ones may be enriched even though they are present only on a few base pairs, because at random they would be even rarer. **Exact combinations:** By default, OLOGRAM will compute "inexact" combinations, meaning that when encountering an overlap of [Query + A + B + C] it will count towards [A + B + ...]. For exact intersections (ie. 
[Query + A + B + nothing else]), set the --multiple-overlap-target-combi-size flag to the number of --more-bed plus one. You will know if the combinations are computed as inexact by the '...' in their name in the result file. Intersections not including the query file are discarded. - - **Simple example:** Comparing the query (-p) against two other BED files, analyzing multiple overlaps. @@ -238,19 +221,30 @@ Comparing the query (-p) against two other BED files, analyzing multiple overlap As the computation of multiple overlaps can be RAM-intensive, if you have a very large amount of candidate genomic feature sets (hundreds) we recommend selecting less candidates among them first by running a pairwise analysis. -**MODL algorithm API:** MODL can also be used independantly as a combination mining algorithm. -This can work on any type of data, biological or not, that respects the conventional formatting for lists of transactions: the data needs to be a matrix with one line per transaction and one column per element. +**MODL itemset mining algorithm:** By default, OLOGRAM-MODL will compute the enrichment of all n-wise combinations that are encountered in the real data it was passed. This however can add up to 2**N combinations and make the result hard to read. Furthermore, in biological data noise is a real problem and can obscure the relevant combinations. As such, we also give the option to use a custom itemset mining algorithm on the true overlaps to identify interesting combinations. + + + +Details +----------------- -For example, if you have three possible elements A, B and C, a line of [1,0,1] means a transaction containing A and C. -For a factor allowance of k and n final queried words, the matrix will be rebuilt with k*n words in step 1. -factor allowance is K in K*n words in step 1 where n is final queries nb of words. 
+In broad strokes, the custom itemset algorithm MODL (Multiple Overlap Dictionary Learning) will perform many matrix factorizations on the matrix of true overlaps to identify relevant correlation groups of genomic regions. Then a greedy algorithm based on how much these words improve the reconstruction will select the utmost best words. MODL is only used to filter the output of OLOGRAM : once it returns a list of interesting combination, OLOGRAM will compute their enrichment as usual, but for them only. Each combination is of the form [Query + A + B + C] where A, B and C are BED files given as --more-bed. You can also manually specify the combinations to be studied with the format defined in OLOGRAM notes (below). -MODL and will discard combinations rarer than 1/10000 occurences to reduce computing times and will also reduce the abundance of all unique lines in the matrix to their square roots to reduce the emphasis on the most frequent elements. -However, this can magnify the impact of the noise quadratically as well, and can be disabled when using the manual API. +Unlike classical association rules mining algorithms, this focuses on mining relevant bio complexes/clusters and correlation groups (item sets), and you should not request more than 20-30 combinations. As a matrix factorization based algorithm, it is designed to be resistant +to noise which is a known problem in biological data. Its goal is to extract meaningful frequent combinations from noisy data. As a result however, it is biased in favor of the most abundant combinations in the data, and may return correlation groups if you ask for too few words (ie. if AB, BC and AC are complexes, ABC might be returned). + + +This itemset mining algorithm is a work-in-progress. Whether you use MODL will not change the results for each combination, it only changes which combinations are displayed. If you want the enrichment of all combinations, ignore it. 
To use MODL, use the --multiple-overlap-max-number-of-combinations argument. + + + +**MODL algorithm API:** MODL can also be used independently as a combination mining algorithm. +This can work on any type of data, biological or not, that respects the conventional formatting for lists of transactions: the data needs to be a matrix with one line per transaction and one column per element. For example, if you have three possible elements A, B and C, a line of [1,0,1] means a transaction containing A and C. +For a factor allowance of k and n final queried words, the matrix will be rebuilt with k*n words in step 1. MODL will discard combinations rarer than 1/10000 occurrences to reduce computing times. It will also reduce the abundance of all unique lines in the matrix to their square roots to reduce the emphasis on the most frequent elements. However, the latter can magnify the impact of the noise as well and can be disabled when using the manual API. To de-emphasize longer words, which can help in this case, we can also normalize words by their summed square in step 2. If you are passing a custom error function, it must have the signature error_function(X_true, X_rebuilt, code). X_true is the real data, X_rebuilt is the reconstruction to evaluate, and code is the encoded version which in our case is used to assess sparsity. All are NumPY matrices. @@ -272,11 +266,12 @@ Here is an example: nb_threads = 1, step_1_factor_allowance = 2, # How many words to ask for in each step 1 rebuilding, as a multiplier of multiple_overlap_max_number_of_combinations error_function = None, # Custom error function in step 2 - smother = True) # Should the smothering (quadratic reduction of abundance) be applied ? + smother = True, # Should the smothering (quadratic reduction of abundance) be applied ? + normalize_words = False) # Normalize words by their summed square in step 2 ?
interesting_combis = combi_miner.find_interesting_combinations() -For more details about usage and implementation, please read the notes below : +For more details about usage and implementation, please read the notes below. **Arguments:** @@ -284,7 +279,27 @@ For more details about usage and implementation, please read the notes below : :shell: -Since the results of MODL only depend on the true intersections and not on the shuffles, you can run MODL with 1 shuffle to pre-select interesting combinations, and then run the full analysis on many shuffles. We then recommend selecting the combinations that interest you in the resulting tsv, using MODL's selection as a starting point, and adding or removing some combinations based on your own needs (eg. adding all the highest fold changes, or all particular combinations containing the Transcription Factor X that you are studying). Then, run ologram_modl_treeify on the resulting filtered tsv. + +**Manual intersection computing:** To manually compute an overlap matrix between any number of BED files, the following Python code can be used. + +.. code-block:: python + + import pybedtools + import numpy as np + from pygtftk.stats.intersect.overlap_stats_compute import compute_true_intersection + + # Register the BED files as pybedtools.BedTool objects + bedA = pybedtools.BedTool(path_to_your_query) + bedsB = [pybedtools.BedTool(bedfilepath) for bedfilepath in list_of_all_paths_to_more_bed] + + # Use our custom intersection computing algorithm to get the matrix of overlaps + true_intersection = compute_true_intersection(bedA, bedsB) + flags_matrix = np.array([i[3] for i in true_intersection]) + +The resulting flags_matrix is a NumPy array that can be edited, and on which MODL can be run. 
+ +Since the results of MODL only depend on the true intersections and not on the shuffles, you can run MODL with 1 shuffle or on a manually computed matrix as above to pre-select interesting combinations, and then run the full analysis on many shuffles. We then recommend selecting the combinations that interest you in the resulting tsv file, using MODL's selection as a starting point and adding or removing some combinations based on your own needs (eg. adding all the highest fold changes, or all particular combinations containing the Transcription Factor X that you are studying). + ologram_merge_stats @@ -325,11 +340,13 @@ This also works with OLOGRAM-MODL results, since they follow the same basic form ologram_modl_treeify ~~~~~~~~~~~~~~~~~~~~~~ -**Description:** Visualize n-wise enrichment results (OLOGRAM-MODL) as a tree of combinations. Works on the result (tsv file) of an OLOGRAM analysis called with --more-bed-multiple-overlap. +**Description:** Visualize n-wise enrichment results (OLOGRAM-MODL) as a tree of combinations. Works on the result (tsv file) of an OLOGRAM analysis called with --more-bed-multiple-overlap. On the graph, S designates the total number of basepairs in which this combination is encountered in the real data. Fold change gives the ratio with the number of basepairs in the shuffles, with the associated Negative Binomial p-value. + +This recommended representation is useful to find master regulators, by showing which additions to a combination increase its enrichment, and making it possible to see whether overlaps that contain the element X also contain the element Y (looking at how a child combination accounts for the S of its parent in an inexact counting). -We recommend this representation. The tsv file can be edited before passing it to the command, for example by keeping only the combinations you are interested in.
+The tsv result file can be edited before passing it to the command, for example by keeping only the combinations you are interested in, such as all combinations containing the Transcription Factor you are studying. We recommend running MODL to make a pre-selection. -On the graph, S designated the total number of basepairs in which this combinations is encountered in the real data. Fold change gives the ratio with the number of basepairs in the shuffles, with the associated Negative Binomial p-value. +We also recommend discarding the rarest combinations found on such a small number of basepairs that they are unlikely to be biologically significant. This is mostly relevant when you have many sets (k >= 5) since longer combinations will often be enriched through sheer unlikelihood. .. command-output:: gtftk ologram_modl_treeify -i multiple_overlap_trivial_ologram_stats.tsv -o treeified.pdf -l ThisWasTheNameOfTheQuery :shell: diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js index c1bb9ae4..bd0de63c 100644 --- a/docs/_static/documentation_options.js +++ b/docs/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: '1.2.1', + VERSION: '1.2.3', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/_static/example_01.png b/docs/_static/example_01.png index 9d6fe4a4..0104648b 100644 Binary files a/docs/_static/example_01.png and b/docs/_static/example_01.png differ diff --git a/docs/_static/example_01b.png b/docs/_static/example_01b.png index cf809c40..1c5660bb 100644 Binary files a/docs/_static/example_01b.png and b/docs/_static/example_01b.png differ diff --git a/docs/_static/example_02.png b/docs/_static/example_02.png index 8439c50a..f15653b3 100644 Binary files a/docs/_static/example_02.png and b/docs/_static/example_02.png differ diff --git a/docs/_static/example_05.png
b/docs/_static/example_05.png index 2937ac10..4702bf91 100644 Binary files a/docs/_static/example_05.png and b/docs/_static/example_05.png differ diff --git a/docs/_static/example_06.png b/docs/_static/example_06.png index 79f3e737..713d6c11 100644 Binary files a/docs/_static/example_06.png and b/docs/_static/example_06.png differ diff --git a/docs/_static/example_06b.png b/docs/_static/example_06b.png index 3cbeb7c0..ecfe0e0b 100644 Binary files a/docs/_static/example_06b.png and b/docs/_static/example_06b.png differ diff --git a/docs/_static/example_07.png b/docs/_static/example_07.png index aaefb731..6c312dd1 100644 Binary files a/docs/_static/example_07.png and b/docs/_static/example_07.png differ diff --git a/docs/_static/example_08.png b/docs/_static/example_08.png index 861d1066..952b4eaf 100644 Binary files a/docs/_static/example_08.png and b/docs/_static/example_08.png differ diff --git a/docs/_static/example_13.png b/docs/_static/example_13.png index 1fde3047..c5c7d0c2 100644 Binary files a/docs/_static/example_13.png and b/docs/_static/example_13.png differ diff --git a/docs/_static/example_pa_01.pdf b/docs/_static/example_pa_01.pdf index 54ea2927..bd4f566c 100644 Binary files a/docs/_static/example_pa_01.pdf and b/docs/_static/example_pa_01.pdf differ diff --git a/docs/_static/example_pa_02.pdf b/docs/_static/example_pa_02.pdf index ec941371..82426456 100644 Binary files a/docs/_static/example_pa_02.pdf and b/docs/_static/example_pa_02.pdf differ diff --git a/docs/_static/example_pa_03.pdf b/docs/_static/example_pa_03.pdf index 379b27d2..d0325285 100644 Binary files a/docs/_static/example_pa_03.pdf and b/docs/_static/example_pa_03.pdf differ diff --git a/docs/_static/example_pa_04.pdf b/docs/_static/example_pa_04.pdf index 8b24f3fa..9f37f661 100644 Binary files a/docs/_static/example_pa_04.pdf and b/docs/_static/example_pa_04.pdf differ diff --git a/docs/_static/merge_ologram_stats_01.pdf b/docs/_static/merge_ologram_stats_01.pdf index 
72a77908..5d5e6e6e 100644 Binary files a/docs/_static/merge_ologram_stats_01.pdf and b/docs/_static/merge_ologram_stats_01.pdf differ diff --git a/docs/_static/treeified.pdf b/docs/_static/treeified.pdf index 465cbd87..dea2bfd4 100644 Binary files a/docs/_static/treeified.pdf and b/docs/_static/treeified.pdf differ diff --git a/docs/about.html b/docs/about.html index 1bd7dd9e..40bca68c 100644 --- a/docs/about.html +++ b/docs/about.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Warning about supported GTF file formats — gtftk 1.2.1 documentation + Warning about supported GTF file formats — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

Navigation

  • previous |
  • - + @@ -202,13 +202,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/annotation.html b/docs/annotation.html index 354b9056..37bcb9a2 100644 --- a/docs/annotation.html +++ b/docs/annotation.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘annotation’ — gtftk 1.2.1 documentation + Commands from section ‘annotation’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -477,13 +477,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/api.html b/docs/api.html index 19023d8a..f788d1fd 100644 --- a/docs/api.html +++ b/docs/api.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Help on Python package (pygtftk) — gtftk 1.2.1 documentation + Help on Python package (pygtftk) — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -2298,7 +2298,7 @@

    Help on Python package (pygtftk)
    pygtftk.stats.intersect.overlap_stats_compute.compute_true_intersection(bedA, bedsB)
    -

    Returns the custom-computed tur intersection between bedA and all in bedsB combined, where +

    Returns the custom-computed true intersection between bedA and all in bedsB combined, where bedA is a pybedtools.BedTool object and bedsB is a list of such objects.

    Returns also the intersection flags.

    @@ -2553,9 +2553,27 @@

    Help on Python package (pygtftk)
    -pygtftk.stats.intersect.modl.tree.apply_recursively_to_all_nodes(node, function, global_results)
    +pygtftk.stats.intersect.modl.tree.apply_recursively_to_all_nodes(node, function, global_results, no_duplicates=True, stop_condition=<function <lambda>>)

    General utility function to recursively apply a function to all nodes of a tree. Also pass a global_results dict to be added to.

    +

    Since nodes can have several parents, by default this will remember which nodes +have been seen and not operate on them twice (no_duplicates).

    +

    A stop condition function of signature stop_condition(current_node, global_results) +can be passed: the recursion will stop if it returns True at any point.

    +
    >>> from pygtftk.stats.intersect.modl.tree import Library, apply_recursively_to_all_nodes
    +>>> from pygtftk import utils
    +>>> utils.VERBOSITY = 0
    +>>> words = [(1,0,0), (1,1,0), (0,1,0), (1,1,1)]
    +>>> l = Library()
    +>>> l.build_nodes_for_words(words)
    +>>> l.assign_nodes()
    +>>> manual_print_words = set([str(n) for n in l.assigned_nodes])
    +>>> gr = dict()
    +>>> apply_recursively_to_all_nodes(l.root_node, str, gr)
    +>>> recursive_print_words = set(gr.values())
    +>>> assert recursive_print_words == manual_print_words
    +
    +
    @@ -2601,7 +2619,7 @@

    Help on Python package (pygtftk)
    -class pygtftk.stats.intersect.modl.dict_learning.Modl(flags_matrix, multiple_overlap_target_combi_size=- 1, multiple_overlap_max_number_of_combinations=5, nb_threads=1, step_1_factor_allowance=2, error_function=None, smother=True)
    +class pygtftk.stats.intersect.modl.dict_learning.Modl(flags_matrix, multiple_overlap_target_combi_size=- 1, multiple_overlap_max_number_of_combinations=5, nb_threads=1, step_1_factor_allowance=2, error_function=None, smother=True, normalize_words=False)

    This class encapsulates the MODL approach :

    Takes as input a matrix of flags, with one flag per intersection, and returns the list of interesting combis.

    @@ -2616,10 +2634,11 @@

    Help on Python package (pygtftk)
    >>> from pygtftk.stats.intersect.modl.dict_learning import Modl, test_data_for_modl
     >>> import numpy as np
     >>> np.random.seed(42)
    @@ -2644,7 +2663,7 @@ 

    Help on Python package (pygtftk)
    -select_best_words_from_library(error_function=None)
    +select_best_words_from_library()

    This is step 2. Takes the library of candidates produced at step 1 and will get the best N words among it that best rebuild the original matrix.

    @@ -2682,7 +2701,7 @@

    Help on Python package (pygtftk)quentin.q.ferre@gmail.com>

    -pygtftk.stats.intersect.modl.subroutines.build_best_dict_from_library(data, library, queried_words_nb, error_function=None, nb_threads=1, transform_alpha=None)
    +pygtftk.stats.intersect.modl.subroutines.build_best_dict_from_library(data, library, queried_words_nb, error_function=None, nb_threads=1, normalize_words=False, transform_alpha=None)

    Given a data matrix and a library, will select the best n = queried_words_nb words with a greedy algorithm from the library to rebuild the data.

    data is a matrix with one transaction per row and one element per column, as usual

    @@ -3492,13 +3511,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/bwig_coverage.html b/docs/bwig_coverage.html index 34330025..2f615dc9 100644 --- a/docs/bwig_coverage.html +++ b/docs/bwig_coverage.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - The pygtftk.bwig.bw_coverage module — gtftk 1.2.1 documentation + The pygtftk.bwig.bw_coverage module — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -178,13 +178,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/conversion.html b/docs/conversion.html index 713c67c0..0c179407 100644 --- a/docs/conversion.html +++ b/docs/conversion.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘conversion’ — gtftk 1.2.1 documentation + Commands from section ‘conversion’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -324,13 +324,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/coordinates.html b/docs/coordinates.html index 33024498..c328a355 100644 --- a/docs/coordinates.html +++ b/docs/coordinates.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘coordinates’ — gtftk 1.2.1 documentation + Commands from section ‘coordinates’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -425,13 +425,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/coverage.html b/docs/coverage.html index afa73ef4..cf6f1e10 100644 --- a/docs/coverage.html +++ b/docs/coverage.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘coverage’ — gtftk 1.2.1 documentation + Commands from section ‘coverage’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -214,19 +214,19 @@

    profile
    $ gtftk profile -D -i mini_real_promoter.zip -o profile_prom -pf png -if  example_01.png
    - |-- 18:59-WARNING-profile : --group-by not set. Choosing 'bwig'.
    + |-- 0:57-WARNING-profile : --group-by not set. Choosing 'bwig'.
     
    _images/example_01.png

    Changing colors and applying color order can be done using the following syntax:

    $ gtftk profile -D -i mini_real_promoter.zip -c 'red,blue,violet' -d H3K79me,H3K4me3,H3K36me3 -o profile_prom -pf png -if  example_01b.png
    - |-- 19:14-WARNING-profile : --group-by not set. Choosing 'bwig'.
    + |-- 0:57-WARNING-profile : --group-by not set. Choosing 'bwig'.
     
    _images/example_01b.png

    Transcript coverage is obtained using the mini_real_tx.zip matrix. This provides a simple overlayed profile of all epigenetic marks along the transcript body extended in 5’ and 3’ regions:

    $ gtftk profile -D -i mini_real_tx.zip -o profile_tx -pf png -if  example_02.png
    - |-- 19:14-WARNING-profile : --group-by not set. Choosing 'bwig'.
    + |-- 0:57-WARNING-profile : --group-by not set. Choosing 'bwig'.
     
    _images/example_02.png @@ -234,111 +234,111 @@

    profile
    $ gtftk profile -D -i mini_real_promoter.zip -f tx_classes -g bwig  -t tx_classes.txt -o profile_prom  -pf png -if  example_05.png -e -V 2 -fc 2
    - |-- 19:14-DEBUG-profile : Using pandas version 1.1.2
    - |-- 19:14-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/pandas/__init__.py
    - |-- 19:14-DEBUG-profile : Using numpy version 1.19.1
    - |-- 19:14-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/numpy/__init__.py
    - |-- 19:14-DEBUG-profile : Using plotnine version 0.7.1
    - |-- 19:14-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/plotnine/__init__.py
    - |-- 19:14-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_4jm84gd0
    - |-- 19:14-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_4jm84gd0
    - |-- 19:14-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_4jm84gd0/mini_real_promoter
    - |-- 19:14-INFO-profile : Getting configuration info from input file.
    - |-- 19:14-INFO-profile : Reading transcript file.
    - |-- 19:14-INFO-profile : Deleting duplicates in transcript-file.
    - |-- 19:14-INFO-profile : Checking how many transcripts where found in the transcript list.
    - |-- 19:14-INFO-profile : Keeping 804 transcript out of 833 in input transcript list.
    - |-- 19:14-DEBUG-profile : Color order : ['H3K79me', 'H3K36me3', 'H3K4me3']
    - |-- 19:14-DEBUG-profile : Profile color : ['#000000', '#00bb00', '#cccccc']
    - |-- 19:14-INFO-profile : Searching coverage columns.
    - |-- 19:14-INFO-profile : Melting.
    - |-- 19:14-INFO-profile : Ceiling
    - |-- 19:29-INFO-profile : Computing column ordering.
    - |-- 19:29-INFO-profile : Preparing diagram
    - |-- 19:29-INFO-profile : Theming and ordering. Please be patient...
    - |-- 19:29-INFO-profile : Preparing x axis
    - |-- 19:29-INFO-profile : facet_col 2
    - |-- 19:29-INFO-profile : Page width set to 6
    - |-- 19:29-INFO-profile : Page height set to 5.0
    - |-- 19:29-INFO-profile : Saving diagram to file : example_05.png
    - |-- 19:29-INFO-profile : Be patient. This may be long for large datasets.
    - |-- 19:29-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_4jm84gd0
    + |-- 0:57-DEBUG-profile : Using pandas version 1.1.2
    + |-- 0:57-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/pandas/__init__.py
    + |-- 0:57-DEBUG-profile : Using numpy version 1.19.2
    + |-- 0:57-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/numpy/__init__.py
    + |-- 0:57-DEBUG-profile : Using plotnine version 0.7.1
    + |-- 0:57-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/plotnine/__init__.py
    + |-- 0:57-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_onenfmua
    + |-- 0:57-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_onenfmua
    + |-- 0:57-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_onenfmua/mini_real_promoter
    + |-- 0:57-INFO-profile : Getting configuration info from input file.
    + |-- 0:57-INFO-profile : Reading transcript file.
    + |-- 0:57-INFO-profile : Deleting duplicates in transcript-file.
    + |-- 0:57-INFO-profile : Checking how many transcripts where found in the transcript list.
    + |-- 0:57-INFO-profile : Keeping 804 transcript out of 833 in input transcript list.
    + |-- 0:57-DEBUG-profile : Color order : ['H3K4me3', 'H3K79me', 'H3K36me3']
    + |-- 0:57-DEBUG-profile : Profile color : ['#000000', '#00bb00', '#cccccc']
    + |-- 0:57-INFO-profile : Searching coverage columns.
    + |-- 0:57-INFO-profile : Melting.
    + |-- 0:57-INFO-profile : Ceiling
    + |-- 0:57-INFO-profile : Computing column ordering.
    + |-- 0:57-INFO-profile : Preparing diagram
    + |-- 0:57-INFO-profile : Theming and ordering. Please be patient...
    + |-- 0:57-INFO-profile : Preparing x axis
    + |-- 0:57-INFO-profile : facet_col 2
    + |-- 0:57-INFO-profile : Page width set to 6
    + |-- 0:57-INFO-profile : Page height set to 5.0
    + |-- 0:57-INFO-profile : Saving diagram to file : example_05.png
    + |-- 0:57-INFO-profile : Be patient. This may be long for large datasets.
    + |-- 0:57-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_onenfmua
     
    _images/example_05.png

    Alternatively, the groups can be set to chromosomes or transcript classes:

    $ gtftk profile -D -i mini_real_promoter.zip -g tx_classes -f bwig  -t tx_classes.txt -o profile_prom  -pf png -if  example_06.png -V 2 -nm ranging
    - |-- 19:29-DEBUG-profile : Using pandas version 1.1.2
    - |-- 19:29-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/pandas/__init__.py
    - |-- 19:29-DEBUG-profile : Using numpy version 1.19.1
    - |-- 19:29-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/numpy/__init__.py
    - |-- 19:29-DEBUG-profile : Using plotnine version 0.7.1
    - |-- 19:29-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/plotnine/__init__.py
    - |-- 19:29-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_f2tvlc3s
    - |-- 19:29-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_f2tvlc3s
    - |-- 19:29-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_f2tvlc3s/mini_real_promoter
    - |-- 19:29-INFO-profile : Getting configuration info from input file.
    - |-- 19:29-INFO-profile : Reading transcript file.
    - |-- 19:29-INFO-profile : Deleting duplicates in transcript-file.
    - |-- 19:29-INFO-profile : Checking how many transcripts where found in the transcript list.
    - |-- 19:29-INFO-profile : Keeping 804 transcript out of 833 in input transcript list.
    - |-- 19:29-DEBUG-profile : Color order : ['antisense', 'protein_coding', 'lincRNA']
    - |-- 19:29-DEBUG-profile : Profile color : ['#000000', '#00bb00', '#cccccc']
    - |-- 19:29-INFO-profile : Searching coverage columns.
    - |-- 19:29-INFO-profile : Melting.
    - |-- 19:29-INFO-profile : Ceiling
    - |-- 19:29-INFO-profile : Normalizing (ranging)
    - |-- 19:29-INFO-profile : Computing column ordering.
    - |-- 19:29-INFO-profile : Preparing diagram
    - |-- 19:29-INFO-profile : Theming and ordering. Please be patient...
    - |-- 19:29-INFO-profile : Preparing x axis
    - |-- 19:29-INFO-profile : facet_col 3
    - |-- 19:29-INFO-profile : Page width set to 9
    - |-- 19:29-INFO-profile : Page height set to 2.0
    - |-- 19:29-INFO-profile : Saving diagram to file : example_06.png
    - |-- 19:29-INFO-profile : Be patient. This may be long for large datasets.
    - |-- 19:29-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_f2tvlc3s
    + |-- 0:57-DEBUG-profile : Using pandas version 1.1.2
    + |-- 0:57-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/pandas/__init__.py
    + |-- 0:57-DEBUG-profile : Using numpy version 1.19.2
    + |-- 0:57-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/numpy/__init__.py
    + |-- 0:57-DEBUG-profile : Using plotnine version 0.7.1
    + |-- 0:57-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/plotnine/__init__.py
    + |-- 0:57-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_8dfns9ml
    + |-- 0:57-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_8dfns9ml
    + |-- 0:57-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_8dfns9ml/mini_real_promoter
    + |-- 0:57-INFO-profile : Getting configuration info from input file.
    + |-- 0:57-INFO-profile : Reading transcript file.
    + |-- 0:57-INFO-profile : Deleting duplicates in transcript-file.
    + |-- 0:57-INFO-profile : Checking how many transcripts where found in the transcript list.
    + |-- 0:57-INFO-profile : Keeping 804 transcript out of 833 in input transcript list.
    + |-- 0:57-DEBUG-profile : Color order : ['antisense', 'lincRNA', 'protein_coding']
    + |-- 0:57-DEBUG-profile : Profile color : ['#000000', '#00bb00', '#cccccc']
    + |-- 0:57-INFO-profile : Searching coverage columns.
    + |-- 0:57-INFO-profile : Melting.
    + |-- 0:57-INFO-profile : Ceiling
    + |-- 0:57-INFO-profile : Normalizing (ranging)
    + |-- 0:57-INFO-profile : Computing column ordering.
    + |-- 0:57-INFO-profile : Preparing diagram
    + |-- 0:57-INFO-profile : Theming and ordering. Please be patient...
    + |-- 0:57-INFO-profile : Preparing x axis
    + |-- 0:57-INFO-profile : facet_col 3
    + |-- 0:57-INFO-profile : Page width set to 9
    + |-- 0:57-INFO-profile : Page height set to 2.0
    + |-- 0:57-INFO-profile : Saving diagram to file : example_06.png
    + |-- 0:57-INFO-profile : Be patient. This may be long for large datasets.
    + |-- 0:57-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_8dfns9ml
     
    _images/example_06.png
    $ gtftk profile -D -i mini_real_promoter.zip -g chrom -f bwig  -t tx_classes.txt -o profile_prom  -pf png -if  example_06b.png -V 2 -nm ranging
    - |-- 19:29-DEBUG-profile : Using pandas version 1.1.2
    - |-- 19:29-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/pandas/__init__.py
    - |-- 19:29-DEBUG-profile : Using numpy version 1.19.1
    - |-- 19:29-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/numpy/__init__.py
    - |-- 19:29-DEBUG-profile : Using plotnine version 0.7.1
    - |-- 19:29-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_dev37/lib/python3.7/site-packages/plotnine/__init__.py
    - |-- 19:29-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_aovxheri
    - |-- 19:29-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_aovxheri
    - |-- 19:29-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_aovxheri/mini_real_promoter
    - |-- 19:29-INFO-profile : Getting configuration info from input file.
    - |-- 19:29-DEBUG-profile : Color order : ['chr4', 'chr12', 'chr17', 'chr22', 'chr8', 'chr16', 'chr21', 'chr1', 'chr9', 'chr5', 'chr2', 'chr15', 'chr10', 'chrX', 'chr14', 'chr3', 'chr11', 'chr7', 'chr18', 'chr19', 'chr20', 'chr13', 'chr6']
    - |-- 19:29-DEBUG-profile : Profile color : ['#000000', '#6c007c', '#850096', '#2500a5', '#0000ca', '#0041dd', '#0086dd', '#009fca', '#00aaa1', '#00a76f', '#009c00', '#00bb00', '#00da00', '#00f900', '#88ff00', '#dbf400', '#f7db00', '#ffb500', '#ff6100', '#f60000', '#da0000', '#cc1313', '#cccccc']
    - |-- 19:29-INFO-profile : Searching coverage columns.
    - |-- 19:29-INFO-profile : Melting.
    - |-- 19:29-INFO-profile : Ceiling
    - |-- 19:29-INFO-profile : Normalizing (ranging)
    - |-- 19:44-INFO-profile : Computing column ordering.
    - |-- 19:44-INFO-profile : Preparing diagram
    - |-- 19:44-INFO-profile : Theming and ordering. Please be patient...
    - |-- 19:44-INFO-profile : Preparing x axis
    - |-- 19:44-INFO-profile : facet_col 3
    - |-- 19:44-INFO-profile : Page width set to 9
    - |-- 19:44-INFO-profile : Page height set to 2.0
    - |-- 19:44-INFO-profile : Saving diagram to file : example_06b.png
    - |-- 19:44-INFO-profile : Be patient. This may be long for large datasets.
    - |-- 19:44-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_aovxheri
    + |-- 0:57-DEBUG-profile : Using pandas version 1.1.2
    + |-- 0:57-DEBUG-profile : Pandas location /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/pandas/__init__.py
    + |-- 0:57-DEBUG-profile : Using numpy version 1.19.2
    + |-- 0:57-DEBUG-profile : Pandas numpy /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/numpy/__init__.py
    + |-- 0:57-DEBUG-profile : Using plotnine version 0.7.1
    + |-- 0:57-DEBUG-profile : Pandas plotnine /Users/puthier/anaconda3/envs/pygtftk_37/lib/python3.7/site-packages/plotnine/__init__.py
    + |-- 0:57-DEBUG-profile : Creating directory : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_7ihvich5
    + |-- 0:57-DEBUG-profile : Uncompressing : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_7ihvich5
    + |-- 0:57-DEBUG-profile : Reading : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_7ihvich5/mini_real_promoter
    + |-- 0:57-INFO-profile : Getting configuration info from input file.
    + |-- 0:57-DEBUG-profile : Color order : ['chr11', 'chr15', 'chr2', 'chr22', 'chr9', 'chr16', 'chrX', 'chr18', 'chr3', 'chr14', 'chr17', 'chr4', 'chr1', 'chr6', 'chr5', 'chr12', 'chr13', 'chr19', 'chr20', 'chr8', 'chr10', 'chr21', 'chr7']
    + |-- 0:57-DEBUG-profile : Profile color : ['#000000', '#6c007c', '#850096', '#2500a5', '#0000ca', '#0041dd', '#0086dd', '#009fca', '#00aaa1', '#00a76f', '#009c00', '#00bb00', '#00da00', '#00f900', '#88ff00', '#dbf400', '#f7db00', '#ffb500', '#ff6100', '#f60000', '#da0000', '#cc1313', '#cccccc']
    + |-- 0:57-INFO-profile : Searching coverage columns.
    + |-- 0:57-INFO-profile : Melting.
    + |-- 0:57-INFO-profile : Ceiling
    + |-- 0:57-INFO-profile : Normalizing (ranging)
    + |-- 0:57-INFO-profile : Computing column ordering.
    + |-- 0:57-INFO-profile : Preparing diagram
    + |-- 0:57-INFO-profile : Theming and ordering. Please be patient...
    + |-- 0:57-INFO-profile : Preparing x axis
    + |-- 0:57-INFO-profile : facet_col 3
    + |-- 0:57-INFO-profile : Page width set to 9
    + |-- 0:57-INFO-profile : Page height set to 2.0
    + |-- 0:57-INFO-profile : Saving diagram to file : example_06b.png
    + |-- 0:57-INFO-profile : Be patient. This may be long for large datasets.
    + |-- 0:57-DEBUG-profile : Deleting temp file : /var/folders/zy/wl3dj2_n76zfc8sdvny1q06c0000gn/T/profile_matrix__pygtftk_7ihvich5
     
    _images/example_06b.png

    Note that facets may also be associated to epigenetic marks. In this case, the –group-by argument can be set to tx_classes or chrom.

    $ gtftk profile -D -i mini_real_tx.zip -g tx_classes -t tx_classes.txt -f bwig  -o profile_tx -pf png -if  example_07.png -w -nm ranging
    - |-- 19:44-WARNING-profile : PlotnineError error, can not show group number: Aesthetics {'ha'} specified two times.
    + |-- 0:57-WARNING-profile : PlotnineError error, can not show group number: Aesthetics {'ha'} specified two times.
     
    _images/example_07.png
    $ gtftk profile -D -i mini_real_tx.zip -g chrom -f bwig  -o profile_tx -pf png -if  example_08.png  -w -nm ranging
    - |-- 20:45-WARNING-profile : PlotnineError error, can not show group number: Aesthetics {'ha'} specified two times.
    + |-- 0:58-WARNING-profile : PlotnineError error, can not show group number: Aesthetics {'ha'} specified two times.
     
    _images/example_08.png @@ -462,13 +462,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/developers.html b/docs/developers.html index 0d66b043..6bce0256 100644 --- a/docs/developers.html +++ b/docs/developers.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Writing your own commands — gtftk 1.2.1 documentation + Writing your own commands — gtftk 1.2.3 documentation @@ -41,7 +41,7 @@

    Navigation

  • previous |
  • - + @@ -232,13 +232,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/editing.html b/docs/editing.html index c3aceec7..da2ac6a1 100644 --- a/docs/editing.html +++ b/docs/editing.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘Editing’ — gtftk 1.2.1 documentation + Commands from section ‘Editing’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -436,7 +436,7 @@

    discretize_key
    $ gtftk join_attr -i simple.gtf -j simple.join_mat -k gene_id -m | gtftk discretize_key -k S1 -d S1_d -n 2 -l A,B  | gtftk select_by_key -k feature -v gene
    - |-- 22:08-INFO-discretize_key : Categories: ['A', 'B']
    + |-- 0:58-INFO-discretize_key : Categories: ['A', 'B']
     chr1	gtftk	gene	125	138	.	+	.	gene_id "G0001";
     chr1	gtftk	gene	180	189	.	+	.	gene_id "G0002";
     chr1	gtftk	gene	50	61	.	-	.	gene_id "G0003"; S1 "0.2322"; S2 "0.4"; S1_d "A";
    @@ -455,7 +455,7 @@ 

    discretize_key diff --git a/docs/example_01.png b/docs/example_01.png index 08a728f5..9d6fe4a4 100644 Binary files a/docs/example_01.png and b/docs/example_01.png differ diff --git a/docs/example_05.png b/docs/example_05.png index 3f6c33d6..2937ac10 100644 Binary files a/docs/example_05.png and b/docs/example_05.png differ diff --git a/docs/example_06.png b/docs/example_06.png index 48f30732..79f3e737 100644 Binary files a/docs/example_06.png and b/docs/example_06.png differ diff --git a/docs/example_06b.png b/docs/example_06b.png index eb7ed09e..3cbeb7c0 100644 Binary files a/docs/example_06b.png and b/docs/example_06b.png differ diff --git a/docs/example_08.png b/docs/example_08.png index a2a823dd..861d1066 100644 Binary files a/docs/example_08.png and b/docs/example_08.png differ diff --git a/docs/example_13.png b/docs/example_13.png index 67fc2186..1fde3047 100644 Binary files a/docs/example_13.png and b/docs/example_13.png differ diff --git a/docs/example_pa_01.pdf b/docs/example_pa_01.pdf index 6ec89d45..54ea2927 100644 Binary files a/docs/example_pa_01.pdf and b/docs/example_pa_01.pdf differ diff --git a/docs/example_pa_02.pdf b/docs/example_pa_02.pdf index 83a94b5b..ec941371 100644 Binary files a/docs/example_pa_02.pdf and b/docs/example_pa_02.pdf differ diff --git a/docs/example_pa_03.pdf b/docs/example_pa_03.pdf index 82ab4365..379b27d2 100644 Binary files a/docs/example_pa_03.pdf and b/docs/example_pa_03.pdf differ diff --git a/docs/example_pa_04.pdf b/docs/example_pa_04.pdf index d7896cc3..8b24f3fa 100644 Binary files a/docs/example_pa_04.pdf and b/docs/example_pa_04.pdf differ diff --git a/docs/genindex.html b/docs/genindex.html index 7f10a29d..265aff0d 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Index — gtftk 1.2.1 documentation + Index — gtftk 1.2.3 documentation @@ -37,7 +37,7 @@

    Navigation

  • modules |
  • - +
    @@ -710,13 +710,13 @@

    Navigation

  • modules |
  • - + diff --git a/docs/gtftk_args.html b/docs/gtftk_args.html index 4c61c4fd..5bfd8691 100644 --- a/docs/gtftk_args.html +++ b/docs/gtftk_args.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Help on gtftk Unix commands — gtftk 1.2.1 documentation + Help on gtftk Unix commands — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -298,13 +298,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/index.html b/docs/index.html index 4492e7a0..e5b2369c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -18,7 +18,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Welcome to pygtftk documentation page — gtftk 1.2.1 documentation + Welcome to pygtftk documentation page — gtftk 1.2.3 documentation @@ -43,7 +43,7 @@

    Navigation

  • next |
  • - + @@ -136,6 +136,9 @@

    Table of contentCommands from section ‘ologram’ + +
  • Details diff --git a/docs/information.html b/docs/information.html index 952594b9..50c060c1 100644 --- a/docs/information.html +++ b/docs/information.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘information’ — gtftk 1.2.1 documentation + Commands from section ‘information’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -69,10 +69,10 @@

    apropos
    $ gtftk apropos -k promoter
    - |-- 22:08-INFO-apropos : >> Keyword 'promoter' was found in the following command:
    + |-- 0:58-INFO-apropos : >> Keyword 'promoter' was found in the following command:
    +	- ologram.
     	- coverage.
     	- divergent.
    -	- ologram.
     

    Arguments:

    @@ -738,13 +738,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/installation.html b/docs/installation.html index 75cc65c5..8433ed07 100644 --- a/docs/installation.html +++ b/docs/installation.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Installing pygtftk/gtftk — gtftk 1.2.1 documentation + Installing pygtftk/gtftk — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -109,13 +109,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/merge_ologram_stats_01.pdf b/docs/merge_ologram_stats_01.pdf index 72a77908..5d5e6e6e 100644 Binary files a/docs/merge_ologram_stats_01.pdf and b/docs/merge_ologram_stats_01.pdf differ diff --git a/docs/miscellaneous.html b/docs/miscellaneous.html index b6239361..62c9af39 100644 --- a/docs/miscellaneous.html +++ b/docs/miscellaneous.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘miscellaneous’ — gtftk 1.2.1 documentation + Commands from section ‘miscellaneous’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -69,13 +69,13 @@

    control_list
    $ gtftk control_list -i mini_real_counts_ENCFF630HEX.tsv -r mini_real_control_1.txt -D -V 2 -s -l -p 1 -ju -if example_13.png -pf png
    - |-- 9:11-INFO-control_list : 0 duplicate lines have been deleted in reference file.
    - |-- 9:11-INFO-control_list : Found 50 genes of the reference in the provided signal file
    - |-- 9:11-INFO-control_list : All reference genes were found.
    - |-- 9:11-INFO-control_list : Searching for genes with matched signal.
    - |-- 9:11-INFO-control_list : Preparing a dataframe for plotting.
    - |-- 9:11-INFO-control_list : Saving diagram to file : example_13.png
    - |-- 9:11-INFO-control_list : Be patient. This may be long for large datasets.
    + |-- 0:59-INFO-control_list : 0 duplicate lines have been deleted in reference file.
    + |-- 0:59-INFO-control_list : Found 50 genes of the reference in the provided signal file
    + |-- 0:59-INFO-control_list : All reference genes were found.
    + |-- 0:59-INFO-control_list : Searching for genes with matched signal.
    + |-- 0:59-INFO-control_list : Preparing a dataframe for plotting.
    + |-- 0:59-INFO-control_list : Saving diagram to file : example_13.png
    + |-- 0:59-INFO-control_list : Be patient. This may be long for large datasets.
     
    _images/example_13.png @@ -237,13 +237,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/objects.inv b/docs/objects.inv index ddc2b754..899cd31f 100644 Binary files a/docs/objects.inv and b/docs/objects.inv differ diff --git a/docs/ologram.html b/docs/ologram.html index ae181b21..9a08744c 100644 --- a/docs/ologram.html +++ b/docs/ologram.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘ologram’ — gtftk 1.2.1 documentation + Commands from section ‘ologram’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -98,12 +98,12 @@

    ologram

    Warning

    -

    The ologram examples below use 8 CPUs. Please adapt.

    +

    The ologram examples below use 8 CPUs. Please adapt the number of threads.

    Example: Perform a basic annotation. We are searching whether H3K4me3 peaks tend to be enriched in some specific genomic elements. The bars in the bar plot diagram will be ordered according to ‘summed_bp_overlaps_pvalue’.

    $ gtftk ologram -i hg38_chr1.gtf.gz -p ENCFF112BHN_H3K4me3_chr1.bed -c hg38_chr1.genome -u 1500 -d 1500 -D  -pf example_pa_01.pdf -k 8 -j summed_bp_overlaps_pvalue
    - |-- 9:11-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    - |-- 9:11-WARNING-ologram : Computing log(p-val) for a Neg Binom with mean >= var ; var was set to mean+1 (start_codon)
    + |-- 0:59-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    + |-- 1:00-WARNING-ologram : Computing log(p-val) for a Neg Binom with mean >= var ; var was set to mean+1 (start_codon)
     

    @@ -118,7 +118,7 @@

    ologram
    $ gtftk select_by_key -i mini_real.gtf.gz -k gene_biotype -v protein_coding,lincRNA,antisense,processed_transcript  |  gtftk ologram  -m gene_biotype -p ENCFF112BHN_H3K4me3_K562_sub.bed -c hg38 -D -n  -pf example_pa_02.pdf -k 8 -j summed_bp_overlaps_pvalue
    - |-- 9:12-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    + |-- 1:00-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
     

    @@ -137,8 +137,8 @@

    ologram
    $ gtftk join_attr -i mini_real.gtf.gz -H -j mini_real_counts_ENCFF630HEX.tsv -k gene_name -n exprs -t exon | gtftk discretize_key -k exprs -p -d exprs_class -n 6  -u | gtftk ologram -p ENCFF119BYM_H3K36me3_K562_sub.bed -c hg38 -D -n -m exprs_class -pf example_pa_03.pdf -k 8 -j summed_bp_overlaps_pvalue
    - |-- 9:13-INFO-discretize_key : Categories: ['[0.0_183.0]', '(183.0_549.0]', '(549.0_1018.0]', '(1018.0_1631.0]', '(1631.0_3139.0]', '(3139.0_41703.0]']
    - |-- 9:13-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    + |-- 1:02-INFO-discretize_key : Categories: ['[0.0_183.0]', '(183.0_549.0]', '(549.0_1018.0]', '(1018.0_1631.0]', '(1631.0_3139.0]', '(3139.0_41703.0]']
    + |-- 1:01-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
     

    @@ -153,8 +153,8 @@

    ologram
    $ gtftk add_exon_nb -k exon_nbr -i mini_real.gtf.gz | gtftk discretize_key -p -d exon_nbr_cat -n 5  -k exon_nbr | gtftk ologram -p ENCFF112BHN_H3K4me3_K562_sub.bed -c hg38 -D -n -m exon_nbr_cat -pf example_pa_04.pdf -k 8 -j summed_bp_overlaps_pvalue
    - |-- 9:14-INFO-discretize_key : Categories: ['[1.0_2.0]', '(2.0_4.0]', '(4.0_6.0]', '(6.0_12.0]', '(12.0_107.0]']
    - |-- 9:14-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    + |-- 1:03-INFO-discretize_key : Categories: ['[1.0_2.0]', '(2.0_4.0]', '(4.0_6.0]', '(6.0_12.0]', '(12.0_107.0]']
    + |-- 1:03-WARNING-ologram : Using only 8 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
     

    @@ -174,27 +174,21 @@

    ologram

    ologram (multiple overlaps)

    -

    While previously we computed paiwise enrichment (ie. Query+A, Query+B …) , It is also possible to use the OLOGRAM-MODL Multiple Overlap Dictionary Learning) plugin to find multiple overlaps (ie. between n>=2 sets) enrichment (ie. Query+A+B, Query+A+C, …) in order to highlight combinations of genomic regions, such as Transcriptional Regulator complexes.

    +

    While previously we computed pairwise enrichment (ie. Query+A, Query+B, …), it is also possible to use the OLOGRAM-MODL (Multiple Overlap Dictionary Learning) plugin to find multiple overlaps (ie. between n>=2 sets) enrichment (ie. Query+A+B, Query+A+C, …) in order to highlight combinations of genomic regions, such as Transcriptional Regulator complexes.

    This is done only on custom regions supplied as BED files with the –more-bed argument. In most cases you may use the –no-gtf argument and only pass the regions of interest.

    -

    For statistical reasons, we recommend shuffling across a relevant subsection of the genome only (ie. enhancers only) using –bed-excl or –bed-incl to ensure the longer combinations have a reasonable chance of being randomly encountered in the shuffles.

    -

    MODL itemset mining algorithm: By default, OLOGRAM-MODL will compute the enrichment of all n-wise combinations that are encountered in the real data it was passed. This however can add up to 2**N combinations and make the result hard to read. Furthermore, in biological data noise is a real problem and can obscure the relevant combinations.

    -

    As such, we also give the option to use a custom itemset mining algorithm on the true overlaps to identify interesting combinations.

    -

    In broad strokes, this custom algorithm MODL (Multiple Overlap Dictionary Learning) will perform many matrix factorizations on the matrix of true overlaps to identify relevant correlation groups of genomic regions. Then a greedy algorithm based on how much these words improve the reconstruction will select the utmost best words. MODL is only used to filter the output of OLOGRAM : once it returns a list of interesting combination, OLOGRAM will compute their enrichment as usual, but for them only. Each combination is of the form [Query + A + B + C] where A, B and C are BED files given as –more-bed. You can also manually specify the combinations to be studied with the format defined in OLOGRAM notes (below).

    -

    Unlike classical association rules mining algorithms, this focuses on mining relevant bio complexes/clusters and correlation groups (item sets), and you should not request more than 20-30 combinations. As a matrix factorization based algorithm, it is designed to be resistant -to noise which is a known problem in biological data. Its goal is to extract meaningful frequent combinations from noisy data. As a result however, it is biased in favor of the most abundant combinations in the data, and may return correlation groups if you ask for too few words (ie. if AB, BC and AC are complexes, ABC might be returned).

    -

    This itemset mining algorithm is a work-in-progress. Whether you use MODL will not change the results for each combination, it only changes which combinations are displayed. If you want the enrichment of all combinations, ignore it. To use MODL, use the –multiple-overlap-max-number-of-combinations argument.

    +

    For statistical reasons, we recommend shuffling across a relevant subsection of the genome only (ie. enhancers only) using –bed-excl or –bed-incl to ensure the longer combinations have a reasonable chance of being randomly encountered in the shuffles. Conversely, if you do not filter the combinations, keep in mind that the longer ones may be enriched even though they are present only on a few base pairs, because at random they would be even rarer.

    Exact combinations: By default, OLOGRAM will compute “inexact” combinations, meaning that when encountering an overlap of [Query + A + B + C] it will count towards [A + B + …]. For exact intersections (ie. [Query + A + B + nothing else]), set the –multiple-overlap-target-combi-size flag to the number of –more-bed plus one. You will know if the combinations are computed as inexact by the ‘…’ in their name in the result file. Intersections not including the query file are discarded.

    Simple example:

    Comparing the query (-p) against two other BED files, analyzing multiple overlaps.

    $ gtftk ologram -z -w -q -c simple_07.chromInfo -p simple_07_peaks.bed --more-bed simple_07_peaks.1.bed simple_07_peaks.2.bed --more-bed-multiple-overlap
    - |-- 9:15-WARNING : Converting to bed6 format (simple_07_peaks.bed).
    - |-- 9:15-WARNING : Converting to bed6 format (simple_07_peaks.1.bed).
    - |-- 9:15-WARNING : Converting to bed6 format (simple_07_peaks.2.bed).
    - |-- 9:15-WARNING-ologram : Using only 1 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    - |-- 9:15-WARNING-ologram : --more-bed-labels was not set, automatically defaulting to --more-bed file names.
    - |-- 9:15-WARNING-ologram : [Query + simple_07_peaks_1 + ... ]: there may be a poor fit for this feature. Check fit quality in the results. This is likely due to there being too few regions.
    - |-- 9:15-WARNING-ologram : [Query + simple_07_peaks_1 + simple_07_peaks_2 + ... ]: there may be a poor fit for this feature. Check fit quality in the results. This is likely due to there being too few regions.
    - |-- 9:15-WARNING-ologram : Computing log(p-val) for a Neg Binom with mean >= var ; var was set to mean+1 ([Query + simple_07_peaks_1 + simple_07_peaks_2 + ... ])
    + |-- 1:03-WARNING : Converting to bed6 format (simple_07_peaks.bed).
    + |-- 1:03-WARNING : Converting to bed6 format (simple_07_peaks.1.bed).
    + |-- 1:03-WARNING : Converting to bed6 format (simple_07_peaks.2.bed).
    + |-- 1:03-WARNING-ologram : Using only 1 threads, but 16 cores are available. Consider changing the --nb-threads parameter.
    + |-- 1:03-WARNING-ologram : --more-bed-labels was not set, automatically defaulting to --more-bed file names.
    + |-- 1:03-WARNING-ologram : [Query + simple_07_peaks_1 + ... ]: there may be a poor fit for this feature. Check fit quality in the results. This is likely due to there being too few regions.
    + |-- 1:03-WARNING-ologram : [Query + simple_07_peaks_1 + simple_07_peaks_2 + ... ]: there may be a poor fit for this feature. Check fit quality in the results. This is likely due to there being too few regions.
    + |-- 1:03-WARNING-ologram : Computing log(p-val) for a Neg Binom with mean >= var ; var was set to mean+1 ([Query + simple_07_peaks_1 + simple_07_peaks_2 + ... ])
     

    Detailed example:

    @@ -222,13 +216,18 @@

    ologram (multiple overlaps) +

    Details

    +

    In broad strokes, the custom itemset algorithm MODL (Multiple Overlap Dictionary Learning) will perform many matrix factorizations on the matrix of true overlaps to identify relevant correlation groups of genomic regions. Then a greedy algorithm based on how much these words improve the reconstruction will select the utmost best words. MODL is only used to filter the output of OLOGRAM: once it returns a list of interesting combinations, OLOGRAM will compute their enrichment as usual, but for them only. Each combination is of the form [Query + A + B + C] where A, B and C are BED files given as –more-bed. You can also manually specify the combinations to be studied with the format defined in OLOGRAM notes (below).

    +

    Unlike classical association rules mining algorithms, this focuses on mining relevant bio complexes/clusters and correlation groups (item sets), and you should not request more than 20-30 combinations. As a matrix factorization based algorithm, it is designed to be resistant +to noise which is a known problem in biological data. Its goal is to extract meaningful frequent combinations from noisy data. As a result however, it is biased in favor of the most abundant combinations in the data, and may return correlation groups if you ask for too few words (ie. if AB, BC and AC are complexes, ABC might be returned).

    +

    This itemset mining algorithm is a work-in-progress. Whether you use MODL will not change the results for each combination, it only changes which combinations are displayed. If you want the enrichment of all combinations, ignore it. To use MODL, use the –multiple-overlap-max-number-of-combinations argument.

    MODL algorithm API: MODL can also be used independently as a combination mining algorithm.

    -

    This can work on any type of data, biological or not, that respects the conventional formatting for lists of transactions: the data needs to be a matrix with one line per transaction and one column per element.

    -

    For example, if you have three possible elements A, B and C, a line of [1,0,1] means a transaction containing A and C.

    -

    For a factor allowance of k and n final queried words, the matrix will be rebuilt with k*n words in step 1. -factor allowance is K in K*n words in step 1 where n is final queries nb of words.

    -

    MODL and will discard combinations rarer than 1/10000 occurences to reduce computing times and will also reduce the abundance of all unique lines in the matrix to their square roots to reduce the emphasis on the most frequent elements. -However, this can magnify the impact of the noise quadratically as well, and can be disabled when using the manual API.

    +

    This can work on any type of data, biological or not, that respects the conventional formatting for lists of transactions: the data needs to be a matrix with one line per transaction and one column per element. For example, if you have three possible elements A, B and C, a line of [1,0,1] means a transaction containing A and C.

    +

    For a factor allowance of k and n final queried words, the matrix will be rebuilt with k*n words in step 1. MODL will discard combinations rarer than 1/10000 occurrences to reduce computing times. It will also reduce the abundance of all unique lines in the matrix to their square roots to reduce the emphasis on the most frequent elements. However, the latter can magnify the impact of the noise as well and can be disabled when using the manual API. To de-emphasize longer words, which can help in this case, we can also normalize words by their summed square in step 2.

    If you are passing a custom error function, it must have the signature error_function(X_true, X_rebuilt, code). X_true is the real data, X_rebuilt is the reconstruction to evaluate, and code is the encoded version which in our case is used to assess sparsity. All are NumPy matrices.

    For more details, see code comments.

    Here is an example:

    @@ -244,11 +243,12 @@

    ologram (multiple overlaps)nb_threads = 1, step_1_factor_allowance = 2, # How many words to ask for in each step 1 rebuilding, as a multiplier of multiple_overlap_max_number_of_combinations error_function = None, # Custom error function in step 2 - smother = True) # Should the smothering (quadratic reduction of abundance) be applied ? + smother = True, # Should the smothering (quadratic reduction of abundance) be applied ? + normalize_words = False) # Normalize words by their summed squared in step 2 ? interesting_combis = combi_miner.find_interesting_combinations() -

    For more details about usage and implementation, please read the notes below :

    +

    For more details about usage and implementation, please read the notes below.

    Arguments:

    $ gtftk ologram -h
       Usage: gtftk ologram [-i GTF] [-c TXT] -p BED [-b [more_bed [more_bed ...]]] [-l more_bed_labels] [-e BED] [-bi BED] [-u upstream] [-d downstream] [-m more_keys] [-n] [-mo] [-mocs multiple_overlap_target_combi_size] [-monc multiple_overlap_max_number_of_combinations] [-moc multiple_overlap_custom_combis] [-k nb_threads] [-s seed] [-mn minibatch_nb] [-ms minibatch_size] [-ma] [-o DIR] [-pw pdf_width] [-ph pdf_height] [-pf pdf_file_alt] [-x] [-y] [-tp tsv_file_path] [-j {None,nb_intersections_expectation_shuffled,nb_intersections_variance_shuffled,nb_intersections_negbinom_fit_quality,nb_intersections_log2_fold_change,nb_intersections_true,nb_intersections_pvalue,summed_bp_overlaps_expectation_shuffled,summed_bp_overlaps_variance_shuffled,summed_bp_overlaps_negbinom_fit_quality,summed_bp_overlaps_log2_fold_change,summed_bp_overlaps_true,summed_bp_overlaps_pvalue}] [-z] [-f] [-w] [-q] [-h] [-V [verbosity]] [-D] [-C] [-K tmp_dir] [-A] [-L logger_file] [-W write_message_to_file]
    @@ -336,11 +336,14 @@ 

    ologram (multiple overlaps)
    import pybedtools
    +import numpy as np
    +from pygtftk.stats.intersect.overlap_stats_compute import compute_true_intersection
    +
    +# Register the BED files as pybedtools.BedTool objects
    +bedA = pybedtools.BedTool(path_to_your_query)
    +bedsB = [pybedtools.BedTool(bedfilepath) for bedfilepath in list_of_all_paths_to_more_bed]
    +
    +# Use our custom intersection computing algorithm to get the matrix of overlaps
    +true_intersection = compute_true_intersection(bedA, bedsB)
    +flags_matrix = np.array([i[3] for i in true_intersection])
    +

    +

    The resulting flags_matrix is a NumPy array that can be edited, and on which MODL can be run.

    +

    Since the results of MODL only depend on the true intersections and not on the shuffles, you can run MODL with 1 shuffle or on a manually computed matrix as above to pre-select interesting combinations, and then run the full analysis on many shuffles. We then recommend selecting the combinations that interest you in the resulting tsv file, using MODL’s selection as a starting point and adding or removing some combinations based on your own needs (eg. adding all the highest fold changes, or all particular combinations containing the Transcription Factor X that you are studying).

    ologram_merge_stats

    Description: Several tsv files resulting from OLOGRAM analyses can be merged into a single diagram report using the ologram_merge_stats command.

    @@ -469,9 +486,10 @@

    ologram_merge_stats

    ologram_modl_treeify

    -

    Description: Visualize n-wise enrichment results (OLOGRAM-MODL) as a tree of combinations. Works on the result (tsv file) of an OLOGRAM analysis called with –more-bed-multiple-overlap.

    -

    We recommend this representation. The tsv file can be edited before passing it to the command, for example by keeping only the combinations you are interested in.

    -

    On the graph, S designated the total number of basepairs in which this combinations is encountered in the real data. Fold change gives the ratio with the number of basepairs in the shuffles, with the associated Negative Binomial p-value.

    +

    Description: Visualize n-wise enrichment results (OLOGRAM-MODL) as a tree of combinations. Works on the result (tsv file) of an OLOGRAM analysis called with –more-bed-multiple-overlap. On the graph, S designates the total number of basepairs in which this combination is encountered in the real data. Fold change gives the ratio with the number of basepairs in the shuffles, with the associated Negative Binomial p-value.

    +

    This recommended representation is useful to find master regulators, by showing which additions to a combination increase its enrichment, and allowing one to see whether overlaps that contain the element X also contain the element Y (looking at how a child combination accounts for the S of its parent in an inexact counting).

    +

    The tsv result file can be edited before passing it to the command, for example by keeping only the combinations you are interested in, such as all combinations containing the Transcription Factor you are studying. We recommend running MODL to make a pre-selection.

    +

    We also recommend discarding the rarest combinations found on such a very small number of basepairs that they are unlikely to be biologically significant. This is mostly relevant when you have many sets (k >= 5) since longer combinations will often be enriched through sheer unlikelihood.

    $ gtftk ologram_modl_treeify -i multiple_overlap_trivial_ologram_stats.tsv -o treeified.pdf -l ThisWasTheNameOfTheQuery
     
    @@ -582,6 +600,9 @@

    Table of Contents

  • Commands from section ‘ologram’ +
  • +
  • Details
  • diff --git a/docs/py-modindex.html b/docs/py-modindex.html index 735c83d4..8cce10d1 100644 --- a/docs/py-modindex.html +++ b/docs/py-modindex.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Python Module Index — gtftk 1.2.1 documentation + Python Module Index — gtftk 1.2.3 documentation @@ -40,7 +40,7 @@

    Navigation

  • modules |
  • - +
    @@ -164,13 +164,13 @@

    Navigation

  • modules |
  • - + diff --git a/docs/search.html b/docs/search.html index 4b2206ed..67673be6 100644 --- a/docs/search.html +++ b/docs/search.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Search — gtftk 1.2.1 documentation + Search — gtftk 1.2.3 documentation @@ -42,7 +42,7 @@

    Navigation

  • modules |
  • - + @@ -93,13 +93,13 @@

    Navigation

  • modules |
  • - + diff --git a/docs/searchindex.js b/docs/searchindex.js index e2960733..6e9bcfb1 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["about","annotation","api","bwig_coverage","conversion","coordinates","coverage","developers","editing","gtftk_args","index","information","installation","miscellaneous","ologram","selection","sequence"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,"sphinx.ext.todo":2,sphinx:56},filenames:["about.rst","annotation.rst","api.rst","bwig_coverage.rst","conversion.rst","coordinates.rst","coverage.rst","developers.rst","editing.rst","gtftk_args.rst","index.rst","information.rst","installation.rst","miscellaneous.rst","ologram.rst","selection.rst","sequence.rst"],objects:{"pygtftk.Line":{FastaSequence:[2,1,1,""],Feature:[2,1,1,""],FieldSet:[2,1,1,""]},"pygtftk.Line.FastaSequence":{format:[2,2,1,""],write:[2,2,1,""]},"pygtftk.Line.Feature":{add_attr:[2,2,1,""],add_attr_and_write:[2,2,1,""],format:[2,2,1,""],format_tab:[2,2,1,""],from_list:[2,2,1,""],get_3p_end:[2,2,1,""],get_5p_end:[2,2,1,""],get_attr_names:[2,2,1,""],get_attr_value:[2,2,1,""],get_gn_id:[2,2,1,""],get_tx_id:[2,2,1,""],set_attr:[2,2,1,""],write:[2,2,1,""],write_bed:[2,2,1,""],write_bed_3p_end:[2,2,1,""],write_bed_5p_end:[2,2,1,""],write_gtf_to_bed6:[2,2,1,""]},"pygtftk.Line.FieldSet":{format:[2,2,1,""],write:[2,2,1,""]},"pygtftk.bwig":{bw_coverage:[3,0,0,"-"]},"pygtftk.bwig.bw_coverage":{bw_cov_mp:[3,3,1,""],bw_profile_mp:[3,3,1,""],make_tmp_file_pool:[3,3,1,""]},"pygtftk.fasta_interface":{FASTA:[2,1,1,""]},"pygtftk.fasta_interface.FASTA":{transcript_as_bioseq_records:[2,2,1,""],write:[2,2,1,""]},"pygtftk.gtf_interface":{GTF:[2,1,1,""]},"pygtftk.gtf_interface.GTF":{add_attr_column:[2,2,1,""],add_attr_from_dict
:[2,2,1,""],add_attr_from_file:[2,2,1,""],add_attr_from_list:[2,2,1,""],add_attr_from_matrix_file:[2,2,1,""],add_attr_to_pos:[2,2,1,""],add_exon_number:[2,2,1,""],add_prefix:[2,2,1,""],convert_to_ensembl:[2,2,1,""],del_attr:[2,2,1,""],eval_numeric:[2,2,1,""],extract_data:[2,2,1,""],extract_data_iter_list:[2,2,1,""],get_3p_end:[2,2,1,""],get_5p_end:[2,2,1,""],get_attr_list:[2,2,1,""],get_attr_value_list:[2,2,1,""],get_chroms:[2,2,1,""],get_feature_list:[2,2,1,""],get_feature_size:[2,2,1,""],get_gn_ids:[2,2,1,""],get_gn_strand:[2,2,1,""],get_gn_to_tx:[2,2,1,""],get_gname_to_tx:[2,2,1,""],get_intergenic:[2,2,1,""],get_introns:[2,2,1,""],get_midpoints:[2,2,1,""],get_sequences:[2,2,1,""],get_transcript_size:[2,2,1,""],get_tss:[2,2,1,""],get_tts:[2,2,1,""],get_tx_ids:[2,2,1,""],get_tx_strand:[2,2,1,""],get_tx_to_gn:[2,2,1,""],get_tx_to_gname:[2,2,1,""],head:[2,2,1,""],is_defined:[2,2,1,""],is_set:[2,2,1,""],merge_attr:[2,2,1,""],message:[2,2,1,""],nb_exons:[2,2,1,""],nrow:[2,2,1,""],select_5p_transcript:[2,2,1,""],select_by_key:[2,2,1,""],select_by_loc:[2,2,1,""],select_by_max_exon_nb:[2,2,1,""],select_by_number_of_exons:[2,2,1,""],select_by_positions:[2,2,1,""],select_by_regexp:[2,2,1,""],select_by_transcript_size:[2,2,1,""],select_longuest_transcripts:[2,2,1,""],select_shortest_transcripts:[2,2,1,""],tail:[2,2,1,""],to_bed:[2,2,1,""],write:[2,2,1,""],write_bed:[2,2,1,""]},"pygtftk.stats":{beta:[2,0,0,"-"],negbin_fit:[2,0,0,"-"]},"pygtftk.stats.beta":{BetaCalculator:[2,1,1,""]},"pygtftk.stats.beta.BetaCalculator":{beta:[2,2,1,""],betainc:[2,2,1,""],betaincreg:[2,2,1,""],contfractbeta:[2,2,1,""]},"pygtftk.stats.intersect":{overlap_stats_compute:[2,0,0,"-"],overlap_stats_shuffling:[2,0,0,"-"]},"pygtftk.stats.intersect.modl":{dict_learning:[2,0,0,"-"],subroutines:[2,0,0,"-"],tree:[2,0,0,"-"]},"pygtftk.stats.intersect.modl.dict_learning":{Modl:[2,1,1,""],squish_matrix:[2,3,1,""],test_data_for_modl:[2,3,1,""]},"pygtftk.stats.intersect.modl.dict_learning.Modl":{find_interestin
g_combinations:[2,2,1,""],generate_candidate_words:[2,2,1,""],select_best_words_from_library:[2,2,1,""]},"pygtftk.stats.intersect.modl.subroutines":{build_best_dict_from_library:[2,3,1,""],generate_candidate_words:[2,3,1,""],learn_dictionary_and_encode:[2,3,1,""]},"pygtftk.stats.intersect.modl.tree":{Library:[2,1,1,""],Node:[2,1,1,""],apply_recursively_to_all_nodes:[2,3,1,""],get_all_candidates_except:[2,3,1,""],output_visualize:[2,3,1,""]},"pygtftk.stats.intersect.modl.tree.Library":{build_nodes_for_words:[2,2,1,""],build_nodes_for_words_from_ologram_result_df:[2,2,1,""]},"pygtftk.stats.intersect.overlap_stats_compute":{ComputingStatsCombiPartial:[2,1,1,""],DictionaryWithIndex:[2,1,1,""],compute_stats_for_intersection:[2,3,1,""],compute_true_intersection:[2,3,1,""],stats_multiple_overlap:[2,3,1,""],stats_single:[2,3,1,""],which_combis_to_get_from:[2,3,1,""]},"pygtftk.stats.intersect.overlap_stats_compute.DictionaryWithIndex":{get_simple_concatenation:[2,2,1,""]},"pygtftk.stats.intersect.overlap_stats_shuffling":{ComputingIntersectionPartial:[2,1,1,""],compute_all_intersections_minibatch:[2,3,1,""],compute_overlap_stats:[2,3,1,""]},"pygtftk.stats.negbin_fit":{check_negbin_adjustment:[2,3,1,""],empirical_p_val:[2,3,1,""],negbin_pval:[2,3,1,""]},"pygtftk.tab_interface":{TAB:[2,1,1,""]},"pygtftk.tab_interface.TAB":{as_data_frame:[2,2,1,""],as_simple_list:[2,2,1,""],iter_as_list:[2,2,1,""],iterate_with_header:[2,2,1,""],write:[2,2,1,""]},"pygtftk.utils":{GTFtkError:[2,4,1,""],GTFtkInteractiveError:[2,4,1,""],add_prefix_to_file:[2,3,1,""],check_boolean_exprs:[2,3,1,""],check_file_or_dir_exists:[2,3,1,""],check_r_installed:[2,3,1,""],check_r_packages:[2,3,1,""],chomp:[2,3,1,""],chrom_info_as_dict:[2,3,1,""],chrom_info_to_bed_file:[2,3,1,""],close_properly:[2,3,1,""],flatten_list:[2,3,1,""],flatten_list_recur:[2,3,1,""],get_example_feature:[2,3,1,""],get_example_file:[2,3,1,""],head_file:[2,3,1,""],intervals:[2,3,1,""],is_comment:[2,3,1,""],is_empty:[2,3,1,""],is_exon:[2,3
,1,""],is_fasta_header:[2,3,1,""],make_outdir_and_file:[2,3,1,""],make_tmp_dir:[2,3,1,""],make_tmp_file:[2,3,1,""],median_comp:[2,3,1,""],message:[2,3,1,""],mkdir_p:[2,3,1,""],nested_dict:[2,3,1,""],random_string:[2,3,1,""],rnd_alpha_numeric_string:[2,3,1,""],silentremove:[2,3,1,""],simple_line_count:[2,3,1,""],simple_nb_column:[2,3,1,""],sort_2_lists:[2,3,1,""],tab_line:[2,3,1,""],to_alphanum:[2,3,1,""],to_list:[2,3,1,""],write_properly:[2,3,1,""]},pygtftk:{Line:[2,0,0,"-"],fasta_interface:[2,0,0,"-"],gtf_interface:[2,0,0,"-"],tab_interface:[2,0,0,"-"],utils:[2,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","function","Python function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:function","4":"py:exception"},terms:{"000000":6,"0000ca":6,"0003700":15,"0025":6,"0041dd":6,"0050789":15,"0086dd":6,"0097194":15,"009c00":6,"009fca":6,"00_ologram_stat":14,"00a76f":6,"00aaa1":6,"00bb00":6,"00da00":6,"00f900":6,"012":2,"01k":1,"01k_d0":1,"062994":6,"068393":6,"0_1018":14,"0_107":14,"0_12":14,"0_1631":14,"0_183":14,"0_2":14,"0_3139":14,"0_4":14,"0_41703":14,"0_549":14,"0_6":14,"100":[2,6,14],"1000":[2,6,14],"10000":[2,14],"100000":2,"1000000000":[2,15],"1001145":2,"1006138":6,"101":9,"1018":14,"106":5,"107":[8,11,15],"110":8,"112":8,"114":8,"115":[5,8],"116":[5,8,11,15],"123":2,"124":[2,4,5,16],"125":[0,2,4,5,8,11,15,16],"128":5,"130":[4,8,11],"132":[4,8],"1370156":6,"13746":11,"138":[0,2,4,5,8,11,15,16],"1380157":6,"138_":2,"1500":[1,2,14],"159":15,"1631":14,"173583":6,"175":5,"176":[8,11,15],"179":[4,5,16],"180":[0,2,5,8,11,15],"182":[8,15],"183":14,"184":8,"18400":2,"18545":11,"18581":11,"186":[8,11,15],"189":[0,4,5,8,11,15],"1914257":6,"1924258":6,"1948":[2,14],"199":2,"1996":2,"1997":2,"1998":[2,6],"1999":2,"1999432787236828e":2,"1e2":2,"200":[2,6,15],"2000":2,"2007":2,"2013":1,"2017":11,"2018":[0,1,4,5,6,8,11,13,15],"
2019":14,"2020":14,"20328":6,"209":5,"210":[2,8,11,15],"211":8,"213":8,"214":[8,15],"220":[8,15],"2202732":6,"2212733":6,"222":[5,8,11,15],"222_":2,"227377":6,"2322":[2,8],"2493646":6,"250":2,"2500a5":6,"2503647":6,"272973":6,"29348345":2,"300":[2,5,6,13],"3064167":6,"307":2,"3074168":6,"3139":14,"331067":6,"3630449":6,"3640450":6,"374963":6,"3rd":[2,5,11],"4218580000000003":6,"436956":6,"439256":6,"456":2,"49815":11,"4th":[2,6],"5000":6,"538":6,"53802771":2,"53806156":2,"54801291":2,"549":14,"5555":[2,8,15],"600":[2,5],"630200":2,"632737":6,"63561":11,"6414902":6,"6424903":6,"65630":2,"682232":6,"68360":11,"68396":11,"6a3d9a":13,"6c007c":6,"746325":6,"76_":2,"777777":6,"804":6,"82106":11,"82142":11,"833":6,"850096":6,"859314":6,"88ff00":6,"8gb":14,"900":2,"9178749":6,"9188750":6,"927807":6,"9511049999999999":6,"996137":6,"999":[2,8],"9th":0,"9xy":15,"bioinformatics gene gtf bed bigwig genomics transcript exon cds genomic tss tts splicing dna-sequences intron ngs chip-seq rna-seq python":10,"boolean":[2,9,15],"break":[2,8],"case":[1,2,4,5,6,14,16],"char":2,"class":[6,8,10,14],"default":[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],"export":16,"ferr\u00e9":2,"final":[2,14],"float":2,"function":[2,7,10,14,15],"import":[2,3,6,7,8,14],"int":[1,2,6],"long":[6,7,13,14,15],"new":[1,2,7,8,9,11],"null":[2,14],"return":[1,2,3,5,7,9,11,13,14,15,16],"short":6,"throw":2,"true":[2,3,5,7,11,14,16],"try":[0,1,2,4,5,6,8,11,13,14,15,16],"var":[2,6,14],"void":6,"while":[10,13,14],But:14,CDS:[2,4,8,11,15],Cis:14,For:[0,2,6,9,14,15],IDS:16,IDs:[0,2,13],Its:14,Lis:2,Lrs:2,NOT:14,Not:[1,11,13,15],One:[0,2,5,9],TTS:[1,5,6,9,14,15],That:7,The:[0,1,4,5,6,7,8,9,10,11,13,14,15,16],Then:14,These:[0,1,9,14],Use:[0,2,4,5,6,9,11,14,15],Used:[1,2],Uses:2,Using:[2,6,14],Will:[2,6,14],With:[7,8],__dict__:7,__doc__:7,__file__:7,__init__:6,__main__:7,__name__:7,__notes__:7,_biotyp:8,_id:[2,8],_mrna:2,_rc:2,_rc_mrna:2,_stats_:14,a_b:2,a_bo:2,a_chr:2,a_col:2,a_dict:2,a_fa:2,a_feat:2,a_fil:2,a_gtf:2,a_list:2,a_pat
h:2,a_scor:8,a_str:2,a_tab:2,aaa:2,aatacagagat:2,abc:14,abort:2,about:[6,7,9,10,13,14,15],abov:[2,6,14,15],abspath:7,abud:2,abudan:2,abund:[2,14],abundance_threshold:2,accept:[1,4,5,6,14,16],acceptor:[5,9],access:[0,10],accord:[2,8,14],account:11,accumul:5,accur:2,across:14,activ:[10,15],actual:14,adapt:[2,6,7,14],add:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],add_argu:7,add_argument_group:7,add_attr:2,add_attr_and_writ:2,add_attr_column:2,add_attr_from_dict:2,add_attr_from_fil:2,add_attr_from_list:2,add_attr_from_matrix_fil:2,add_attr_to_po:2,add_bas:2,add_chr:2,add_exon_nb:[5,9,10,14],add_exon_numb:2,add_feature_typ:2,add_help:7,add_prefix:[2,9,10],add_prefix_to_fil:2,add_scor:3,added:[1,2,3,5,6,8,13],adding:[2,14],addit:[1,2,6,8,11,14],adjust:2,aesthet:6,afil:2,aforement:2,after:[8,11,14],against:14,agcgcaccatatg:2,agcgcatg:2,agen:2,ajust:2,algorithm:[2,14],alist:2,all:[0,1,2,4,5,6,8,9,10,11,13,14,15,16],all_chrom1:2,all_chrom2:2,all_chrom:2,all_feature_label:2,all_intersections_for_this_combi:2,all_overlap:2,all_possible_combi:2,allevi:14,allow:[2,5,14],almost:[0,9],alon:7,along:[2,6],alpha:2,alphanumer:[2,15],alreadi:[2,16],also:[1,2,5,6,8,11,14],alt:14,altern:[2,6,9,13,14],alwai:14,ambigu:0,among:[2,14],amount:14,amu:14,anaconda3:6,analys:[6,14],analysi:[8,14],analyz:14,ani:[0,1,2,4,5,6,7,8,10,11,13,14,15,16],annot:[4,7,9,10,11,14],anoth:[1,7,14],antisen:1,antisens:[6,14],anywai:14,api:14,appear:7,append:2,appli:[2,6,8,14,15],apply_recursively_to_all_nod:2,approach:[2,14],appropri:14,apropo:[9,10],architectur:10,arg:[2,7],arg_formatt:7,argpars:7,argument:[0,1,2,4,5,6,7,8,10,11,13,14,15,16],argumentpars:7,around:[1,2,6,14],arrai:2,artifici:[2,11],as_data_fram:2,as_dict:2,as_dict_of_dict:2,as_dict_of_list:2,as_dict_of_merged_list:2,as_dict_of_valu:2,as_list:2,as_list_of_list:2,as_simple_list:2,ask:[2,7,11,14,15],aspect:10,ass:8,assembl:16,assert:[2,3],assert_equ:2,assess:[2,14],assign:[2,8],assign_nod:2,associ:[0,1,2,5,6,8,9,11,14,15],assum:2,atctcaggggcg:2,atctggcg:2
,attempt:9,attr:2,attr_list:2,attr_nam:2,attribut:[0,2,4,8,9,11,15],aurkaip1:6,author:[2,14],autom:9,automat:14,avail:[2,6,7,9,11,12,14,15],averag:[6,14],avoid:[0,2],axi:6,axis_text:6,b2df8a:13,b_dict:2,b_file:2,b_gtf:2,b_list:2,back:9,backend:14,bacteria:11,balanc:8,bam:[2,11],bamcompar:6,bamcoverag:6,bar:[2,14,16],base:[2,3,4,5,6,8,9,11,13,14,15],basepair:[2,14],bashrc:9,basi:6,basic:[0,2,4,6,7,10,11,14,15],bat:9,batch:[2,14],bbb:2,bcl:15,becaus:[2,14],bed3:[2,4,9],bed6:[2,4,6,14],bed:[2,3,4,5,6,9,11,14,15],bed_excl:2,bed_format:3,bed_to_gtf:[9,10],bed_to_lists_of_interv:2,beda:2,bedb:2,bedfil:[2,15],bedfile1:2,bedfile2:2,bedsb:2,bedtool:2,been:[2,13],befor:[0,1,2,4,5,6,8,9,11,13,14,15,16],begin:[2,14],behav:4,behaviour:[0,2],being:14,below:[0,2,6,7,8,9,11,14,15],best:[2,14],beta:[10,14],beta_inc:2,betacalcul:2,betainc:2,betaincreg:2,better:[0,14],between:[2,9,11,14],bias:14,bide:14,big:4,big_wig:3,bigwig:[3,6,9],bigwig_to_b:9,bigwiglist:6,bin:[2,3,6,7],bin_around_frac:6,bin_nb:[3,6],bind:14,binom:[2,14],binomi:[2,14],bins_numb:2,bio:[2,14],biolog:14,bla:2,bla_:2,bla_simpl:2,blabla:2,blue:6,bmc:1,bodi:[1,6,9,14],bool:1,bool_exp:2,border:6,border_color:6,both:[6,14],boundari:[2,5],brace:2,brian:2,broad:[2,14],broadli:2,build:[2,9,14],build_best_dict_from_librari:2,build_nodes_for_word:2,build_nodes_for_words_from_ologram_result_df:2,bw_cov_mp:3,bw_coverag:10,bw_list:[3,6],bw_profile_mp:3,bwig:[6,10],by_transcript:[2,5],bypass:14,c_list:2,caagc:16,calcul:[2,14],call:[2,6,7,13,14],callabl:2,can:[0,1,2,4,5,6,7,8,9,10,11,14,16],candid:[2,14],cannot:2,captur:14,care:16,carlo:[2,14],carriag:2,cat:8,catatggtgcgct:2,categori:[8,14],catgcgct:2,cc1313:6,ccc:2,cccccc:6,cccccgttacgtag:[2,16],ccds_id:[2,4,8,11,15],cdna_length:2,cds:15,cds_g0001t001:[4,8],cds_g0001t002:[4,8,11],cds_g0002t001:[8,15],cds_g0003t001:[8,15],cds_g0004t001:[8,15],cds_g0004t002:[8,15],cds_g0005t001:8,cds_g0006t001:8,cds_g0006t002:8,cds_g0007t001:8,cds_g0007t002:8,cds_g0008t001:8,cds_g0009t001:[8,15],cds
_g0009t002:[8,15],cds_g0010t001:[2,8],cds_id:2,cea:9,ceil:6,cell:[2,8],chanc:[2,14],chang:[2,6,14],charact:[2,8],character:14,charbonni:14,check:[0,2,6,7,14],check_boolean_expr:2,check_ensembl_format:2,check_file_or_dir_exist:2,check_gene_chr:2,check_negbin_adjust:2,check_r_instal:2,check_r_packag:2,chi:2,chomosom:2,chomp:2,choos:[6,8],chosen:[0,2],chr10:6,chr11:6,chr12:6,chr13:6,chr14:6,chr15:6,chr16:6,chr17:6,chr18:6,chr19:6,chr1:[0,2,4,5,6,8,11,14,15,16],chr20:6,chr21:[2,6],chr22:[6,15],chr2:[2,5,6],chr3:6,chr4:6,chr5:6,chr6:6,chr7:6,chr8:6,chr9:6,chr:[0,1,2,4,5,6,8,9,11,13,14,15,16],chr_info_fil:2,chr_info_path:2,chr_list:2,chr_str:2,chrm:[1,5,6,14],chrom:[0,1,2,3,5,6,8,11,14,15,16],chrom_fil:2,chrom_info:6,chrom_info_as_dict:2,chrom_info_fil:2,chrom_info_to_bed_fil:2,chrom_len:2,chrominfo:[1,2,5,6,9,11,14],chromosom:[0,1,2,4,5,6,8,9,11,13,14,15,16],chrx:[6,15],chry:15,cite:14,classic:[2,4,6,14],classmethod:2,close:2,close_properli:2,closer:14,closest:[1,5,9],closest_gen:[9,10],cluster:14,cmd:7,cmd_object:7,cmdobject:7,code:[1,2,8,9,14],coder:2,coding_pot:2,codon:15,col:[2,6,15],col_from_tab:[9,10],collaps:1,collect:[2,11],collector:2,color:[6,13],color_ord:6,column:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],com:[2,6,14],combi:[2,14],combi_human_read:2,combi_min:[2,14],combin:[2,9,14,16],come:[10,14],comma:[1,2,4,5,6,8,9,11,13,14,15],command:[0,2,10],comment:[0,2,14],common:[0,2],comp:[9,16],compar:14,compat:[3,10],complement:[2,16],complet:14,complex:[2,7,8,14],compliant:2,compon:2,comput:[2,3,5,6,8,9,11,14],compute_all_intersections_minibatch:2,compute_overlap_stat:2,compute_stats_for_intersect:2,compute_true_intersect:2,computingintersectionparti:2,computingstatscombiparti:2,concaten:[2,14],concentr:14,confid:6,configur:6,conjunct:15,conn:9,connect:2,consid:[1,2,14],consist:2,constraint:2,construct:6,consum:[2,14],contain:[0,1,2,3,4,5,6,8,9,11,13,14,15],contfractbeta:2,continu:2,contribut:14,contributor:14,control:[2,3,6,14],control_list:[9,10,11],convent:[1,5,6,10,1
4,15],converg:[9,10],convers:[2,7,9,10],convert:[0,2,3,9,10,14],convert_ensembl:[0,9,10],convert_to_ensembl:2,coordin:[0,2,4,7,9,10,11],copi:11,copyright:2,cor_group:[2,14],core:14,correl:[2,14],correspond:[1,2,5,6,8,14,15,16],cost:14,could:[2,8,14,15],count:[2,3,6,9,10,13,14,15],count_key_valu:[9,10],counteract:2,coupl:14,cov:6,cover:10,coverag:[3,7,9,10,11,14],cpat:8,cpu:[6,14],cramer:[2,14],creat:[2,6,7,8,9,14,16],critic:2,crosstab:2,csv:[2,13,15],current:[0,2,4,5,8,10],custom:[2,14],da0000:6,dark:6,data:[2,6,8,10,13,14],data_default_factori:2,datafram:[2,13],dataset:[1,2,4,5,6,8,11,13,15,16],datasetnam:2,date:[0,1,4,5,6,8,9,11,13,14,15,16],dbf400:6,debug:[2,6,14],decil:8,declar:[2,7],decomposit:2,dedic:6,deduc:14,deeptool:6,def:7,default_v:2,defin:[0,1,2,4,6,7,9,11,14,15,16],del:16,del_attr:[2,9,10],delect:8,delet:[2,4,6,7,8,9,10,11,13,15,16],delimit:15,demonstr:11,deni:14,depend:[2,14],deplac:14,deplet:14,deriv:14,desc:7,descend:2,describ:1,descript:[0,1,2,4,5,6,8,9,11,13,14,15,16],descriptor:0,deseq:8,design:[0,2,14],desir:2,dest:8,dest_kei:8,destin:[2,8,9],detail:[2,9,14],determin:2,develop:[7,10],deviat:14,dftdftd:2,diagram:[6,13,14],dict:[2,7],dict_learn:[10,14],dictionari:[2,14],dictionarywithindex:2,dictionnari:2,diff:1,differ:[1,2,11],differenti:8,digial:2,dimnish:2,dir:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],direct:[2,5],directli:2,directori:[2,6,7,11,13,14],disabl:[0,14],disappear:5,discard:[0,2,4,13,14],discret:[8,9,14],discretize_kei:[9,10,14],displai:[2,9,14],dissapear:5,dist:11,dist_to_converg:1,dist_to_diverg:1,distanc:[1,7,9,11],distribut:[2,14],distrubut:2,diverg:[9,10,11],divid:2,divis:2,dll:2,dlmf:2,doc:7,document:[2,14],doe:[0,1,2,4,5,6,8,10,11,13,14,15,16],don:[1,2,4,5,11,13,16],done:[6,7,14],donor:[5,9],download:11,downstream:[1,2,3,6,14],dozen:14,dpi:[6,13],draw_profil:6,due:[2,14],duplic:[0,6,13],each:[1,2,5,6,8,9,11,14,15],eas:10,easier:14,easili:10,echo:9,ecolog:6,edit:[6,7,9,10,14],effect:2,eight:0,either:2,element:[2,4,5,6,11,13,14,15],els
:[7,14],elsevi:6,emphas:2,emphasi:[2,14],empir:[2,14],empirical_p_v:2,emploi:14,empti:[0,2,4,9,14],encapsul:2,encff112bhn_h3k4me3_chr1:14,encff112bhn_h3k4me3_k562_sub:14,encff119bym_h3k36me3_k562_sub:14,encff431haa_h3k36me3_k562_sub:6,encff742fds_h3k4me3_k562_sub:6,encff947dvy_h3k79me2_k562_sub:6,enclos:[2,9,11,14,15],encod:[2,14],encount:[2,14,15,16],end:[0,2,3,4,5,6,11,15,16],end_str:2,endswith:2,enhanc:[10,14],enough:14,enrich:14,ensembl:[0,2,4,5,9,11,14],ensg00000105483:2,ensg00000107829:11,ensg00000148337:2,ensg00000148339:2,ensg00000153885:2,ensg00000164587:2,ensg56765:16,enst00000284006:2,enst00000331272:11,enst00000338370:6,enst00000372948:2,enst00000373066:2,enst00000373068:2,enst00000373069:2,enst00000377836:6,enst00000378598:6,enst00000401695:2,enst00000407193:2,enst00000430256:2,enst00000437157:6,enst00000445012:2,enst00000457105:11,enst00000462379:6,enst00000466983:2,enst00000469643:6,enst00000469733:6,enst00000470093:11,enst00000472769:2,enst00000482428:11,enst00000489578:11,enst00000511072:6,enst00000517510:2,enst00000519690:2,enst00000520007:2,enst00000526992:15,enst00000587559:2,enst00000587658:2,enst00000589786:2,enst00000624697:6,enst00000634501:2,enst00000634901:2,ensur:14,entri:11,env:[6,7],epigenet:[6,8,14,15],epsilon:2,equal:[2,6,8],equival:2,error:[0,2,6,7,14],error_funct:[2,14],especi:14,essenti:[0,4,9],estim:[2,6],etc:2,eval_numer:2,evalu:[2,14,15],even:[0,1,2,4,5,6,8,11,13,14,15,16],everi:14,everyth:2,exact:[2,14],exactli:2,exampl:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],example_01:6,example_01b:6,example_02:6,example_05:6,example_06:6,example_06b:6,example_07:6,example_08:6,example_13:13,example_gtf:11,example_pa_01:14,example_pa_02:14,example_pa_03:14,example_pa_04:14,except:[0,2],excl:14,exclud:[2,14],exclus:[1,2,8,14],exist:[2,3,4],exit:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],exon:[0,1,2,4,5,6,8,9,11,14,15,16],exon_id:[0,2,4,5,8,11,15],exon_nb:2,exon_nbr:[2,5,11,14],exon_nbr_cat:14,exon_numb:[5,11],exon_numbering_kei:[5,11],exon_s:[9,10],exp:8,
expect:[2,8,14],experiment:[15,16],explain:14,explicit:[2,5,16],explicitli:[2,5,16],expos:10,expr:[2,8,14],express:[2,8,9,13,14,15],exprs_class:[8,14],ext:2,extend:[1,2,6,10,14],extens:[2,6],extract:[2,4,5,9,10,13,14,15,16],extract_data:2,extract_data_iter_list:2,f60000:6,f7db00:6,faap20:6,facet:6,facet_col:6,fact:14,factor:[2,6,8,14,15],factoris:2,facult:11,fafa13:4,fai:[2,16],fake:2,fall:2,fals:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],fashion:1,fasta:[9,10,16],fasta_interfac:2,fastasequ:2,fat:4,favor:14,feat:2,feat_id:2,feat_nam:2,feat_name_last:2,feat_siz:[11,15],feat_typ:2,featur:[0,1,2,3,4,5,6,8,9,11,14,15,16],feature_nam:2,feature_s:[9,10,15],feature_typ:16,features_nam:2,feed:2,feel:[4,10],ferr:[2,14],few:14,fewer:14,ff6100:6,ffb500:6,field:[2,4,9],field_count:2,fieldset:2,figur:14,file:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16],file_ext:7,file_or_dir:2,file_out:2,file_with_valu:2,filenam:2,filetyp:7,filter:[10,14],find:[1,2,9,14],find_interesting_combin:[2,14],find_intersect:2,first:[0,2,6,7,11,13,14,15,16],fit:[2,14],fit_algorithm:2,fit_qual:14,fix:[2,8],flag:[1,2,14],flags_matrix:[2,14],flat:9,flatten:2,flatten_list:2,flatten_list_recur:2,flexibl:16,fly:[8,14],focu:6,focus:[2,14],fold:14,folder:[0,1,4,5,6,8,9,11,13,14,15,16],follow:[1,2,3,4,5,6,7,8,9,11,13,14,15,16],foo:[2,16],forc:[2,3,5,14],form:[2,7,14],format:[1,2,3,4,5,6,8,9,10,11,13,14,15,16],format_tab:2,formattedfil:7,formula:2,forward:5,found:[0,2,5,6,7,11,13,14,16],fourth:2,frac:6,fraction:[2,6,15],frame:[0,2,4],free:10,free_gtf_data:2,frequenc:2,frequent:[2,14],from:[2,3,7,9,10],from_list:2,ft_type:[2,4,5,6,11],ftp:11,full:[6,14],fun:7,functool:2,fungi:11,furthermor:14,futhermor:2,futur:[7,14],g0001:[0,1,2,4,5,8,11,13,15,16],g0001_na_g0001t001_chr1:2,g0001t001:[0,1,2,4,5,8,11,13,15,16],g0001t001e001:[0,4,15],g0001t002:[0,1,2,4,5,8,11,13,15,16],g0001t002e001:[0,2,4,11,15],g0002:[0,1,2,4,5,8,11,13,15],g0002t001:[0,1,2,4,5,8,11,13,15,16],g0002t001e001:[0,15],g0003:[0,1,4,5,8,11,13,15],g0003t001:[0,1,2,4,5,8,1
1,13,15,16],g0003t001e001:[0,5,15],g0003t001e002:[0,5,15],g0004:[0,1,2,4,5,8,11,13,15],g0004_na_g0004t001_chr1:2,g0004t001:[0,1,2,4,5,8,11,13,15],g0004t001e001:[0,5,15],g0004t001e002:[0,5,15],g0004t001e003:[5,15],g0004t002:[0,1,2,4,5,8,11,13,15],g0004t002e001:[0,5,15],g0004t002e002:[0,5,15],g0004t002e003:[0,5,15],g0005:[1,2,4,8,11,13,15],g0005t001:[1,2,4,8,11,13,15],g0005t001e001:15,g0005t001e002:15,g0006:[1,2,4,8,11,13,15],g0006t001:[1,2,4,5,8,11,13,15],g0006t001e001:15,g0006t001e002:15,g0006t001e003:15,g0006t002:[1,2,4,8,11,13,15],g0006t002e001:15,g0006t002e002:15,g0007:[1,4,8,11,13,15],g0007t001:[1,2,4,8,11,13,15],g0007t001e001:15,g0007t002:[1,4,8,13,15],g0007t002e001:15,g0008:[1,4,8,11,13,15],g0008_na_g0008t001_chr1:2,g0008t001:[1,2,4,8,13,15],g0008t001e001:15,g0008t001e002:15,g0009:[1,4,8,11,13,15],g0009t001:[1,2,4,5,8,13,15],g0009t001e001:15,g0009t002:[1,4,5,8,13,15],g0009t002e001:15,g0010:[1,4,8,11,13,15],g0010t001:[1,2,4,8,13,15],g0010t001e001:15,g00:2,g1t1:2,gain:2,gamma:2,garbag:2,gc_off:2,gen:2,gene:[0,1,2,3,4,5,6,8,9,10,11,13,14,15,16],gene_biotyp:14,gene_id:[0,1,2,4,5,8,11,13,15,16],gene_nam:[1,2,4,6,8,14,15],genelist:11,gener:[0,2,8,13,14,15,16],generate_candidate_word:2,genic:[2,5],genom:[0,1,2,5,6,9,11,14,15,16],genome_fa:2,gerard:2,get:[0,2,4,5,6,11,14,15,16],get_3p_end:2,get_5p_3p_coord:[9,10],get_5p_end:2,get_al:2,get_all_candidates_except:2,get_attr_list:[2,9,10],get_attr_nam:2,get_attr_valu:2,get_attr_value_list:[2,9,10],get_ceas_record:9,get_chrom:2,get_exampl:[0,1,4,5,6,8,9,10,13,14,15,16],get_example_featur:2,get_example_fil:2,get_feat_seq:[9,10],get_feature_list:[2,9,10],get_feature_s:2,get_gn_id:2,get_gn_strand:2,get_gn_to_tx:2,get_gname_to_tx:2,get_intergen:2,get_intron:2,get_midpoint:2,get_sequ:2,get_simple_concaten:2,get_transcript_s:2,get_tss:2,get_tt:2,get_tx_id:2,get_tx_seq:[9,10],get_tx_strand:2,get_tx_to_gn:2,get_tx_to_gnam:2,gff2:10,gff3:10,ggccttatta:16,github:[10,12,14],give:[2,5,8,14],given:[1,2,6,14,15],global_result:2,gmail:[2
,14],gn_2_tx:2,gn_feat:2,gn_id:2,gn_tx_id:2,gn_val:2,gnu:2,go_id:15,goal:[2,14],good:[2,14],gough:2,gov:2,gpl:2,grai:6,graph:[2,14],graphic:[2,6],grcm38:16,great:9,great_reg_domain:9,greater:2,greedi:[2,14],greeedi:2,grei:6,groomer:0,group:[2,6,7,14],gtec:2,gtf:[1,4,5,6,7,8,9,10,11,13,14,15,16],gtf_bla:2,gtf_data:2,gtf_interfac:[2,7],gtfk:2,gtftk:[0,1,4,5,6,7,8,10,11,13,14,15,16],gtftkerror:2,gtftkinteractiveerror:2,guillaum:14,h3k36me3:[6,14],h3k36me3_ologram_stat:14,h3k4me3:[6,14],h3k4me3_ologram_stat:14,h3k79me2:[6,14],h3k79me2_ologram_stat:14,h3k79me:6,handl:[0,9,10],handler:2,happen:5,hard:14,has:[1,2,8],has_head:2,hase:13,have:[2,5,6,8,11,12,13,14,15],head:[0,1,2,4,5,6,8,11,15,16],head_fil:2,header:[1,2,4,5,8,11,13,16],heatmap:[9,14],height:[6,13,14],help:[0,1,4,5,6,7,8,10,11,13,14,15,16],henc:2,here:[1,2,6,14,15],hes2:6,hg19:[1,5,6,14],hg38:[1,5,6,14],hg38_chr1:[1,11,14],hide:11,hide_undef:2,higher:[2,6],highest:[2,3,6,9,14,15],highlight:14,histogram:2,hold:2,homo_sapien:11,hour:14,how:[2,6,8,14],howev:14,hsapien:15,html:2,http:[2,6,14,15],http_proxi:15,https_proxi:15,hub:2,human:[14,15],hun:15,hundr:[2,10,14],hypothesi:[2,14],identifi:[0,1,2,14,16],ids:[2,4,6,13],idx:11,ignor:[0,14],iii:14,imag:[6,13,14],img:[6,13],impact:14,implement:[5,6,7,9,14],improv:[2,14],in_bed_fil:3,inch:[6,13,14],incl:14,includ:[9,11,13,14,15,16],incomplet:2,increas:[2,9,14],independ:[2,14],independantli:[2,14],index:[2,16],indic:[2,3,8,13,14,16],individu:2,inexact:[2,14],infer:14,infil:[2,13],info:[1,2,5,6,7,8,9,11,13,14,16],inform:[1,2,4,5,7,8,9,10,14],input:[1,2,6,9,14],input_fil:2,input_obj:2,inputfil:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],insid:[6,14],instal:[2,10,11],instanc:[0,2,10],instead:[0,1,2,14],intact:11,integr:2,intens:14,intent:10,inter:[2,14],interact:14,interescet:2,interesect:14,interest:[1,2,6,7,11,14,15],interesting_combi:[2,14],interfac:[2,10],intergen:[2,6,9,10],interpret:7,interrog:11,intersect:[9,10,14],intersections_for_this_combi:2,interv:[2,6,8],intron:[1,2,6
,9,10,11,15,16],intron_by_tx:6,intron_nb_in_nam:2,intron_s:[9,10,15],invert:[1,5,8,13,15],invert_match:2,irrespect:5,is_com:2,is_defin:2,is_empti:2,is_exon:2,is_fasta_head:2,is_set:2,isclos:2,isdir:2,isg15:6,ish:2,issu:[0,8],item:[2,14],itemset:[2,14],iter:2,iter_as_list:2,iterate_with_head:2,itermax:2,its:[0,1,2,6,8,14,15],itself:2,jitter:13,job:2,join:[2,8,9,11],join_attr:[9,10,14,15],join_fil:[2,8],join_mat:[2,8,11,15],join_mat_2:8,join_mat_3:8,join_multi_fil:[9,10],jungman:2,just:[1,2,3,7],keep:[0,1,4,5,6,8,9,11,13,14,15,16],kei:[0,1,2,4,5,6,8,9,11,14,15,16],kept:[7,15],key_nam:[1,6,11],key_valu:2,keyword:[9,11],kind:11,know:14,known:14,lab:6,label:[3,6,8,9,14,16],lack:0,lambda:2,lambdat:2,laptop:14,larg:[2,5,6,13,14],lasso:2,lasso_cd:2,last:[2,6],later:[2,8,15],latest:11,learn:[2,14],learn_dictionary_and_encod:2,least:[2,14],leav:11,left:[2,8],leftmost:2,legendr:6,len:2,length:[2,14,15],lentz:2,lepoivr:1,less:14,let:[8,11],level:[2,7,8,11],li1:2,li2:2,lib:6,libgtftk:[8,10],librari:[2,10],licens:2,light:6,like:[2,4,8,14],limit:[4,6,11,14],lincrna:[6,14],line:[0,1,4,6,9,10,13,14,15,16],line_width:6,linedraw:6,list1:2,list2:2,list:[1,2,3,4,5,6,7,8,11,13,14,15],lncrna:14,load:[7,8,14],loc:15,locat:[0,2,5,6,9,14,15],log2:[6,13,14],log:[2,6,8,13,14,15],logger:[0,1,4,5,6,8,9,11,13,14,15,16],logger_fil:[0,1,4,5,6,7,8,11,13,14,15,16],longer:[2,14],longest:[9,15],longuest:2,look:[1,6,11,12,14],lot:[4,9,14],low:14,lower:2,lr1:2,lr2:2,lr3:2,luat:1,made:2,magnifi:14,mai:[0,1,2,4,5,6,7,8,9,11,13,14],main:[2,7,10,14],make:[2,4,7,14],make_outdir_and_fil:2,make_pars:7,make_tmp_dir:2,make_tmp_fil:[2,7],make_tmp_file_pool:3,malform:2,manag:7,mandatori:7,manhattan:2,mani:[2,6,14],manual:14,map:2,mark:[6,8,14],markov:[2,14],match:[1,2,8,9,13,15],math:2,mathemat:2,matplotlib:6,matric:14,matrix:[2,6,8,9,14],matrix_fil:8,matur:[2,9,11,15,16],mature_rna:[9,11,15],max:[2,6,14,15],max_exon_numb:15,max_siz:15,maximum:[2,14,15],mayb:7,mcam:15,mean:[0,2,3,5,6,14],meaning:[2,14],meantim:14,m
edian:[2,6],median_comp:2,melt:6,merg:[2,5,8,9,14],merge_attr:[2,9,10],merge_bed_by_strand:9,merge_ologram_stat:14,merge_ologram_stats_01:14,merged_batches_result:14,messag:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],metavar:7,metazoa:11,method:[2,6],midpoint:[2,9,10],might:[2,14],mimic:4,min:[2,6,15],min_exon_numb:15,min_siz:15,mine:[2,14],mini_r:[1,2,6,8,11,13,14,15],mini_real_10m:11,mini_real_control_1:13,mini_real_counts_encff630hex:[8,13,14],mini_real_en:11,mini_real_noov_rnd_tx:[6,11],mini_real_promot:6,mini_real_tx:6,minibatch:[2,14],minibatch_len:2,minibatch_nb:[2,14],minibatch_s:[2,14],minibatchdictionarylearn:2,minim:6,minimum:[2,15],minu:[2,16],mir34ahg:6,miscellan:[7,9,10],miss:3,mk_matrix:[9,10,15],mkdir_p:2,mm10:[1,5,6,14],mm8:[1,5,6,14],mm9:[1,5,6,14],moc:14,mode:[2,3,5,7],model:[2,14],modifi:2,modl:[9,10,14],modl_subroutin:2,modl_supp_mat:14,modul:[7,10,14],mold:2,moment:[2,11],monc:14,monoexon:15,monoton:2,mont:[2,14],more:[2,4,5,7,9,11,13,14],more_b:14,more_bed_label:14,more_kei:14,more_nam:[2,4,5],most:[0,1,2,5,8,9,11,14,15],mostli:[2,14],move:5,mpmath:2,msg:2,much:14,multi:3,multipl:[2,9,10],multiple_overlap_custom_combi:[2,14],multiple_overlap_max_number_of_combin:[2,14],multiple_overlap_target_combi_s:[2,14],multiple_overlap_trivial_ologram_stat:14,multipli:14,multiprocess:[2,14],multiproess:2,multithread:14,must:[0,2,14],mutipl:[8,9],mutual:[1,8],my_command:7,my_fil:2,my_file_h:2,mycalc:2,myintersect:2,n_atom:2,n_highest:[3,6],n_iter:2,n_job:2,n_run:14,n_word:2,na_omit:[2,15],name:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16],name_column:6,nb_column:2,nb_ex:2,nb_exon:[2,9,10,15],nb_intersections_expectation_shuffl:14,nb_intersections_log2_fold_chang:14,nb_intersections_negbinom_fit_qu:14,nb_intersections_pvalu:14,nb_intersections_tru:14,nb_intersections_variance_shuffl:14,nb_line:2,nb_neighbor:1,nb_proc:[3,6],nb_thread:[2,14],nb_transcript:[9,10],nb_tx:11,nb_window:6,nbinom:2,ncol:2,need:[0,1,2,6,14],neg:[2,5,14],negbin_fit:10,negbin_pv:2,neighbor:1,neighborho
od:1,nested_dict:2,nevertheless:14,new_data:2,new_kei:[2,8],new_key_valu:2,newlin:2,next:[2,7,8],nflag:[2,14],nipy_spectr:6,nist:2,nm123:2,nm_334567:0,no_error:2,no_na:2,node:[2,14],nofit:2,nois:[2,14],noisi:14,non:[0,2,4,8,9,11,13],none:[0,1,2,3,4,5,6,7,8,11,13,14,15,16],normal:[0,1,4,5,6,8,11,13,14,15,16],notabl:14,note:[0,1,2,4,5,6,7,8,11,13,14,15,16],noth:14,novel:[2,8,11,14],novel_:8,novel_g0001t002:8,now:[2,6,7,14],nrow:2,nst:6,nucleotid:[5,15],number:[2,3,4,5,6,8,9,11,14,15,16],number_of_set:[2,14],numer:[2,6,8,9,11,15],numpi:[2,6,14],nuniqu:2,obfusc:14,obj:2,object:[2,7],obs:2,obscur:14,observ:[2,9,11],obtain:[2,6,14],occordingli:2,occur:[2,14],odd:2,okai:2,old:0,ologram:[2,9,10,11],ologram_1:[11,14],ologram_2:[11,14],ologram_merge_run:[9,10],ologram_merge_stat:[9,10],ologram_modl_treeifi:[9,10],ologram_output:14,ologram_supp_mat:14,omit:15,onc:14,one:[1,2,4,6,8,9,14,15,16],one_bas:2,ones:14,onli:[0,1,2,4,5,6,7,9,11,14,15],ontolog:[9,15],open:2,oper:[0,2,14],operand:2,opposit:14,optim:2,option:[0,1,2,4,5,6,8,11,13,14,15,16],order:[2,3,6,14,15],ordered_5p:2,ordereddict:2,ordinari:14,org:2,orient:[1,6,11,16],origin:[1,2,14],other:[1,2,11,13,14],otherwis:[2,5,6,11,14],our:[2,14],out:[6,13],out_dir:2,out_fil:2,outfil:2,outlist:2,outlook:14,output:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],output_path:2,output_visu:2,outputdir:14,outputfil:[0,1,2,3,4,5,6,7,8,9,11,13,15,16],outputfilenam:2,outsid:[5,14],over:[2,3],overap_stats_shuffl:2,overlai:6,overlap:[2,5,9,10,15],overlap_promoter_u0:1,overlap_stats_comput:10,overlap_stats_shuffl:10,overlapping_:1,overrid:14,own:[6,10,14],p_valu:14,packag:[6,10],paco:11,page:[6,12,13],page_height:[6,13],page_width:[6,13],pair:[2,11,14],pairwis:14,paiwis:14,palett:6,panda:[2,6],paper:14,param:2,paramet:[2,3,14],parent:2,parse_arg:7,parser:[7,10],parser_grp:7,part:2,partial:[2,5],particular:[2,8,14,15],pass:[0,1,2,4,5,6,7,8,11,13,14,15,16],path:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],patient:[6,13],pdf:[6,13,14],pdf_file_alt:14,pdf_height
:14,pdf_width:14,peak:[2,14],peak_fil:14,per:[2,6,9,11,14,15],percentil:[8,14],perform:[0,2,6,11,14],perhap:14,phase:2,pick:2,pipe:10,place:7,plant:11,plch2:6,pleas:[2,6,12,14],plot:[6,13,14],plotnin:6,plotnineerror:6,plu:[2,5,14],plugin:[2,7,9,10,14],png:[6,13],point:[2,13,14],poor:[4,14],pos:2,posit:[2,5],possibl:[2,7,14],post:10,potenti:[2,8],practic:14,prdm16:6,pre:14,preced:2,precis:[2,8,14],pref:[2,3],prefer:14,prefix:[2,3,8,9,15],prepar:[6,13],present:[2,14],prevent:2,previou:14,previous:14,principl:2,print:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],print_gtf:7,probabl:[0,14],problem:[2,10,14],proc:6,process:[0,1,2,3,4,5,6,8,11,13,14,15,16],processed_transcript:14,produc:[1,2,6,9,14,16],product:2,profil:[3,8,9,10],profile_color:6,profile_matrix__pygtftk_4jm84gd0:6,profile_matrix__pygtftk_aovxheri:6,profile_matrix__pygtftk_f2tvlc3:6,profile_prom:6,profile_tx:6,program:[0,6,7,9,10,14],progress:14,promot:[1,3,6,9,11,14],properli:2,proport:2,propos:7,protein:14,protein_cod:[6,14],protist:11,provid:[0,1,2,4,6,7,8,9,11,13,14],proxi:15,pseudo:[3,6,13],pseudo_count:[3,6,13],ptr:2,pull:7,pure:2,purpos:11,put:[2,14],puthier:[6,14],pval:2,pybedtool:2,pygtftk:[7,14],pygtftk_dev37:6,python3:6,python:[7,10],qferr:14,quad:2,quadrat:14,qualiti:[2,14],quantil:6,quentin:[2,14],queri:[2,14],queried_words_nb:2,query_label:14,query_nam:2,question:2,quick:2,quiet:11,quot:16,r_pkg_list:2,rais:[0,2],ram:14,random:[2,9,14,15],random_list:[9,10,11],random_se:2,random_str:2,random_tx:[9,10],randomli:[2,6,9,14,15],rang:[2,6],rarer:[2,14],rate:2,rather:[14,16],ratio:14,read:[2,6,7,14],readm:12,real:[2,14],realis:14,realiti:14,realli:14,reason:14,rebuild:[2,14],rebuilt:14,rebuilt_data:2,rec:2,recalcul:14,recommend:14,reconstruct:[2,14],record:[2,16],recurs:2,red:6,redond:[2,4,8,11,13,16],reduc:[2,14],reduct:14,redund:[2,5,14],refer:[0,1,2,4,6,7,9,13,14],refseq:0,reg:8,regard:2,regener:4,regexp:[2,8,9,15],region:[1,2,3,5,6,9,14,15,16],region_10:5,region_1:5,region_2:5,region_3:5,region_4:5,region_
5:5,region_6:5,region_7:5,region_8:5,region_9:5,region_fil:3,regul:[1,14],regular:[2,7,8,15],regulatori:14,rel:[1,2,5,6],relat:[9,11,14],releas:11,relev:[2,14],reli:[9,10],reliabl:14,rememb:14,remind:2,remov:[2,14],repartit:14,replac:2,report:14,repres:[2,9,14,15],represent:[2,9,14],request:[2,4,5,6,7,9,11,14,15],requir:[0,1,2,4,5,6,8,10,11,13,14,15,16],res_a:2,res_b:2,resist:14,resp:2,respect:[2,11,14],restrict:[2,14],result:[2,6,9,14],result_df:2,result_queu:2,retriev:[2,9,10,14],reus:14,rev:16,rev_comp:2,revers:[1,2,16],right:2,rise:5,rm_dup_tss:[9,10],rmtree:2,rn3:[1,5,6,14],rn4:[1,5,6,14],rna:[2,8,11,16],rnd_alpha_numeric_str:2,role:14,root:[2,14],round:2,routin:2,row:[2,6,8],rug:13,rule:[2,9,14],run:[2,3,7,9,14],rvs:2,s1_d:8,sai:[2,8],said:2,same:[0,1,2,5,6,7,9,11,14,15],samemost:15,sampl:[2,8],save:[6,13,14],scale:8,scientif:2,scikit:2,scipi:2,score:[0,2,3,5,11,14],script:[7,9],seaborn:6,search:[1,6,9,11,13,14],second:[2,6,13],section:[2,10,12],see:[0,2,4,5,6,7,8,9,11,13,14,15,16],seed:[2,14,15],seen:14,segfault:8,seldom:14,select:[0,1,2,4,6,7,9,10,11,13,14],select_5p_transcript:2,select_best_words_from_librari:2,select_by_go:[9,10],select_by_intron_s:[9,10],select_by_kei:[0,1,2,4,8,9,10,11,13,14],select_by_loc:[2,9,10],select_by_max_exon_nb:[2,9,10],select_by_nb_exon:[9,10],select_by_number_of_exon:2,select_by_numeric_valu:[9,10],select_by_posit:2,select_by_regexp:[2,9,10],select_by_transcript_s:2,select_by_tx_s:[9,10],select_longuest_transcript:2,select_most_5p_tx:[9,10],select_shortest_transcript:2,self:2,send:[2,7],send_error:2,sens:14,sep:[2,4,5,11,13,15,16],separ:[1,2,4,5,6,8,9,11,13,14,15,16],seq:8,seqid:[0,2,4,8,11,15,16],seqid_list:[9,10],seqnam:15,seqrecord:2,sequenc:[0,2,7,9,10],server:9,set:[0,1,2,3,4,5,6,8,9,10,11,13,14,15,16],set_attr:2,set_color:13,set_na:2,sever:[0,2,5,6,8,9,10,14,15],share:[7,9,11,15],shift:[9,10],shift_valu:5,short_long:[9,10],shortcut:[4,15],shorten:14,shortest:[2,9,15],shortnam:3,should:[0,2,3,4,5,6,7,9,11,13,14,15],show:[
0,1,4,5,6,8,9,11,13,14,15,16],shown:14,shuffl:[2,14],shutdown:2,shutil:2,side:2,signal:[6,13],signatur:[2,14],signfic:14,signific:2,silent:[2,14],silentremov:2,simpl:[1,2,4,5,6,8,9,11,13,14,15,16],simple_02:[2,11],simple_03:[2,11],simple_04:[2,11],simple_05:11,simple_06:11,simple_07:[11,14],simple_07_peak:14,simple_07_peaks_1:14,simple_07_peaks_2:14,simple_line_count:2,simple_nb_column:2,simpli:[2,4,5,6,14],sinc:[0,2,14],singl:[2,5,6,14,15],single_nuc:6,site:[5,6,9,14],size:[1,2,5,6,8,9,11,14,15],size_dict:2,skip:[2,13,15],sklearn:2,sleuth:16,small:14,smaller:2,smother:[2,14],snakemak:14,sniff:4,solut:[0,2],solv:2,some:[2,9,14,15],sometim:4,sort:[2,8,14],sort_2_list:2,sourc:[0,2,4,8,15],space:[2,8],spars:2,sparsiti:[2,14],speci:[11,15],special:4,specif:[2,5,6,14],specifi:[2,6,8,11,14],splice:[2,5,9,15],splicing_sit:[9,10],split:[2,3,6],split_char:2,sqlite:9,sqrt:2,squar:[2,14],squish:[2,14],squish_matrix:2,src:8,src_kei:8,ss5:2,stabl:2,stackoverflow:2,stand:7,standard:[6,11,14],start:[0,2,3,4,5,6,11,14,15,16],start_codon:[14,15],start_str:2,start_tim:2,startswith:2,stat:[3,6,10,14],statist:[2,6,9,11,14,15],stats_multiple_overlap:2,stats_singl:2,statu:2,stderr:2,stdin:[0,1,4,5,6,7,8,9,11,13,14,15,16],stdout:[0,1,2,4,5,6,7,8,9,11,13,15,16],step:[2,6,14],step_1_factor_allow:[2,14],still:[4,10,14,16],stop_tim:2,store:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],str:2,str_len:2,strand:[0,1,2,3,5,6,9,16],stranded:1,stream:11,strict:5,string:[0,2,3,8,11],strip:6,strip_text:6,stroke:[2,14],structur:[5,14],studi:14,sub:[9,14],subcommand:[0,10],subdirectori:14,subgenom:14,submodular:2,subroutin:10,subsampl:14,subsect:14,subsequ:[2,8,15],subset:[2,6],subset_bwig:6,success:2,suf:[2,3],suffix:[2,3,8,9],suit:0,suitabl:8,sum:[2,3,6,15],summed_bp_overlaps_expectation_shuffl:14,summed_bp_overlaps_log2_fold_chang:14,summed_bp_overlaps_negbinom_fit_qu:14,summed_bp_overlaps_pvalu:14,summed_bp_overlaps_tru:14,summed_bp_overlaps_variance_shuffl:14,superbatch:[9,14],supervis:2,supplementari:2,suppl
i:14,support:[2,4,10,14],synonym:15,synopsi:9,syntax:[6,7],sys:7,system:9,t00:2,taatt:16,tab:[10,13,15],tab_interfac:2,tab_lin:2,tabl:14,tabul:[1,2,5,6,8,9,10,11,13,14,15],tag:14,tail:2,take:[2,6,9,15,16],taken:[2,11],target:[2,5,7,8,9,11,14],target_featur:8,tell:14,temp:6,templat:7,temporari:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],tend:[2,14],term:[0,2],termin:[0,1,4,5,6,8,11,13,14,15,16],test:[2,6,10,14,15],test_combi:14,test_data_for_modl:[2,14],test_gtftk_mkdir_p:2,text:[1,2,6,8,11,14],than:[2,10,14],thank:[4,14],thei:[0,2,4,11,14,15],them:[1,2,4,14],theme:6,themselv:14,thi:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],thing:[6,7],think:[0,6,13],third:11,this_combi_onli:2,thiswasthenameofthequeri:14,those:[0,1,2,8,9,11,14],thousand:[2,14],thread:[2,3,6,14],three:14,through:[2,6,9,10],throughout:2,thrown:8,thu:[0,6,7,10,11,14],ties:[2,15],time:[2,6,7,11,14,16],tiny_r:[11,15],tinyurl:6,titl:[6,14],tmem52:6,tmp:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],tmp_dir:[0,1,4,5,6,7,8,11,13,14,15,16],tmp_file:[2,3],tmp_file_list:2,to_alphanum:2,to_b:2,to_list:2,todo:8,token:2,too:[2,14],toolbox:9,toolkit:10,top:[5,14],total:[2,5,14],toward:14,tp_dir:2,trancript:11,transact:[2,14],transcript:[0,1,2,4,5,6,8,9,11,13,14,15,16],transcript_as_bioseq_record:2,transcript_fil:6,transcript_id:[0,1,2,4,5,6,8,11,13,15,16],transcript_id_1:11,transcript_id_2:11,transfer:10,transform:[6,8,13],transform_algorithm:2,transform_alpha:2,translat:[2,8],transpos:[5,9],treat:[9,14],tree:[6,9,10,14],treeifi:14,tri:13,true_intersect:2,tss:[1,2,3,5,6,9,11,14,15],tss_dist:[9,10],tss_num:11,tss_num_1:11,tss_num_2:11,tss_number:9,tsss:[2,11],tsv:[8,9,11,13,14],tsv_file_path:14,tts:[1,6,9,14],ttss:2,tupl:[2,13],tur:2,turn:14,two:[1,2,5,6,8,11,13,14],tx_1:2,tx_2:2,tx_class:6,tx_genomic_length:2,tx_id:2,tx_kept:2,tx_n:2,txgn_id:8,txt:[1,2,4,6,11,13,14],type:[0,1,2,3,4,5,6,7,8,9,11,14,15,16],typic:2,u_df:2,ucsc:0,ultim:5,ultimatli:5,unambigu:16,unassign:2,uncompress:6,undef:4,undefin:[2,4,6],under:[2,14],uniq:[8,11],uniqu:[2,4,
6,13,14,16],unitari:10,univ:14,unix:10,unknown:[2,4],unlik:[2,14],unset:[2,4],unstrand:1,unwant:13,unwrap:[1,14],updat:[2,7],upon:[2,5,7,10,11],upon_non:2,upper:6,upper_limit:6,upstream:[1,2,6,14],upward:14,usag:[0,1,4,5,6,8,9,11,13,14,15,16],use:[0,1,2,4,5,6,7,8,11,13,14,16],use_log:2,use_markov_shuffl:2,used:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],useful:[1,2,6,9],user:[6,7,9,11,13,14],user_img_fil:[6,13],user_region:6,uses:2,using:[0,2,5,6,7,8,9,10,14,15,16],usr:7,usual:[2,14],util:[3,7,10,14],utmost:14,utr:[9,14,15,16],v_df:2,val:[2,14],val_tx_1:2,val_tx_2:2,val_tx_n:2,valu:[0,1,2,3,4,5,6,8,9,11,13,14,15],vari:2,variabl:[2,6],varianc:2,variat:2,variou:[2,4,9],vector:2,verbos:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],veri:[5,7,11,14],version:[0,1,2,4,5,6,8,9,11,13,14,15,16],vertebr:11,vicugna:11,vicugna_paco:11,view:0,violet:6,visual:14,visualis:[2,14],wai:[2,3,6],want:[2,6,7,8,14,16],warn:[2,4,6,7,10,14],watson:5,well:[2,14],were:[2,6,13],wether:2,what:[0,1],whatev:2,when:[0,1,2,4,6,7,8,11,14,16],whenev:0,where:[2,6,14],whether:[1,2,3,6,7,14,16],which:[0,2,3,4,5,6,8,11,14],which_col:2,which_combis_to_get_from:2,whichev:2,whitespac:14,whole:[1,4,6],whose:[1,2,15],wide:[0,10],width:[6,13,14],wildcard:16,window:6,wise:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],with_intron:2,within:14,without:[2,5,6,11,14,15],wl3dj2_n76zfc8sdvny1q06c0000gn:6,won:[2,16],wont:2,word:[2,14],work:[0,2,14],workflow:14,wors:2,would:[1,2,4,5,7,14],wrap73:6,wrapper:2,write:[0,1,2,3,4,5,6,8,10,11,13,14,15,16],write_b:2,write_bed_3p_end:2,write_bed_5p_end:2,write_gtf_to_bed6:2,write_message_to_fil:[0,1,4,5,6,8,11,13,14,15,16],write_properli:2,written:[2,5,10],x_lab:6,x_rebuilt:[2,14],x_squish:2,x_true:[2,14],ydacyfxx:6,you:[0,2,4,5,6,7,8,9,14,16],your:[0,6,9,10,14],zero:[2,3,6],zero_bas:2,zero_to_na:3,zip:6},titles:["Warning about supported GTF file formats","Commands from section \u2018annotation\u2019","Help on Python package (pygtftk)","The pygtftk.bwig.bw_coverage module","Commands from section 
\u2018conversion\u2019","Commands from section \u2018coordinates\u2019","Commands from section \u2018coverage\u2019","Writing your own commands","Commands from section \u2018Editing\u2019","Help on gtftk Unix commands","Welcome to pygtftk documentation page","Commands from section \u2018information\u2019","Installing pygtftk/gtftk","Commands from section \u2018miscellaneous\u2019","Commands from section \u2018ologram\u2019","Commands from section \u2018selection\u2019","Commands from section \u2018sequence\u2019"],titleterms:{"class":2,The:[2,3],about:0,activ:9,add_exon_nb:11,add_prefix:8,annot:1,apropo:11,argument:9,bash:9,bed_to_gtf:4,beta:2,bw_coverag:3,bwig:3,closest_gen:1,col_from_tab:13,command:[1,4,5,6,7,8,9,11,13,14,15,16],complet:9,content:10,control_list:13,convent:0,converg:1,convers:4,convert:4,convert_ensembl:4,coordin:5,count:11,count_key_valu:11,coverag:6,del_attr:8,dict_learn:2,discretize_kei:8,diverg:1,document:10,edit:8,exon_s:1,fasta:2,feature_s:11,file:0,format:0,from:[1,4,5,6,8,11,13,14,15,16],funtion:9,get:9,get_5p_3p_coord:5,get_attr_list:11,get_attr_value_list:11,get_exampl:11,get_feat_seq:16,get_feature_list:11,get_tx_seq:16,gtf:[0,2],gtftk:[2,9,12],help:[2,9],inform:11,instal:12,intergen:5,intersect:2,intron:5,intron_s:1,join_attr:8,join_multi_fil:8,line:2,list:9,main:9,merge_attr:8,midpoint:5,miscellan:13,mk_matrix:6,modl:2,modul:[2,3],multipl:14,name:0,nb_exon:11,nb_transcript:11,negbin_fit:2,ologram:14,ologram_merge_run:14,ologram_merge_stat:14,ologram_modl_treeifi:14,overlap:[1,14],overlap_stats_comput:2,overlap_stats_shuffl:2,own:7,packag:2,page:10,parser:9,profil:6,pygtftk:[2,3,10,12],python:2,random_list:15,random_tx:15,retriev:11,rm_dup_tss:15,section:[1,4,5,6,8,11,13,14,15,16],select:15,select_by_go:15,select_by_intron_s:15,select_by_kei:15,select_by_loc:15,select_by_max_exon_nb:15,select_by_nb_exon:15,select_by_numeric_valu:15,select_by_regexp:15,select_by_tx_s:15,select_most_5p_tx:15,seqid_list:11,sequenc:16,shift:5,short_long:15
,splicing_sit:5,stat:2,subroutin:2,support:0,tab:2,tabl:10,tabul:4,test:9,tree:2,tss_dist:11,unix:9,util:2,warn:0,welcom:10,wide:9,write:7,your:7}}) \ No newline at end of file +Search.setIndex({docnames:["about","annotation","api","bwig_coverage","conversion","coordinates","coverage","developers","editing","gtftk_args","index","information","installation","miscellaneous","ologram","selection","sequence"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":1,"sphinx.ext.todo":2,sphinx:56},filenames:["about.rst","annotation.rst","api.rst","bwig_coverage.rst","conversion.rst","coordinates.rst","coverage.rst","developers.rst","editing.rst","gtftk_args.rst","index.rst","information.rst","installation.rst","miscellaneous.rst","ologram.rst","selection.rst","sequence.rst"],objects:{"pygtftk.Line":{FastaSequence:[2,1,1,""],Feature:[2,1,1,""],FieldSet:[2,1,1,""]},"pygtftk.Line.FastaSequence":{format:[2,2,1,""],write:[2,2,1,""]},"pygtftk.Line.Feature":{add_attr:[2,2,1,""],add_attr_and_write:[2,2,1,""],format:[2,2,1,""],format_tab:[2,2,1,""],from_list:[2,2,1,""],get_3p_end:[2,2,1,""],get_5p_end:[2,2,1,""],get_attr_names:[2,2,1,""],get_attr_value:[2,2,1,""],get_gn_id:[2,2,1,""],get_tx_id:[2,2,1,""],set_attr:[2,2,1,""],write:[2,2,1,""],write_bed:[2,2,1,""],write_bed_3p_end:[2,2,1,""],write_bed_5p_end:[2,2,1,""],write_gtf_to_bed6:[2,2,1,""]},"pygtftk.Line.FieldSet":{format:[2,2,1,""],write:[2,2,1,""]},"pygtftk.bwig":{bw_coverage:[3,0,0,"-"]},"pygtftk.bwig.bw_coverage":{bw_cov_mp:[3,3,1,""],bw_profile_mp:[3,3,1,""],make_tmp_file_pool:[3,3,1,""]},"pygtftk.fasta_interface":{FASTA:[2,1,1,""]},"pygtftk.fasta_interface.FASTA":{transcript_as_bioseq_records:[2,2,1,""],write:[2,2,1,""]},"pygtftk.gtf_interface":{GTF:[2,1,1,""]},"pygtftk.gtf_interface.GTF":{add_attr_column:[2,2,1,""],
add_attr_from_dict:[2,2,1,""],add_attr_from_file:[2,2,1,""],add_attr_from_list:[2,2,1,""],add_attr_from_matrix_file:[2,2,1,""],add_attr_to_pos:[2,2,1,""],add_exon_number:[2,2,1,""],add_prefix:[2,2,1,""],convert_to_ensembl:[2,2,1,""],del_attr:[2,2,1,""],eval_numeric:[2,2,1,""],extract_data:[2,2,1,""],extract_data_iter_list:[2,2,1,""],get_3p_end:[2,2,1,""],get_5p_end:[2,2,1,""],get_attr_list:[2,2,1,""],get_attr_value_list:[2,2,1,""],get_chroms:[2,2,1,""],get_feature_list:[2,2,1,""],get_feature_size:[2,2,1,""],get_gn_ids:[2,2,1,""],get_gn_strand:[2,2,1,""],get_gn_to_tx:[2,2,1,""],get_gname_to_tx:[2,2,1,""],get_intergenic:[2,2,1,""],get_introns:[2,2,1,""],get_midpoints:[2,2,1,""],get_sequences:[2,2,1,""],get_transcript_size:[2,2,1,""],get_tss:[2,2,1,""],get_tts:[2,2,1,""],get_tx_ids:[2,2,1,""],get_tx_strand:[2,2,1,""],get_tx_to_gn:[2,2,1,""],get_tx_to_gname:[2,2,1,""],head:[2,2,1,""],is_defined:[2,2,1,""],is_set:[2,2,1,""],merge_attr:[2,2,1,""],message:[2,2,1,""],nb_exons:[2,2,1,""],nrow:[2,2,1,""],select_5p_transcript:[2,2,1,""],select_by_key:[2,2,1,""],select_by_loc:[2,2,1,""],select_by_max_exon_nb:[2,2,1,""],select_by_number_of_exons:[2,2,1,""],select_by_positions:[2,2,1,""],select_by_regexp:[2,2,1,""],select_by_transcript_size:[2,2,1,""],select_longuest_transcripts:[2,2,1,""],select_shortest_transcripts:[2,2,1,""],tail:[2,2,1,""],to_bed:[2,2,1,""],write:[2,2,1,""],write_bed:[2,2,1,""]},"pygtftk.stats":{beta:[2,0,0,"-"],negbin_fit:[2,0,0,"-"]},"pygtftk.stats.beta":{BetaCalculator:[2,1,1,""]},"pygtftk.stats.beta.BetaCalculator":{beta:[2,2,1,""],betainc:[2,2,1,""],betaincreg:[2,2,1,""],contfractbeta:[2,2,1,""]},"pygtftk.stats.intersect":{overlap_stats_compute:[2,0,0,"-"],overlap_stats_shuffling:[2,0,0,"-"]},"pygtftk.stats.intersect.modl":{dict_learning:[2,0,0,"-"],subroutines:[2,0,0,"-"],tree:[2,0,0,"-"]},"pygtftk.stats.intersect.modl.dict_learning":{Modl:[2,1,1,""],squish_matrix:[2,3,1,""],test_data_for_modl:[2,3,1,""]},"pygtftk.stats.intersect.modl.dict_learning.Modl
":{find_interesting_combinations:[2,2,1,""],generate_candidate_words:[2,2,1,""],select_best_words_from_library:[2,2,1,""]},"pygtftk.stats.intersect.modl.subroutines":{build_best_dict_from_library:[2,3,1,""],generate_candidate_words:[2,3,1,""],learn_dictionary_and_encode:[2,3,1,""]},"pygtftk.stats.intersect.modl.tree":{Library:[2,1,1,""],Node:[2,1,1,""],apply_recursively_to_all_nodes:[2,3,1,""],get_all_candidates_except:[2,3,1,""],output_visualize:[2,3,1,""]},"pygtftk.stats.intersect.modl.tree.Library":{build_nodes_for_words:[2,2,1,""],build_nodes_for_words_from_ologram_result_df:[2,2,1,""]},"pygtftk.stats.intersect.overlap_stats_compute":{ComputingStatsCombiPartial:[2,1,1,""],DictionaryWithIndex:[2,1,1,""],compute_stats_for_intersection:[2,3,1,""],compute_true_intersection:[2,3,1,""],stats_multiple_overlap:[2,3,1,""],stats_single:[2,3,1,""],which_combis_to_get_from:[2,3,1,""]},"pygtftk.stats.intersect.overlap_stats_compute.DictionaryWithIndex":{get_simple_concatenation:[2,2,1,""]},"pygtftk.stats.intersect.overlap_stats_shuffling":{ComputingIntersectionPartial:[2,1,1,""],compute_all_intersections_minibatch:[2,3,1,""],compute_overlap_stats:[2,3,1,""]},"pygtftk.stats.negbin_fit":{check_negbin_adjustment:[2,3,1,""],empirical_p_val:[2,3,1,""],negbin_pval:[2,3,1,""]},"pygtftk.tab_interface":{TAB:[2,1,1,""]},"pygtftk.tab_interface.TAB":{as_data_frame:[2,2,1,""],as_simple_list:[2,2,1,""],iter_as_list:[2,2,1,""],iterate_with_header:[2,2,1,""],write:[2,2,1,""]},"pygtftk.utils":{GTFtkError:[2,4,1,""],GTFtkInteractiveError:[2,4,1,""],add_prefix_to_file:[2,3,1,""],check_boolean_exprs:[2,3,1,""],check_file_or_dir_exists:[2,3,1,""],check_r_installed:[2,3,1,""],check_r_packages:[2,3,1,""],chomp:[2,3,1,""],chrom_info_as_dict:[2,3,1,""],chrom_info_to_bed_file:[2,3,1,""],close_properly:[2,3,1,""],flatten_list:[2,3,1,""],flatten_list_recur:[2,3,1,""],get_example_feature:[2,3,1,""],get_example_file:[2,3,1,""],head_file:[2,3,1,""],intervals:[2,3,1,""],is_comment:[2,3,1,""],is_empty:[2,3,
1,""],is_exon:[2,3,1,""],is_fasta_header:[2,3,1,""],make_outdir_and_file:[2,3,1,""],make_tmp_dir:[2,3,1,""],make_tmp_file:[2,3,1,""],median_comp:[2,3,1,""],message:[2,3,1,""],mkdir_p:[2,3,1,""],nested_dict:[2,3,1,""],random_string:[2,3,1,""],rnd_alpha_numeric_string:[2,3,1,""],silentremove:[2,3,1,""],simple_line_count:[2,3,1,""],simple_nb_column:[2,3,1,""],sort_2_lists:[2,3,1,""],tab_line:[2,3,1,""],to_alphanum:[2,3,1,""],to_list:[2,3,1,""],write_properly:[2,3,1,""]},pygtftk:{Line:[2,0,0,"-"],fasta_interface:[2,0,0,"-"],gtf_interface:[2,0,0,"-"],tab_interface:[2,0,0,"-"],utils:[2,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","function","Python function"],"4":["py","exception","Python exception"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:function","4":"py:exception"},terms:{"000000":6,"0000ca":6,"0003700":15,"0025":6,"0041dd":6,"0050789":15,"0086dd":6,"0097194":15,"009c00":6,"009fca":6,"00_ologram_stat":14,"00a76f":6,"00aaa1":6,"00bb00":6,"00da00":6,"00f900":6,"012":2,"01k":1,"01k_d0":1,"062994":6,"068393":6,"0_1018":14,"0_107":14,"0_12":14,"0_1631":14,"0_183":14,"0_2":14,"0_3139":14,"0_4":14,"0_41703":14,"0_549":14,"0_6":14,"100":[2,6,14],"1000":[2,6,14],"10000":[2,14],"100000":2,"1000000000":[2,15],"1001145":2,"1006138":6,"101":9,"1018":14,"106":5,"107":[8,11,15],"110":8,"112":8,"114":8,"115":[5,8],"116":[5,8,11,15],"123":2,"124":[2,4,5,16],"125":[0,2,4,5,8,11,15,16],"128":5,"130":[4,8,11],"132":[4,8],"1370156":6,"13746":11,"138":[0,2,4,5,8,11,15,16],"1380157":6,"138_":2,"1500":[1,2,14],"159":15,"1631":14,"173583":6,"175":5,"176":[8,11,15],"179":[4,5,16],"180":[0,2,5,8,11,15],"182":[8,15],"183":14,"184":8,"18400":2,"18545":11,"18581":11,"186":[8,11,15],"189":[0,4,5,8,11,15],"1914257":6,"1924258":6,"1948":[2,14],"199":2,"1996":2,"1997":2,"1998":[2,6],"1999":2,"1999432787236828e":2,"1e2":2,"200":[2,6,15],"2000":2,"2007":2,"2013":1,"2017":11,"2018":[0,1,4
,5,6,8,11,13,15],"2019":14,"2020":14,"20328":6,"209":5,"210":[2,8,11,15],"211":8,"213":8,"214":[8,15],"220":[8,15],"2202732":6,"2212733":6,"222":[5,8,11,15],"222_":2,"227377":6,"2322":[2,8],"2493646":6,"250":2,"2500a5":6,"2503647":6,"272973":6,"29348345":2,"300":[2,5,6,13],"3064167":6,"307":2,"3074168":6,"3139":14,"331067":6,"3630449":6,"3640450":6,"374963":6,"3rd":[2,5,11],"4218580000000003":6,"436956":6,"439256":6,"456":2,"49815":11,"4th":[2,6],"5000":6,"538":6,"53802771":2,"53806156":2,"54801291":2,"549":14,"5555":[2,8,15],"600":[2,5],"630200":2,"632737":6,"63561":11,"6414902":6,"6424903":6,"65630":2,"682232":6,"68360":11,"68396":11,"6a3d9a":13,"6c007c":6,"746325":6,"76_":2,"777777":6,"804":6,"82106":11,"82142":11,"833":6,"850096":6,"859314":6,"88ff00":6,"8gb":14,"900":2,"9178749":6,"9188750":6,"927807":6,"9511049999999999":6,"996137":6,"999":[2,8],"9th":0,"9xy":15,"bioinformatics gene gtf bed bigwig genomics transcript exon cds genomic tss tts splicing dna-sequences intron ngs chip-seq rna-seq 
python":10,"boolean":[2,9,15],"break":[2,8],"case":[1,2,4,5,6,14,16],"char":2,"class":[6,8,10,14],"default":[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],"export":16,"ferr\u00e9":2,"final":[2,14],"float":2,"function":[2,7,10,14,15],"import":[2,3,6,7,8,14],"int":[1,2,6],"long":[6,7,13,14,15],"new":[1,2,7,8,9,11],"null":[2,14],"return":[1,2,3,5,7,9,11,13,14,15,16],"short":6,"throw":2,"true":[2,3,5,7,11,14,16],"try":[0,1,2,4,5,6,8,11,13,14,15,16],"var":[2,6,14],"void":6,"while":[10,13,14],But:14,CDS:[2,4,8,11,15],Cis:14,For:[0,2,6,9,14,15],IDS:16,IDs:[0,2,13],Its:14,Lis:2,Lrs:2,NOT:14,Not:[1,11,13,15],One:[0,2,5,9],TTS:[1,5,6,9,14,15],That:7,The:[0,1,4,5,6,7,8,9,10,11,13,14,15,16],Then:14,These:[0,1,9,14],Use:[0,2,4,5,6,9,11,14,15],Used:[1,2],Uses:2,Using:[2,6,14],Will:[2,6,14],With:[7,8],__dict__:7,__doc__:7,__file__:7,__init__:6,__main__:7,__name__:7,__notes__:7,_biotyp:8,_id:[2,8],_mrna:2,_rc:2,_rc_mrna:2,_stats_:14,a_b:2,a_bo:2,a_chr:2,a_col:2,a_dict:2,a_fa:2,a_feat:2,a_fil:2,a_gtf:2,a_list:2,a_path:2,a_scor:8,a_str:2,a_tab:2,aaa:2,aatacagagat:2,abc:14,abort:2,about:[6,7,9,10,13,14,15],abov:[2,6,14,15],abspath:7,abud:2,abudan:2,abund:[2,14],abundance_threshold:2,accept:[1,4,5,6,14,16],acceptor:[5,9],access:[0,10],accord:[2,8,14],account:[11,14],accumul:5,accur:2,across:14,activ:[10,15],actual:14,adapt:[2,6,7,14],add:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],add_argu:7,add_argument_group:7,add_attr:2,add_attr_and_writ:2,add_attr_column:2,add_attr_from_dict:2,add_attr_from_fil:2,add_attr_from_list:2,add_attr_from_matrix_fil:2,add_attr_to_po:2,add_bas:2,add_chr:2,add_exon_nb:[5,9,10,14],add_exon_numb:2,add_feature_typ:2,add_help:7,add_prefix:[2,9,10],add_prefix_to_fil:2,add_scor:3,added:[1,2,3,5,6,8,13],adding:[2,14],addit:[1,2,6,8,11,14],adjust:2,aesthet:6,afil:2,aforement:2,after:[8,11,14],against:14,agcgcaccatatg:2,agcgcatg:2,agen:2,ajust:2,algorithm:[2,14],alist:2,all:[0,1,2,4,5,6,8,9,10,11,13,14,15,16],all_chrom1:2,all_chrom2:2,all_chrom:2,all_feature_label:2,all_intersections
_for_this_combi:2,all_overlap:2,all_possible_combi:2,allevi:14,allow:[2,5,14],almost:[0,9],alon:7,along:[2,6],alpha:2,alphanumer:[2,15],alreadi:[2,16],also:[1,2,5,6,8,11,14],alt:14,altern:[2,6,9,13,14],alwai:14,ambigu:0,among:[2,14],amount:14,amu:14,anaconda3:6,analys:[6,14],analysi:[8,14],analyz:14,ani:[0,1,2,4,5,6,7,8,10,11,13,14,15,16],annot:[4,7,9,10,11,14],anoth:[1,7,14],antisen:1,antisens:[6,14],anywai:14,api:14,appear:7,append:2,appli:[2,6,8,14,15],apply_recursively_to_all_nod:2,approach:[2,14],appropri:14,apropo:[9,10],architectur:10,arg:[2,7],arg_formatt:7,argpars:7,argument:[0,1,2,4,5,6,7,8,10,11,13,14,15,16],argumentpars:7,around:[1,2,6,14],arrai:[2,14],artifici:[2,11],as_data_fram:2,as_dict:2,as_dict_of_dict:2,as_dict_of_list:2,as_dict_of_merged_list:2,as_dict_of_valu:2,as_list:2,as_list_of_list:2,as_simple_list:2,ask:[2,7,11,14,15],aspect:10,ass:8,assembl:16,assert:[2,3],assert_equ:2,assess:[2,14],assign:[2,8],assign_nod:2,assigned_nod:2,associ:[0,1,2,5,6,8,9,11,14,15],assum:2,atctcaggggcg:2,atctggcg:2,attempt:9,attr:2,attr_list:2,attr_nam:2,attribut:[0,2,4,8,9,11,15],aurkaip1:6,author:[2,14],autom:9,automat:14,avail:[2,6,7,9,11,12,14,15],averag:[6,14],avoid:[0,2],axi:6,axis_text:6,b2df8a:13,b_dict:2,b_file:2,b_gtf:2,b_list:2,back:9,backend:14,bacteria:11,balanc:8,bam:[2,11],bamcompar:6,bamcoverag:6,bar:[2,14,16],base:[2,3,4,5,6,8,9,11,13,14,15],basepair:[2,14],bashrc:9,basi:6,basic:[0,2,4,6,7,10,11,14,15],bat:9,batch:[2,14],bbb:2,bcl:15,becaus:[2,14],bed3:[2,4,9],bed6:[2,4,6,14],bed:[2,3,4,5,6,9,11,14,15],bed_excl:2,bed_format:3,bed_to_gtf:[9,10],bed_to_lists_of_interv:2,beda:[2,14],bedb:2,bedfil:[2,15],bedfile1:2,bedfile2:2,bedfilepath:14,bedsb:[2,14],bedtool:[2,14],been:[2,13],befor:[0,1,2,4,5,6,8,9,11,13,14,15,16],begin:[2,14],behav:4,behaviour:[0,2],being:14,below:[0,2,6,7,8,9,11,14,15],best:[2,14],beta:[10,14],beta_inc:2,betacalcul:2,betainc:2,betaincreg:2,better:[0,14],between:[2,9,11,14],bias:14,bide:14,big:4,big_wig:3,bigwig:[3,6,9],bigwig_to_b
:9,bigwiglist:6,bin:[2,3,6,7],bin_around_frac:6,bin_nb:[3,6],bind:14,binom:[2,14],binomi:[2,14],bins_numb:2,bio:[2,14],biolog:14,bla:2,bla_:2,bla_simpl:2,blabla:2,blue:6,bmc:1,bodi:[1,6,9,14],bool:1,bool_exp:2,border:6,border_color:6,both:[6,14],boundari:[2,5],brace:2,brian:2,broad:[2,14],broadli:2,build:[2,9,14],build_best_dict_from_librari:2,build_nodes_for_word:2,build_nodes_for_words_from_ologram_result_df:2,bw_cov_mp:3,bw_coverag:10,bw_list:[3,6],bw_profile_mp:3,bwig:[6,10],by_transcript:[2,5],bypass:14,c_list:2,caagc:16,calcul:[2,14],call:[2,6,7,13,14],callabl:2,can:[0,1,2,4,5,6,7,8,9,10,11,14,16],candid:[2,14],cannot:2,captur:14,care:16,carlo:[2,14],carriag:2,cat:8,catatggtgcgct:2,categori:[8,14],catgcgct:2,cc1313:6,ccc:2,cccccc:6,cccccgttacgtag:[2,16],ccds_id:[2,4,8,11,15],cdna_length:2,cds:15,cds_g0001t001:[4,8],cds_g0001t002:[4,8,11],cds_g0002t001:[8,15],cds_g0003t001:[8,15],cds_g0004t001:[8,15],cds_g0004t002:[8,15],cds_g0005t001:8,cds_g0006t001:8,cds_g0006t002:8,cds_g0007t001:8,cds_g0007t002:8,cds_g0008t001:8,cds_g0009t001:[8,15],cds_g0009t002:[8,15],cds_g0010t001:[2,8],cds_id:2,cea:9,ceil:6,cell:[2,8],chanc:[2,14],chang:[2,6,14],charact:[2,8],character:14,charbonni:14,check:[0,2,6,7,14],check_boolean_expr:2,check_ensembl_format:2,check_file_or_dir_exist:2,check_gene_chr:2,check_negbin_adjust:2,check_r_instal:2,check_r_packag:2,chi:2,child:14,chomosom:2,chomp:2,choos:[6,8],chosen:[0,2],chr10:6,chr11:6,chr12:6,chr13:6,chr14:6,chr15:6,chr16:6,chr17:6,chr18:6,chr19:6,chr1:[0,2,4,5,6,8,11,14,15,16],chr20:6,chr21:[2,6],chr22:[6,15],chr2:[2,5,6],chr3:6,chr4:6,chr5:6,chr6:6,chr7:6,chr8:6,chr9:6,chr:[0,1,2,4,5,6,8,9,11,13,14,15,16],chr_info_fil:2,chr_info_path:2,chr_list:2,chr_str:2,chrm:[1,5,6,14],chrom:[0,1,2,3,5,6,8,11,14,15,16],chrom_fil:2,chrom_info:6,chrom_info_as_dict:2,chrom_info_fil:2,chrom_info_to_bed_fil:2,chrom_len:2,chrominfo:[1,2,5,6,9,11,14],chromosom:[0,1,2,4,5,6,8,9,11,13,14,15,16],chrx:[6,15],chry:15,cite:14,classic:[2,4,6,14],classmethod:2,clos
e:2,close_properli:2,closer:14,closest:[1,5,9],closest_gen:[9,10],cluster:14,cmd:7,cmd_object:7,cmdobject:7,code:[1,2,8,9,14],coder:2,coding_pot:2,codon:15,col:[2,6,15],col_from_tab:[9,10],collaps:1,collect:[2,11],collector:2,color:[6,13],color_ord:6,column:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],com:[2,6,14],combi:[2,14],combi_human_read:2,combi_min:[2,14],combin:[2,9,14,16],come:[10,14],comma:[1,2,4,5,6,8,9,11,13,14,15],command:[0,2,10],comment:[0,2,14],common:[0,2],comp:[9,16],compar:14,compat:[3,10],complement:[2,16],complet:14,complex:[2,7,8,14],compliant:2,compon:2,comput:[2,3,5,6,8,9,11,14],compute_all_intersections_minibatch:2,compute_overlap_stat:2,compute_stats_for_intersect:2,compute_true_intersect:[2,14],computingintersectionparti:2,computingstatscombiparti:2,concaten:[2,14],concentr:14,condit:2,confid:6,configur:6,conjunct:15,conn:9,connect:2,consid:[1,2,14],consist:2,constraint:2,construct:6,consum:[2,14],contain:[0,1,2,3,4,5,6,8,9,11,13,14,15],contfractbeta:2,continu:2,contribut:14,contributor:14,control:[2,3,6,14],control_list:[9,10,11],convent:[1,5,6,10,14,15],converg:[9,10],convers:[2,7,9,10,14],convert:[0,2,3,9,10,14],convert_ensembl:[0,9,10],convert_to_ensembl:2,coordin:[0,2,4,7,9,10,11],copi:11,copyright:2,cor_group:[2,14],core:14,correl:[2,14],correspond:[1,2,5,6,8,14,15,16],cost:14,could:[2,8,14,15],count:[2,3,6,9,10,13,14,15],count_key_valu:[9,10],counteract:2,coupl:14,cov:6,cover:10,coverag:[3,7,9,10,11,14],cpat:8,cpu:[6,14],cramer:[2,14],creat:[2,6,7,8,9,14,16],critic:2,crosstab:2,csv:[2,13,15],current:[0,2,4,5,8,10],current_nod:2,custom:[2,14],da0000:6,dark:6,data:[2,6,8,10,13,14],data_default_factori:2,datafram:[2,13],dataset:[1,2,4,5,6,8,11,13,15,16],datasetnam:2,date:[0,1,4,5,6,8,9,11,13,14,15,16],dbf400:6,debug:[2,6,14],decil:8,declar:[2,7],decomposit:2,dedic:6,deduc:14,deeptool:6,def:7,default_v:2,defin:[0,1,2,4,6,7,9,11,14,15,16],del:16,del_attr:[2,9,10],delect:8,delet:[2,4,6,7,8,9,10,11,13,15,16],delimit:15,demonstr:11,deni:14,depend:[2,
14],deplac:14,deplet:14,deriv:14,desc:7,descend:2,describ:1,descript:[0,1,2,4,5,6,8,9,11,13,14,15,16],descriptor:0,deseq:8,design:[0,2,14],desir:2,dest:8,dest_kei:8,destin:[2,8,9],detail:[2,9,10],determin:2,develop:[7,10],deviat:14,dftdftd:2,diagram:[6,13,14],dict:[2,7],dict_learn:[10,14],dictionari:[2,14],dictionarywithindex:2,dictionnari:2,diff:1,differ:[1,2,11],differenti:8,digial:2,dimnish:2,dir:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],direct:[2,5],directli:2,directori:[2,6,7,11,13,14],disabl:[0,14],disappear:5,discard:[0,2,4,13,14],discret:[8,9,14],discretize_kei:[9,10,14],displai:[2,9,14],dissapear:5,dist:11,dist_to_converg:1,dist_to_diverg:1,distanc:[1,7,9,11],distribut:[2,14],distrubut:2,diverg:[9,10,11],divid:2,divis:2,dll:2,dlmf:2,doc:7,document:[2,14],doe:[0,1,2,4,5,6,8,10,11,13,14,15,16],don:[1,2,4,5,11,13,16],done:[6,7,14],donor:[5,9],download:11,downstream:[1,2,3,6,14],dozen:14,dpi:[6,13],draw_profil:6,due:[2,14],duplic:[0,6,13],each:[1,2,5,6,8,9,11,14,15],eas:10,easier:14,easili:10,echo:9,ecolog:6,edit:[6,7,9,10,14],effect:2,eight:0,either:2,element:[2,4,5,6,11,13,14,15],els:[7,14],elsevi:6,emphas:[2,14],emphasi:[2,14],empir:[2,14],empirical_p_v:2,emploi:14,empti:[0,2,4,9,14],encapsul:2,encff112bhn_h3k4me3_chr1:14,encff112bhn_h3k4me3_k562_sub:14,encff119bym_h3k36me3_k562_sub:14,encff431haa_h3k36me3_k562_sub:6,encff742fds_h3k4me3_k562_sub:6,encff947dvy_h3k79me2_k562_sub:6,enclos:[2,9,11,14,15],encod:[2,14],encount:[2,14,15,16],end:[0,2,3,4,5,6,11,15,16],end_str:2,endswith:2,enhanc:[10,14],enough:14,enrich:14,ensembl:[0,2,4,5,9,11,14],ensg00000105483:2,ensg00000107829:11,ensg00000148337:2,ensg00000148339:2,ensg00000153885:2,ensg00000164587:2,ensg56765:16,enst00000284006:2,enst00000331272:11,enst00000338370:6,enst00000372948:2,enst00000373066:2,enst00000373068:2,enst00000373069:2,enst00000377836:6,enst00000378598:6,enst00000401695:2,enst00000407193:2,enst00000430256:2,enst00000437157:6,enst00000445012:2,enst00000457105:11,enst00000462379:6,enst00000466983:2,en
st00000469643:6,enst00000469733:6,enst00000470093:11,enst00000472769:2,enst00000482428:11,enst00000489578:11,enst00000511072:6,enst00000517510:2,enst00000519690:2,enst00000520007:2,enst00000526992:15,enst00000587559:2,enst00000587658:2,enst00000589786:2,enst00000624697:6,enst00000634501:2,enst00000634901:2,ensur:14,entri:11,env:[6,7],epigenet:[6,8,14,15],epsilon:2,equal:[2,6,8],equival:2,error:[0,2,6,7,14],error_funct:[2,14],especi:14,essenti:[0,4,9],estim:[2,6],etc:2,eval_numer:2,evalu:[2,14,15],even:[0,1,2,4,5,6,8,11,13,14,15,16],everi:14,everyth:2,exact:[2,14],exactli:2,exampl:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],example_01:6,example_01b:6,example_02:6,example_05:6,example_06:6,example_06b:6,example_07:6,example_08:6,example_13:13,example_gtf:11,example_pa_01:14,example_pa_02:14,example_pa_03:14,example_pa_04:14,except:[0,2],excl:14,exclud:[2,14],exclus:[1,2,8,14],exist:[2,3,4],exit:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],exon:[0,1,2,4,5,6,8,9,11,14,15,16],exon_id:[0,2,4,5,8,11,15],exon_nb:2,exon_nbr:[2,5,11,14],exon_nbr_cat:14,exon_numb:[5,11],exon_numbering_kei:[5,11],exon_s:[9,10],exp:8,expect:[2,8,14],experiment:[15,16],explain:14,explicit:[2,5,16],explicitli:[2,5,16],expos:10,expr:[2,8,14],express:[2,8,9,13,14,15],exprs_class:[8,14],ext:2,extend:[1,2,6,10,14],extens:[2,6],extract:[2,4,5,9,10,13,14,15,16],extract_data:2,extract_data_iter_list:2,f60000:6,f7db00:6,faap20:6,facet:6,facet_col:6,fact:14,factor:[2,6,8,14,15],factoris:2,facult:11,fafa13:4,fai:[2,16],fake:2,fall:2,fals:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],fashion:1,fasta:[9,10,16],fasta_interfac:2,fastasequ:2,fat:4,favor:14,feat:2,feat_id:2,feat_nam:2,feat_name_last:2,feat_siz:[11,15],feat_typ:2,featur:[0,1,2,3,4,5,6,8,9,11,14,15,16],feature_nam:2,feature_s:[9,10,15],feature_typ:16,features_nam:2,feed:2,feel:[4,10],ferr:[2,14],few:14,fewer:14,ff6100:6,ffb500:6,field:[2,4,9],field_count:2,fieldset:2,figur:14,file:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16],file_ext:7,file_or_dir:2,file_out:2,file_with_valu:2,filen
am:2,filetyp:7,filter:[10,14],find:[1,2,9,14],find_interesting_combin:[2,14],find_intersect:2,first:[0,2,6,7,11,13,14,15,16],fit:[2,14],fit_algorithm:2,fit_qual:14,fix:[2,8],flag:[1,2,14],flags_matrix:[2,14],flat:9,flatten:2,flatten_list:2,flatten_list_recur:2,flexibl:16,fly:[8,14],focu:6,focus:[2,14],fold:14,folder:[0,1,4,5,6,8,9,11,13,14,15,16],follow:[1,2,3,4,5,6,7,8,9,11,13,14,15,16],fonction:2,foo:[2,16],forc:[2,3,5,14],form:[2,7,14],format:[1,2,3,4,5,6,8,9,10,11,13,14,15,16],format_tab:2,formattedfil:7,formula:2,forward:5,found:[0,2,5,6,7,11,13,14,16],fourth:2,frac:6,fraction:[2,6,15],frame:[0,2,4],free:10,free_gtf_data:2,frequenc:2,frequent:[2,14],from:[2,3,7,9,10],from_list:2,ft_type:[2,4,5,6,11],ftp:11,full:[6,14],fun:7,functool:2,fungi:11,furthermor:14,futhermor:2,futur:[7,14],g0001:[0,1,2,4,5,8,11,13,15,16],g0001_na_g0001t001_chr1:2,g0001t001:[0,1,2,4,5,8,11,13,15,16],g0001t001e001:[0,4,15],g0001t002:[0,1,2,4,5,8,11,13,15,16],g0001t002e001:[0,2,4,11,15],g0002:[0,1,2,4,5,8,11,13,15],g0002t001:[0,1,2,4,5,8,11,13,15,16],g0002t001e001:[0,15],g0003:[0,1,4,5,8,11,13,15],g0003t001:[0,1,2,4,5,8,11,13,15,16],g0003t001e001:[0,5,15],g0003t001e002:[0,5,15],g0004:[0,1,2,4,5,8,11,13,15],g0004_na_g0004t001_chr1:2,g0004t001:[0,1,2,4,5,8,11,13,15],g0004t001e001:[0,5,15],g0004t001e002:[0,5,15],g0004t001e003:[5,15],g0004t002:[0,1,2,4,5,8,11,13,15],g0004t002e001:[0,5,15],g0004t002e002:[0,5,15],g0004t002e003:[0,5,15],g0005:[1,2,4,8,11,13,15],g0005t001:[1,2,4,8,11,13,15],g0005t001e001:15,g0005t001e002:15,g0006:[1,2,4,8,11,13,15],g0006t001:[1,2,4,5,8,11,13,15],g0006t001e001:15,g0006t001e002:15,g0006t001e003:15,g0006t002:[1,2,4,8,11,13,15],g0006t002e001:15,g0006t002e002:15,g0007:[1,4,8,11,13,15],g0007t001:[1,2,4,8,11,13,15],g0007t001e001:15,g0007t002:[1,4,8,13,15],g0007t002e001:15,g0008:[1,4,8,11,13,15],g0008_na_g0008t001_chr1:2,g0008t001:[1,2,4,8,13,15],g0008t001e001:15,g0008t001e002:15,g0009:[1,4,8,11,13,15],g0009t001:[1,2,4,5,8,13,15],g0009t001e001:15,g0009t002:[1,4,5,8,13,15
],g0009t002e001:15,g0010:[1,4,8,11,13,15],g0010t001:[1,2,4,8,13,15],g0010t001e001:15,g00:2,g1t1:2,gain:2,gamma:2,garbag:2,gc_off:2,gen:2,gene:[0,1,2,3,4,5,6,8,9,10,11,13,14,15,16],gene_biotyp:14,gene_id:[0,1,2,4,5,8,11,13,15,16],gene_nam:[1,2,4,6,8,14,15],genelist:11,gener:[0,2,8,13,14,15,16],generate_candidate_word:2,genic:[2,5],genom:[0,1,2,5,6,9,11,14,15,16],genome_fa:2,gerard:2,get:[0,2,4,5,6,11,14,15,16],get_3p_end:2,get_5p_3p_coord:[9,10],get_5p_end:2,get_al:2,get_all_candidates_except:2,get_attr_list:[2,9,10],get_attr_nam:2,get_attr_valu:2,get_attr_value_list:[2,9,10],get_ceas_record:9,get_chrom:2,get_exampl:[0,1,4,5,6,8,9,10,13,14,15,16],get_example_featur:2,get_example_fil:2,get_feat_seq:[9,10],get_feature_list:[2,9,10],get_feature_s:2,get_gn_id:2,get_gn_strand:2,get_gn_to_tx:2,get_gname_to_tx:2,get_intergen:2,get_intron:2,get_midpoint:2,get_sequ:2,get_simple_concaten:2,get_transcript_s:2,get_tss:2,get_tt:2,get_tx_id:2,get_tx_seq:[9,10],get_tx_strand:2,get_tx_to_gn:2,get_tx_to_gnam:2,gff2:10,gff3:10,ggccttatta:16,github:[10,12,14],give:[2,5,8,14],given:[1,2,6,14,15],global_result:2,gmail:[2,14],gn_2_tx:2,gn_feat:2,gn_id:2,gn_tx_id:2,gn_val:2,gnu:2,go_id:15,goal:[2,14],good:[2,14],gough:2,gov:2,gpl:2,grai:6,graph:[2,14],graphic:[2,6],grcm38:16,great:9,great_reg_domain:9,greater:2,greedi:[2,14],greeedi:2,grei:6,groomer:0,group:[2,6,7,14],gtec:2,gtf:[1,4,5,6,7,8,9,10,11,13,14,15,16],gtf_bla:2,gtf_data:2,gtf_interfac:[2,7],gtfk:2,gtftk:[0,1,4,5,6,7,8,10,11,13,14,15,16],gtftkerror:2,gtftkinteractiveerror:2,guillaum:14,h3k36me3:[6,14],h3k36me3_ologram_stat:14,h3k4me3:[6,14],h3k4me3_ologram_stat:14,h3k79me2:[6,14],h3k79me2_ologram_stat:14,h3k79me:6,handl:[0,9,10],handler:2,happen:5,hard:14,has:[1,2,8],has_head:2,hase:13,have:[2,5,6,8,11,12,13,14,15],head:[0,1,2,4,5,6,8,11,15,16],head_fil:2,header:[1,2,4,5,8,11,13,16],heatmap:[9,14],height:[6,13,14],help:[0,1,4,5,6,7,8,10,11,13,14,15,16],henc:2,here:[1,2,6,14,15],hes2:6,hg19:[1,5,6,14],hg38:[1,5,6,14],hg38_chr1:[1,
11,14],hide:11,hide_undef:2,higher:[2,6],highest:[2,3,6,9,14,15],highlight:14,histogram:2,hold:2,homo_sapien:11,hour:14,how:[2,6,8,14],howev:14,hsapien:15,html:2,http:[2,6,14,15],http_proxi:15,https_proxi:15,hub:2,human:[14,15],hun:15,hundr:[2,10,14],hypothesi:[2,14],identifi:[0,1,2,14,16],ids:[2,4,6,13],idx:11,ignor:[0,14],iii:14,imag:[6,13,14],img:[6,13],impact:14,implement:[5,6,7,9,14],improv:[2,14],in_bed_fil:3,inch:[6,13,14],incl:14,includ:[9,11,13,14,15,16],incomplet:2,increas:[2,9,14],independ:[2,14],independantli:[2,14],index:[2,16],indic:[2,3,8,13,14,16],individu:2,inexact:[2,14],infer:14,infil:[2,13],info:[1,2,5,6,7,8,9,11,13,14,16],inform:[1,2,4,5,7,8,9,10,14],input:[1,2,6,9,14],input_fil:2,input_obj:2,inputfil:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],insid:[6,14],instal:[2,10,11],instanc:[0,2,10],instead:[0,1,2,14],intact:11,integr:2,intens:14,intent:10,inter:[2,14],interact:14,interescet:2,interesect:14,interest:[1,2,6,7,11,14,15],interesting_combi:[2,14],interfac:[2,10],intergen:[2,6,9,10],interpret:7,interrog:11,intersect:[9,10,14],intersections_for_this_combi:2,interv:[2,6,8],intron:[1,2,6,9,10,11,15,16],intron_by_tx:6,intron_nb_in_nam:2,intron_s:[9,10,15],invert:[1,5,8,13,15],invert_match:2,irrespect:5,is_com:2,is_defin:2,is_empti:2,is_exon:2,is_fasta_head:2,is_set:2,isclos:2,isdir:2,isg15:6,ish:2,issu:[0,8],item:[2,14],itemset:[2,14],iter:2,iter_as_list:2,iterate_with_head:2,itermax:2,its:[0,1,2,6,8,14,15],itself:2,jitter:13,job:2,join:[2,8,9,11],join_attr:[9,10,14,15],join_fil:[2,8],join_mat:[2,8,11,15],join_mat_2:8,join_mat_3:8,join_multi_fil:[9,10],jungman:2,just:[1,2,3,7],keep:[0,1,4,5,6,8,9,11,13,14,15,16],kei:[0,1,2,4,5,6,8,9,11,14,15,16],kept:[7,15],key_nam:[1,6,11],key_valu:2,keyword:[9,11],kind:11,know:14,known:14,lab:6,label:[3,6,8,9,14,16],lack:0,lambda:2,lambdat:2,laptop:14,larg:[2,5,6,13,14],lasso:2,lasso_cd:2,last:[2,6],later:[2,8,15],latest:11,latter:14,learn:[2,14],learn_dictionary_and_encod:2,least:[2,14],leav:11,left:[2,8],leftmost:2,le
gendr:6,len:2,length:[2,14,15],lentz:2,lepoivr:1,less:14,let:[8,11],level:[2,7,8,11],li1:2,li2:2,lib:6,libgtftk:[8,10],librari:[2,10],licens:2,light:6,like:[2,4,8,14],limit:[4,6,11,14],lincrna:[6,14],line:[0,1,4,6,9,10,13,14,15,16],line_width:6,linedraw:6,list1:2,list2:2,list:[1,2,3,4,5,6,7,8,11,13,14,15],list_of_all_paths_to_more_b:14,lncrna:14,load:[7,8,14],loc:15,locat:[0,2,5,6,9,14,15],log2:[6,13,14],log:[2,6,8,13,14,15],logger:[0,1,4,5,6,8,9,11,13,14,15,16],logger_fil:[0,1,4,5,6,7,8,11,13,14,15,16],longer:[2,14],longest:[9,15],longuest:2,look:[1,6,11,12,14],lot:[4,9,14],low:14,lower:2,lr1:2,lr2:2,lr3:2,luat:1,made:2,magnifi:14,mai:[0,1,2,4,5,6,7,8,9,11,13,14],main:[2,7,10,14],make:[2,4,7,14],make_outdir_and_fil:2,make_pars:7,make_tmp_dir:2,make_tmp_fil:[2,7],make_tmp_file_pool:3,malform:2,manag:7,mandatori:7,manhattan:2,mani:[2,6,14],manual:14,manual_print_word:2,map:2,mark:[6,8,14],markov:[2,14],master:14,match:[1,2,8,9,13,15],math:2,mathemat:2,matplotlib:6,matric:14,matrix:[2,6,8,9,14],matrix_fil:8,matur:[2,9,11,15,16],mature_rna:[9,11,15],max:[2,6,14,15],max_exon_numb:15,max_siz:15,maximum:[2,14,15],mayb:7,mcam:15,mean:[0,2,3,5,6,14],meaning:[2,14],meantim:14,median:[2,6],median_comp:2,melt:6,merg:[2,5,8,9,14],merge_attr:[2,9,10],merge_bed_by_strand:9,merge_ologram_stat:14,merge_ologram_stats_01:14,merged_batches_result:14,messag:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],metavar:7,metazoa:11,method:[2,6],midpoint:[2,9,10],might:[2,14],mimic:4,min:[2,6,15],min_exon_numb:15,min_siz:15,mind:14,mine:[2,14],mini_r:[1,2,6,8,11,13,14,15],mini_real_10m:11,mini_real_control_1:13,mini_real_counts_encff630hex:[8,13,14],mini_real_en:11,mini_real_noov_rnd_tx:[6,11],mini_real_promot:6,mini_real_tx:6,minibatch:[2,14],minibatch_len:2,minibatch_nb:[2,14],minibatch_s:[2,14],minibatchdictionarylearn:2,minim:6,minimum:[2,15],minu:[2,16],mir34ahg:6,miscellan:[7,9,10],miss:3,mk_matrix:[9,10,15],mkdir_p:2,mm10:[1,5,6,14],mm8:[1,5,6,14],mm9:[1,5,6,14],moc:14,mode:[2,3,5,7],model:[2,14],mo
difi:2,modl:[9,10,14],modl_subroutin:2,modl_supp_mat:14,modul:[7,10,14],mold:2,moment:[2,11],monc:14,monoexon:15,monoton:2,mont:[2,14],more:[2,4,5,7,9,11,13,14],more_b:14,more_bed_label:14,more_kei:14,more_nam:[2,4,5],most:[0,1,2,5,8,9,11,14,15],mostli:[2,14],move:5,mpmath:2,msg:2,much:14,multi:3,multipl:[2,9,10],multiple_overlap_custom_combi:[2,14],multiple_overlap_max_number_of_combin:[2,14],multiple_overlap_target_combi_s:[2,14],multiple_overlap_trivial_ologram_stat:14,multipli:14,multiprocess:[2,14],multiproess:2,multithread:14,must:[0,2,14],mutipl:[8,9],mutual:[1,8],my_command:7,my_fil:2,my_file_h:2,mycalc:2,myintersect:2,n_atom:2,n_highest:[3,6],n_iter:2,n_job:2,n_run:14,n_word:2,na_omit:[2,15],name:[1,2,3,4,5,6,7,8,9,10,11,13,14,15,16],name_column:6,nb_column:2,nb_ex:2,nb_exon:[2,9,10,15],nb_intersections_expectation_shuffl:14,nb_intersections_log2_fold_chang:14,nb_intersections_negbinom_fit_qu:14,nb_intersections_pvalu:14,nb_intersections_tru:14,nb_intersections_variance_shuffl:14,nb_line:2,nb_neighbor:1,nb_proc:[3,6],nb_thread:[2,14],nb_transcript:[9,10],nb_tx:11,nb_window:6,nbinom:2,ncol:2,need:[0,1,2,6,14],neg:[2,5,14],negbin_fit:10,negbin_pv:2,neighbor:1,neighborhood:1,nested_dict:2,nevertheless:14,new_data:2,new_kei:[2,8],new_key_valu:2,newlin:2,next:[2,7,8],nflag:[2,14],nipy_spectr:6,nist:2,nm123:2,nm_334567:0,no_dupl:2,no_error:2,no_na:2,node:[2,14],nofit:2,nois:[2,14],noisi:14,non:[0,2,4,8,9,11,13],none:[0,1,2,3,4,5,6,7,8,11,13,14,15,16],normal:[0,1,2,4,5,6,8,11,13,14,15,16],normalize_word:[2,14],notabl:14,note:[0,1,2,4,5,6,7,8,11,13,14,15,16],noth:14,novel:[2,8,11,14],novel_:8,novel_g0001t002:8,now:[2,6,7,14],nrow:2,nst:6,nucleotid:[5,15],number:[2,3,4,5,6,8,9,11,14,15,16],number_of_set:[2,14],numer:[2,6,8,9,11,15],numpi:[2,6,14],nuniqu:2,obfusc:14,obj:2,object:[2,7,14],obs:2,obscur:14,observ:[2,9,11],obtain:[2,6,14],occordingli:2,occur:[2,14],odd:2,often:14,okai:2,old:0,ologram:[2,9,10,11],ologram_1:[11,14],ologram_2:[11,14],ologram_merge_run:[9,10
],ologram_merge_stat:[9,10],ologram_modl_treeifi:[9,10],ologram_output:14,ologram_supp_mat:14,omit:15,onc:14,one:[1,2,4,6,8,9,14,15,16],one_bas:2,ones:14,onli:[0,1,2,4,5,6,7,9,11,14,15],ontolog:[9,15],open:2,oper:[0,2,14],operand:2,opposit:14,optim:2,option:[0,1,2,4,5,6,8,11,13,14,15,16],order:[2,3,6,14,15],ordered_5p:2,ordereddict:2,ordinari:14,org:2,orient:[1,6,11,16],origin:[1,2,14],other:[1,2,11,13,14],otherwis:[2,5,6,11,14],our:[2,14],out:[6,13],out_dir:2,out_fil:2,outfil:2,outlist:2,outlook:14,output:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],output_path:2,output_visu:2,outputdir:14,outputfil:[0,1,2,3,4,5,6,7,8,9,11,13,15,16],outputfilenam:2,outsid:[5,14],over:[2,3],overap_stats_shuffl:2,overlai:6,overlap:[2,5,9,10,15],overlap_promoter_u0:1,overlap_stats_comput:[10,14],overlap_stats_shuffl:10,overlapping_:1,overrid:14,own:[6,10,14],p_valu:14,packag:[6,10],paco:11,page:[6,12,13],page_height:[6,13],page_width:[6,13],pair:[2,11,14],pairwis:14,paiwis:14,palett:6,panda:[2,6],paper:14,param:2,paramet:[2,3,14],parent:[2,14],parse_arg:7,parser:[7,10],parser_grp:7,part:2,partial:[2,5],particular:[2,8,14,15],pass:[0,1,2,4,5,6,7,8,11,13,14,15,16],path:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],path_to_your_queri:14,patient:[6,13],pdf:[6,13,14],pdf_file_alt:14,pdf_height:14,pdf_width:14,peak:[2,14],peak_fil:14,per:[2,6,9,11,14,15],percentil:[8,14],perform:[0,2,6,11,14],perhap:14,phase:2,pick:2,pipe:10,place:7,plant:11,plch2:6,pleas:[2,6,12,14],plot:[6,13,14],plotnin:6,plotnineerror:6,plu:[2,5,14],plugin:[2,7,9,10,14],png:[6,13],point:[2,13,14],poor:[4,14],pos:2,posit:[2,5],possibl:[2,7,14],post:10,potenti:[2,8],practic:14,prdm16:6,pre:14,preced:2,precis:[2,8,14],pref:[2,3],prefer:14,prefix:[2,3,8,9,15],prepar:[6,13],present:[2,14],prevent:2,previou:14,previous:14,principl:2,print:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],print_gtf:7,probabl:[0,14],problem:[2,10,14],proc:6,process:[0,1,2,3,4,5,6,8,11,13,14,15,16],processed_transcript:14,produc:[1,2,6,9,14,16],product:2,profil:[3,8,9,10],prof
ile_color:6,profile_matrix__pygtftk_7ihvich5:6,profile_matrix__pygtftk_8dfns9ml:6,profile_matrix__pygtftk_onenfmua:6,profile_prom:6,profile_tx:6,program:[0,6,7,9,10,14],progress:14,promot:[1,3,6,9,11,14],properli:2,proport:2,propos:7,protein:14,protein_cod:[6,14],protist:11,provid:[0,1,2,4,6,7,8,9,11,13,14],proxi:15,pseudo:[3,6,13],pseudo_count:[3,6,13],ptr:2,pull:7,pure:2,purpos:11,put:[2,14],puthier:[6,14],pval:2,pybedtool:[2,14],pygtftk:[7,14],pygtftk_37:6,python3:6,python:[7,10,14],qferr:14,quad:2,quadrat:14,qualiti:[2,14],quantil:6,quentin:[2,14],queri:[2,14],queried_words_nb:2,query_label:14,query_nam:2,question:2,quick:2,quiet:11,quot:16,r_pkg_list:2,rais:[0,2],ram:14,random:[2,9,14,15],random_list:[9,10,11],random_se:2,random_str:2,random_tx:[9,10],randomli:[2,6,9,14,15],rang:[2,6],rarer:[2,14],rarest:14,rate:2,rather:[14,16],ratio:14,read:[2,6,7,14],readm:12,real:[2,14],realis:14,realiti:14,realli:14,reason:14,rebuild:[2,14],rebuilt:14,rebuilt_data:2,rec:2,recalcul:14,recommend:14,reconstruct:[2,14],record:[2,16],recurs:2,recursive_print_word:2,red:6,redond:[2,4,8,11,13,16],reduc:[2,14],reduct:14,redund:[2,5,14],refer:[0,1,2,4,6,7,9,13,14],refseq:0,reg:8,regard:2,regener:4,regexp:[2,8,9,15],region:[1,2,3,5,6,9,14,15,16],region_10:5,region_1:5,region_2:5,region_3:5,region_4:5,region_5:5,region_6:5,region_7:5,region_8:5,region_9:5,region_fil:3,regist:14,regul:[1,14],regular:[2,7,8,15],regulatori:14,rel:[1,2,5,6],relat:[9,11,14],releas:11,relev:[2,14],reli:[9,10],reliabl:14,rememb:[2,14],remind:2,remov:[2,14],repartit:14,replac:2,report:14,repres:[2,9,14,15],represent:[2,9,14],request:[2,4,5,6,7,9,11,14,15],requir:[0,1,2,4,5,6,8,10,11,13,14,15,16],res_a:2,res_b:2,resist:14,resp:2,respect:[2,11,14],restrict:[2,14],result:[2,6,9,14],result_df:2,result_queu:2,retriev:[2,9,10,14],reus:14,rev:16,rev_comp:2,revers:[1,2,16],right:2,rise:5,rm_dup_tss:[9,10],rmtree:2,rn3:[1,5,6,14],rn4:[1,5,6,14],rna:[2,8,11,16],rnd_alpha_numeric_str:2,role:14,root:[2,14],root_nod:2,ro
und:2,routin:2,row:[2,6,8],rug:13,rule:[2,9,14],run:[2,3,7,9,14],rvs:2,s1_d:8,sai:[2,8],said:2,same:[0,1,2,5,6,7,9,11,14,15],samemost:15,sampl:[2,8],save:[6,13,14],scale:8,scientif:2,scikit:2,scipi:2,score:[0,2,3,5,11,14],script:[7,9],seaborn:6,search:[1,6,9,11,13,14],second:[2,6,13],section:[2,10,12],see:[0,2,4,5,6,7,8,9,11,13,14,15,16],seed:[2,14,15],seen:[2,14],segfault:8,seldom:14,select:[0,1,2,4,6,7,9,10,11,13,14],select_5p_transcript:2,select_best_words_from_librari:2,select_by_go:[9,10],select_by_intron_s:[9,10],select_by_kei:[0,1,2,4,8,9,10,11,13,14],select_by_loc:[2,9,10],select_by_max_exon_nb:[2,9,10],select_by_nb_exon:[9,10],select_by_number_of_exon:2,select_by_numeric_valu:[9,10],select_by_posit:2,select_by_regexp:[2,9,10],select_by_transcript_s:2,select_by_tx_s:[9,10],select_longuest_transcript:2,select_most_5p_tx:[9,10],select_shortest_transcript:2,self:2,send:[2,7],send_error:2,sens:14,sep:[2,4,5,11,13,15,16],separ:[1,2,4,5,6,8,9,11,13,14,15,16],seq:8,seqid:[0,2,4,8,11,15,16],seqid_list:[9,10],seqnam:15,seqrecord:2,sequenc:[0,2,7,9,10],server:9,set:[0,1,2,3,4,5,6,8,9,10,11,13,14,15,16],set_attr:2,set_color:13,set_na:2,sever:[0,2,5,6,8,9,10,14,15],share:[7,9,11,15],sheer:14,shift:[9,10],shift_valu:5,short_long:[9,10],shortcut:[4,15],shorten:14,shortest:[2,9,15],shortnam:3,should:[0,2,3,4,5,6,7,9,11,13,14,15],show:[0,1,4,5,6,8,9,11,13,14,15,16],shown:14,shuffl:[2,14],shutdown:2,shutil:2,side:2,signal:[6,13],signatur:[2,14],signfic:14,signific:[2,14],silent:[2,14],silentremov:2,simpl:[1,2,4,5,6,8,9,11,13,14,15,16],simple_02:[2,11],simple_03:[2,11],simple_04:[2,11],simple_05:11,simple_06:11,simple_07:[11,14],simple_07_peak:14,simple_07_peaks_1:14,simple_07_peaks_2:14,simple_line_count:2,simple_nb_column:2,simpli:[2,4,5,6,14],sinc:[0,2,14],singl:[2,5,6,14,15],single_nuc:6,site:[5,6,9,14],size:[1,2,5,6,8,9,11,14,15],size_dict:2,skip:[2,13,15],sklearn:2,sleuth:16,small:14,smaller:2,smother:[2,14],snakemak:14,sniff:4,solut:[0,2],solv:2,some:[2,9,14,15],someti
m:4,sort:[2,8,14],sort_2_list:2,sourc:[0,2,4,8,15],space:[2,8],spars:2,sparsiti:[2,14],speci:[11,15],special:4,specif:[2,5,6,14],specifi:[2,6,8,11,14],splice:[2,5,9,15],splicing_sit:[9,10],split:[2,3,6],split_char:2,sqlite:9,sqrt:2,squar:[2,14],squish:[2,14],squish_matrix:2,src:8,src_kei:8,ss5:2,stabl:2,stackoverflow:2,stand:7,standard:[6,11,14],start:[0,2,3,4,5,6,11,14,15,16],start_codon:[14,15],start_str:2,start_tim:2,startswith:2,stat:[3,6,10,14],statist:[2,6,9,11,14,15],stats_multiple_overlap:2,stats_singl:2,statu:2,stderr:2,stdin:[0,1,4,5,6,7,8,9,11,13,14,15,16],stdout:[0,1,2,4,5,6,7,8,9,11,13,15,16],step:[2,6,14],step_1_factor_allow:[2,14],still:[4,10,14,16],stop:2,stop_condit:2,stop_tim:2,store:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],str:2,str_len:2,strand:[0,1,2,3,5,6,9,16],stranded:1,stream:11,strict:5,string:[0,2,3,8,11],strip:6,strip_text:6,stroke:[2,14],structur:[5,14],studi:14,sub:[9,14],subcommand:[0,10],subdirectori:14,subgenom:14,submodular:2,subroutin:10,subsampl:14,subsect:14,subsequ:[2,8,15],subset:[2,6],subset_bwig:6,success:2,suf:[2,3],suffix:[2,3,8,9],suit:0,suitabl:8,sum:[2,3,6,14,15],summed_bp_overlaps_expectation_shuffl:14,summed_bp_overlaps_log2_fold_chang:14,summed_bp_overlaps_negbinom_fit_qu:14,summed_bp_overlaps_pvalu:14,summed_bp_overlaps_tru:14,summed_bp_overlaps_variance_shuffl:14,superbatch:[9,14],supervis:2,supplementari:2,suppli:14,support:[2,4,10,14],synonym:15,synopsi:9,syntax:[6,7],sys:7,system:9,t00:2,taatt:16,tab:[10,13,15],tab_interfac:2,tab_lin:2,tabl:14,tabul:[1,2,5,6,8,9,10,11,13,14,15],tag:14,tail:2,take:[2,6,9,15,16],taken:[2,11],target:[2,5,7,8,9,11,14],target_featur:8,tell:14,temp:6,templat:7,temporari:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],tend:[2,14],term:[0,2],termin:[0,1,4,5,6,8,11,13,14,15,16],test:[2,6,10,14,15],test_combi:14,test_data_for_modl:[2,14],test_gtftk_mkdir_p:2,text:[1,2,6,8,11,14],than:[2,10,14],thank:[4,14],thei:[0,2,4,11,14,15],them:[1,2,4,14],theme:6,themselv:14,thi:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],thin
g:[6,7],think:[0,6,13],third:11,this_combi_onli:2,thiswasthenameofthequeri:14,those:[0,1,2,8,9,11,14],though:14,thousand:[2,14],thread:[2,3,6,14],three:14,through:[2,6,9,10,14],throughout:2,thrown:8,thu:[0,6,7,10,11,14],ties:[2,15],time:[2,6,7,11,14,16],tiny_r:[11,15],tinyurl:6,titl:[6,14],tmem52:6,tmp:[0,1,2,3,4,5,6,8,9,11,13,14,15,16],tmp_dir:[0,1,4,5,6,7,8,11,13,14,15,16],tmp_file:[2,3],tmp_file_list:2,to_alphanum:2,to_b:2,to_list:2,tobe:14,todo:8,token:2,too:[2,14],toolbox:9,toolkit:10,top:[5,14],total:[2,5,14],toward:14,tp_dir:2,trancript:11,transact:[2,14],transcript:[0,1,2,4,5,6,8,9,11,13,14,15,16],transcript_as_bioseq_record:2,transcript_fil:6,transcript_id:[0,1,2,4,5,6,8,11,13,15,16],transcript_id_1:11,transcript_id_2:11,transfer:10,transform:[6,8,13],transform_algorithm:2,transform_alpha:2,translat:[2,8],transpos:[5,9],treat:[9,14],tree:[6,9,10,14],treeifi:14,tri:13,true_intersect:[2,14],tss:[1,2,3,5,6,9,11,14,15],tss_dist:[9,10],tss_num:11,tss_num_1:11,tss_num_2:11,tss_number:9,tsss:[2,11],tsv:[8,9,11,13,14],tsv_file_path:14,tts:[1,6,9,14],ttss:2,tupl:[2,13],turn:14,twice:2,two:[1,2,5,6,8,11,13,14],tx_1:2,tx_2:2,tx_class:6,tx_genomic_length:2,tx_id:2,tx_kept:2,tx_n:2,txgn_id:8,txt:[1,2,4,6,11,13,14],type:[0,1,2,3,4,5,6,7,8,9,11,14,15,16],typic:2,u_df:2,ucsc:0,ultim:5,ultimatli:5,unambigu:16,unassign:2,uncompress:6,undef:4,undefin:[2,4,6],under:[2,14],uniq:[8,11],uniqu:[2,4,6,13,14,16],unitari:10,univ:14,unix:10,unknown:[2,4],unlik:[2,14],unlikelihood:14,unset:[2,4],unstrand:1,unwant:13,unwrap:[1,14],updat:[2,7],upon:[2,5,7,10,11],upon_non:2,upper:6,upper_limit:6,upstream:[1,2,6,14],upward:14,usag:[0,1,4,5,6,8,9,11,13,14,15,16],use:[0,1,2,4,5,6,7,8,11,13,14,16],use_log:2,use_markov_shuffl:2,used:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],useful:[1,2,6,9,14],user:[6,7,9,11,13,14],user_img_fil:[6,13],user_region:6,uses:2,using:[0,2,5,6,7,8,9,10,14,15,16],usr:7,usual:[2,14],util:[3,7,10,14],utmost:14,utr:[9,14,15,16],v_df:2,val:[2,14],val_tx_1:2,val_tx_2:2,val_tx_n
:2,valu:[0,1,2,3,4,5,6,8,9,11,13,14,15],vari:2,variabl:[2,6],varianc:2,variat:2,variou:[2,4,9],vector:2,verbos:[0,1,2,3,4,5,6,7,8,9,11,13,14,15,16],veri:[5,7,11,14],version:[0,1,2,4,5,6,8,9,11,13,14,15,16],vertebr:11,vicugna:11,vicugna_paco:11,view:0,violet:6,visual:14,visualis:[2,14],wai:[2,3,6],want:[2,6,7,8,14,16],warn:[2,4,6,7,10,14],watson:5,well:[2,14],were:[2,6,13],wether:2,what:[0,1],whatev:2,when:[0,1,2,4,6,7,8,11,14,16],whenev:0,where:[2,6,14],whether:[1,2,3,6,7,14,16],which:[0,2,3,4,5,6,8,11,14],which_col:2,which_combis_to_get_from:2,whichev:2,whitespac:14,whole:[1,4,6],whose:[1,2,15],wide:[0,10],width:[6,13,14],wildcard:16,window:6,wise:[0,1,2,4,5,6,7,8,9,11,13,14,15,16],with_intron:2,within:14,without:[2,5,6,11,14,15],wl3dj2_n76zfc8sdvny1q06c0000gn:6,won:[2,16],wont:2,word:[2,14],work:[0,2,14],workflow:14,wors:2,would:[1,2,4,5,7,14],wrap73:6,wrapper:2,write:[0,1,2,3,4,5,6,8,10,11,13,14,15,16],write_b:2,write_bed_3p_end:2,write_bed_5p_end:2,write_gtf_to_bed6:2,write_message_to_fil:[0,1,4,5,6,8,11,13,14,15,16],write_properli:2,written:[2,5,10],x_lab:6,x_rebuilt:[2,14],x_squish:2,x_true:[2,14],ydacyfxx:6,you:[0,2,4,5,6,7,8,9,14,16],your:[0,6,9,10,14],zero:[2,3,6],zero_bas:2,zero_to_na:3,zip:6},titles:["Warning about supported GTF file formats","Commands from section \u2018annotation\u2019","Help on Python package (pygtftk)","The pygtftk.bwig.bw_coverage module","Commands from section \u2018conversion\u2019","Commands from section \u2018coordinates\u2019","Commands from section \u2018coverage\u2019","Writing your own commands","Commands from section \u2018Editing\u2019","Help on gtftk Unix commands","Welcome to pygtftk documentation page","Commands from section \u2018information\u2019","Installing pygtftk/gtftk","Commands from section \u2018miscellaneous\u2019","Commands from section \u2018ologram\u2019","Commands from section \u2018selection\u2019","Commands from section 
\u2018sequence\u2019"],titleterms:{"class":2,The:[2,3],about:0,activ:9,add_exon_nb:11,add_prefix:8,annot:1,apropo:11,argument:9,bash:9,bed_to_gtf:4,beta:2,bw_coverag:3,bwig:3,closest_gen:1,col_from_tab:13,command:[1,4,5,6,7,8,9,11,13,14,15,16],complet:9,content:10,control_list:13,convent:0,converg:1,convers:4,convert:4,convert_ensembl:4,coordin:5,count:11,count_key_valu:11,coverag:6,del_attr:8,detail:14,dict_learn:2,discretize_kei:8,diverg:1,document:10,edit:8,exon_s:1,fasta:2,feature_s:11,file:0,format:0,from:[1,4,5,6,8,11,13,14,15,16],funtion:9,get:9,get_5p_3p_coord:5,get_attr_list:11,get_attr_value_list:11,get_exampl:11,get_feat_seq:16,get_feature_list:11,get_tx_seq:16,gtf:[0,2],gtftk:[2,9,12],help:[2,9],inform:11,instal:12,intergen:5,intersect:2,intron:5,intron_s:1,join_attr:8,join_multi_fil:8,line:2,list:9,main:9,merge_attr:8,midpoint:5,miscellan:13,mk_matrix:6,modl:2,modul:[2,3],multipl:14,name:0,nb_exon:11,nb_transcript:11,negbin_fit:2,ologram:14,ologram_merge_run:14,ologram_merge_stat:14,ologram_modl_treeifi:14,overlap:[1,14],overlap_stats_comput:2,overlap_stats_shuffl:2,own:7,packag:2,page:10,parser:9,profil:6,pygtftk:[2,3,10,12],python:2,random_list:15,random_tx:15,retriev:11,rm_dup_tss:15,section:[1,4,5,6,8,11,13,14,15,16],select:15,select_by_go:15,select_by_intron_s:15,select_by_kei:15,select_by_loc:15,select_by_max_exon_nb:15,select_by_nb_exon:15,select_by_numeric_valu:15,select_by_regexp:15,select_by_tx_s:15,select_most_5p_tx:15,seqid_list:11,sequenc:16,shift:5,short_long:15,splicing_sit:5,stat:2,subroutin:2,support:0,tab:2,tabl:10,tabul:4,test:9,tree:2,tss_dist:11,unix:9,util:2,warn:0,welcom:10,wide:9,write:7,your:7}}) \ No newline at end of file diff --git a/docs/selection.html b/docs/selection.html index b64b59da..f7ec03da 100644 --- a/docs/selection.html +++ b/docs/selection.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘selection’ — gtftk 1.2.1 
documentation + Commands from section ‘selection’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -447,8 +447,8 @@

    random_list
    $ gtftk random_list -n 3 -i simple.gtf | gtftk count
     transcript	3
    -exon	5
    -CDS	4
    +exon	4
    +CDS	3
     

    Arguments:

    @@ -493,7 +493,7 @@

    random_tx previous | - + diff --git a/docs/sequence.html b/docs/sequence.html index 03905f88..1ad6004a 100644 --- a/docs/sequence.html +++ b/docs/sequence.html @@ -16,7 +16,7 @@ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); - Commands from section ‘sequence’ — gtftk 1.2.1 documentation + Commands from section ‘sequence’ — gtftk 1.2.3 documentation @@ -45,7 +45,7 @@

    Navigation

  • previous |
  • - + @@ -236,13 +236,13 @@

    Navigation

  • previous |
  • - + diff --git a/docs/source/_static/example_01.png b/docs/source/_static/example_01.png index 9d6fe4a4..0104648b 100644 Binary files a/docs/source/_static/example_01.png and b/docs/source/_static/example_01.png differ diff --git a/docs/source/_static/example_01b.png b/docs/source/_static/example_01b.png index cf809c40..1c5660bb 100644 Binary files a/docs/source/_static/example_01b.png and b/docs/source/_static/example_01b.png differ diff --git a/docs/source/_static/example_02.png b/docs/source/_static/example_02.png index 8439c50a..f15653b3 100644 Binary files a/docs/source/_static/example_02.png and b/docs/source/_static/example_02.png differ diff --git a/docs/source/_static/example_05.png b/docs/source/_static/example_05.png index 2937ac10..4702bf91 100644 Binary files a/docs/source/_static/example_05.png and b/docs/source/_static/example_05.png differ diff --git a/docs/source/_static/example_06.png b/docs/source/_static/example_06.png index 79f3e737..713d6c11 100644 Binary files a/docs/source/_static/example_06.png and b/docs/source/_static/example_06.png differ diff --git a/docs/source/_static/example_06b.png b/docs/source/_static/example_06b.png index 3cbeb7c0..ecfe0e0b 100644 Binary files a/docs/source/_static/example_06b.png and b/docs/source/_static/example_06b.png differ diff --git a/docs/source/_static/example_07.png b/docs/source/_static/example_07.png index aaefb731..6c312dd1 100644 Binary files a/docs/source/_static/example_07.png and b/docs/source/_static/example_07.png differ diff --git a/docs/source/_static/example_08.png b/docs/source/_static/example_08.png index 861d1066..952b4eaf 100644 Binary files a/docs/source/_static/example_08.png and b/docs/source/_static/example_08.png differ diff --git a/docs/source/_static/example_13.png b/docs/source/_static/example_13.png index 1fde3047..c5c7d0c2 100644 Binary files a/docs/source/_static/example_13.png and b/docs/source/_static/example_13.png differ diff --git 
a/docs/source/_static/example_pa_01.pdf b/docs/source/_static/example_pa_01.pdf index 54ea2927..bd4f566c 100644 Binary files a/docs/source/_static/example_pa_01.pdf and b/docs/source/_static/example_pa_01.pdf differ diff --git a/docs/source/_static/example_pa_02.pdf b/docs/source/_static/example_pa_02.pdf index ec941371..82426456 100644 Binary files a/docs/source/_static/example_pa_02.pdf and b/docs/source/_static/example_pa_02.pdf differ diff --git a/docs/source/_static/example_pa_03.pdf b/docs/source/_static/example_pa_03.pdf index 379b27d2..d0325285 100644 Binary files a/docs/source/_static/example_pa_03.pdf and b/docs/source/_static/example_pa_03.pdf differ diff --git a/docs/source/_static/example_pa_04.pdf b/docs/source/_static/example_pa_04.pdf index 8b24f3fa..9f37f661 100644 Binary files a/docs/source/_static/example_pa_04.pdf and b/docs/source/_static/example_pa_04.pdf differ diff --git a/docs/source/_static/merge_ologram_stats_01.pdf b/docs/source/_static/merge_ologram_stats_01.pdf index 5d5e6e6e..ff9a73c1 100644 Binary files a/docs/source/_static/merge_ologram_stats_01.pdf and b/docs/source/_static/merge_ologram_stats_01.pdf differ diff --git a/docs/source/_static/treeified.pdf b/docs/source/_static/treeified.pdf index dea2bfd4..bc89d307 100644 Binary files a/docs/source/_static/treeified.pdf and b/docs/source/_static/treeified.pdf differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 35dd7f5e..71b6fd17 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,10 +58,10 @@ # built documents. # # The short X.Y version. -version = u'1.2.1' +version = u'1.2.3' # The full version, including alpha/beta/rc tags. -release = u'1.2.1' +release = u'1.2.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docs/source/example_01.png b/docs/source/example_01.png index 9d6fe4a4..0104648b 100644 Binary files a/docs/source/example_01.png and b/docs/source/example_01.png differ diff --git a/docs/source/example_01b.png b/docs/source/example_01b.png index cf809c40..1c5660bb 100644 Binary files a/docs/source/example_01b.png and b/docs/source/example_01b.png differ diff --git a/docs/source/example_02.png b/docs/source/example_02.png index 8439c50a..f15653b3 100644 Binary files a/docs/source/example_02.png and b/docs/source/example_02.png differ diff --git a/docs/source/example_05.png b/docs/source/example_05.png index 2937ac10..4702bf91 100644 Binary files a/docs/source/example_05.png and b/docs/source/example_05.png differ diff --git a/docs/source/example_06.png b/docs/source/example_06.png index 79f3e737..713d6c11 100644 Binary files a/docs/source/example_06.png and b/docs/source/example_06.png differ diff --git a/docs/source/example_06b.png b/docs/source/example_06b.png index 3cbeb7c0..ecfe0e0b 100644 Binary files a/docs/source/example_06b.png and b/docs/source/example_06b.png differ diff --git a/docs/source/example_07.png b/docs/source/example_07.png index aaefb731..6c312dd1 100644 Binary files a/docs/source/example_07.png and b/docs/source/example_07.png differ diff --git a/docs/source/example_08.png b/docs/source/example_08.png index 861d1066..952b4eaf 100644 Binary files a/docs/source/example_08.png and b/docs/source/example_08.png differ diff --git a/docs/source/example_13.png b/docs/source/example_13.png index 1fde3047..c5c7d0c2 100644 Binary files a/docs/source/example_13.png and b/docs/source/example_13.png differ diff --git a/docs/source/example_pa_01.pdf b/docs/source/example_pa_01.pdf index 54ea2927..bd4f566c 100644 Binary files a/docs/source/example_pa_01.pdf and b/docs/source/example_pa_01.pdf differ diff --git a/docs/source/example_pa_02.pdf b/docs/source/example_pa_02.pdf index ec941371..82426456 100644 Binary files 
a/docs/source/example_pa_02.pdf and b/docs/source/example_pa_02.pdf differ diff --git a/docs/source/example_pa_03.pdf b/docs/source/example_pa_03.pdf index 379b27d2..d0325285 100644 Binary files a/docs/source/example_pa_03.pdf and b/docs/source/example_pa_03.pdf differ diff --git a/docs/source/example_pa_04.pdf b/docs/source/example_pa_04.pdf index 8b24f3fa..9f37f661 100644 Binary files a/docs/source/example_pa_04.pdf and b/docs/source/example_pa_04.pdf differ diff --git a/docs/source/merge_ologram_stats_01.pdf b/docs/source/merge_ologram_stats_01.pdf index 5d5e6e6e..ff9a73c1 100644 Binary files a/docs/source/merge_ologram_stats_01.pdf and b/docs/source/merge_ologram_stats_01.pdf differ diff --git a/docs/source/treeified.pdf b/docs/source/treeified.pdf index dea2bfd4..bc89d307 100644 Binary files a/docs/source/treeified.pdf and b/docs/source/treeified.pdf differ diff --git a/docs/treeified.pdf b/docs/treeified.pdf index 465cbd87..dea2bfd4 100644 Binary files a/docs/treeified.pdf and b/docs/treeified.pdf differ diff --git a/manylinux/build_wheels.sh b/manylinux/build_wheels.sh index 26703e49..9947349b 100755 --- a/manylinux/build_wheels.sh +++ b/manylinux/build_wheels.sh @@ -8,7 +8,7 @@ yum install bzip2-devel -y yum install xz xz-devel -y # Compile wheels -for PYBIN in $(ls --color=none -d1 /opt/python/*/bin| grep -P "(36)|(37)"); do +for PYBIN in $(ls --color=none -d1 /opt/python/*/bin| grep -P "(36)|(37)|(38)"); do echo "${PYBIN}" echo "" "${PYBIN}/pip" install -U pip diff --git a/pygtftk/version.py b/pygtftk/version.py index 3800222f..0196e4c9 100644 --- a/pygtftk/version.py +++ b/pygtftk/version.py @@ -1,2 +1,2 @@ -__base_version__='1.2.1' -__version__='1.2.1' +__base_version__='1.2.3' +__version__='1.2.3' diff --git a/requirements.txt b/requirements.txt index 4da4e8f0..996c0a3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,26 @@ -scipy ==1.2.0 -numpy >=1.15.3 -pybedtools >=0.7.8 -pyyaml >=3.12 -requests >=2.13.0 -cffi >=1.10.0 -pyparsing 
>=2.2.0 -biopython >=1.78 -matplotlib >=3.0.0 -plotnine >=0.5.1 GitPython >=2.1.8 -cloudpickle >=0.4.0 -ftputil >=3.3.1 -pandas >=0.23.3,!=1.0.0,!=1.0.0rc0 -pybigwig >=0.3.12 -setuptools -cython >=0.29.6 -mpmath >=1.1.0 -scikit-learn >=0.21.2 +biopython >=1.69; python_version < '3.8' +biopython >=1.78; python_version > '3.7' +cffi >=1.10.0 +cloudpickle +cython >= 0.29.21; python_version > '3.7' +cython >=0.29.6; python_version < '3.8' +ftputil >= 4.0.0; python_version > '3.7' +ftputil >=3.3.1; python_version < '3.8' graphviz +matplotlib >= 3.3.1 +mpmath >= 1.1.0 +numpy >= 1.19.1; python_version > '3.7' +numpy >=1.15.3; python_version < '3.8' +pandas >= 1.1.2 +plotnine >= 0.7.1 +pybedtools >= 0.8.1 +pybigwig >= 0.3.17 +pyparsing >=2.2.0 +pyyaml +requests >=2.23.0 +scikit-learn >= 0.23.2 +scipy ==1.2.0; python_version < '3.8' +scipy >= 1.5.2; python_version > '3.7' seaborn +setuptools diff --git a/setup.cfg b/setup.cfg index a88dacf8..6b248433 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = pygtftk -version = 1.2.1 +version = 1.2.3 author = D. Puthier and F. 
Lopez author-email = denis.puthier@univ-amu.fr diff --git a/setup.py b/setup.py index 5b9db081..6dd858de 100644 --- a/setup.py +++ b/setup.py @@ -11,19 +11,19 @@ # A set of builtin packages # ------------------------------------------------------------------------- -import glob -import hashlib -import os -import platform import re import shutil import subprocess import sys from subprocess import DEVNULL -from tempfile import NamedTemporaryFile +import glob +import hashlib import numpy as np +import os +import platform from Cython.Distutils import build_ext +from tempfile import NamedTemporaryFile # ------------------------------------------------------------------------- # Python compiler version @@ -54,7 +54,7 @@ __url_source__ = 'https://github.com/dputhier/pygtftk' __url_tracker__ = 'https://github.com/dputhier/pygtftk' __keywords__ = 'genomics bioinformatics GTF BED' -__python_requires__ = '>=3.6,<3.8' +__python_requires__ = '>=3.6,<=3.8' __classifiers__ = ("License :: OSI Approved :: MIT License", "Operating System :: MacOS", "Operating System :: POSIX :: Linux", @@ -62,6 +62,7 @@ "Environment :: Console", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Intended Audience :: Science/Research", "Natural Language :: English", "Topic :: Scientific/Engineering :: Bio-Informatics", @@ -165,11 +166,9 @@ platform.system() - extra_comp_cython = ['-W', '-O3'] extra_link_cython = [] - # Use OpenMP only on Linux, as clang by default does not support it on OSX # TODO Make it a parameter if platform.system() == 'Darwin': @@ -178,14 +177,19 @@ extra_comp_cython += ['-fopenmp'] extra_link_cython += ['-fopenmp'] - - # Avoid Cython warning about NumPy API deprecation upon installation if platform.system() == 'Darwin': extra_comp_cython += ['-Wno-#warnings'] if platform.system() == 'Linux': extra_comp_cython += ['-Wno-cpp'] +# Avoid error "fatal error: 'complex' file not found" under OSX (Python 3.6) + 
+if platform.system() == 'Darwin': + if platform.python_version_tuple()[0:2] == ('3', '6'): + extra_comp_cython += ["-stdlib=libc++"] + extra_link_cython += ["-stdlib=libc++"] + # NOTE : the separation in several different modules was needed to make it # work on MacOSX for some unfathomable reason. @@ -201,13 +205,15 @@ cython_ologram_3 = Extension(name='pygtftk.stats.intersect.read_bed.read_bed_as_list', sources=["pygtftk/stats/intersect/read_bed/read_bed_as_list.pyx", - "pygtftk/stats/intersect/read_bed/exclude.cpp"], # Include custom Cpp code + "pygtftk/stats/intersect/read_bed/exclude.cpp"], # Include custom Cpp code extra_compile_args=extra_comp_cython, extra_link_args=extra_link_cython, include_dirs=[np.get_include()], language='c++') cython_ologram_4 = Extension(name='pygtftk.stats.multiprocessing.multiproc', - sources=["pygtftk/stats/multiprocessing/multiproc.pyx", "pygtftk/stats/multiprocessing/multiproc_structs.pxd", "pygtftk/stats/multiprocessing/multiproc.pxd"], + sources=["pygtftk/stats/multiprocessing/multiproc.pyx", + "pygtftk/stats/multiprocessing/multiproc_structs.pxd", + "pygtftk/stats/multiprocessing/multiproc.pxd"], extra_compile_args=extra_comp_cython, extra_link_args=extra_link_cython, language='c') @@ -253,6 +259,8 @@ # Declare the setup function # ---------------------------------------------------------------------- +with open('requirements.txt') as f: + pack_required = f.read().splitlines() setup(name="pygtftk", include_dirs=[np.get_include()], @@ -341,34 +349,9 @@ 'sphinx_bootstrap_theme >=0.4.9', 'sphinxcontrib-googleanalytics'], 'gffutils': ['gffutils']}, - install_requires=['nose', - 'pyyaml >=3.12', - 'cloudpickle >=0.5.6', - 'ftputil >=3.3.1', - 'pybedtools >=0.7.8', - 'pandas >=0.23.3, !=1.0.0, !=1.0.0rc0', - 'requests >=2.13.0', - 'pyBigWig >=0.3.12', - 'cffi >=1.10.0', - 'biopython >=1.69', - 'pyparsing >=2.2.0', - 'GitPython >=2.1.8', - 'pyparsing', - 'matplotlib >=3.0.0', - 'plotnine >=0.5.1', - 'setuptools', - 'cython 
>=0.29.6', - 'mpmath >=1.1.0', - 'scikit-learn >=0.21.2', - 'graphviz', - 'seaborn' - ], + install_requires=pack_required, ext_modules=[lib_pygtftk] + [cython_ologram_1, cython_ologram_2, cython_ologram_3, cython_ologram_4]) - - - - # ---------------------------------------------------------------------- # Update gtftk config directory # ---------------------------------------------------------------------- diff --git a/tools/fasta/unfold_fasta.py b/tools/fasta/unfold_fasta.py index 29253cac..a78994a1 100644 --- a/tools/fasta/unfold_fasta.py +++ b/tools/fasta/unfold_fasta.py @@ -5,7 +5,7 @@ from Bio import SeqIO -__DESC__ = '''Take a FASTA as input and produced a new FASTA file with record +__DESC__ = '''Take a FASTA as input and produced a new FASTA file with record ordered as in --id-file.'''