HerwigLab · YalanBi · May 12, 2025 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -1,13 +1,21 @@
-# File: .readthedocs.yaml
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 
 version: 2
 
-# Build from the docs/ directory with Sphinx
+# Set the OS and tool versions used for the build
-sphinx:
-  configuration: docs/conf.py
+build:
+  os: "ubuntu-24.04"
+  tools:
+    nodejs: "20"
+    python: "3.12"
 
-# Explicitly set the version of Python
+# Install the Python dependencies needed to build the documentation
 python:
-  version: 3.12
   install:
-    - requirements: docs/requirements.txt
+    - requirements: docs/requirements.txt
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+  fail_on_warning: False
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -56,9 +56,9 @@
 * new feature: filter_transcripts function for genes
 * changed SUBSTANTIAL filter to 1% of the genes total (was 5%)
 * coordination test:
-    * changed argument and column naming, to make it consistent with other test results
-    * added conditional delta PSI effect size measure
-    * order of events is now according to gene strand: A upstream of B
+  * changed argument and column naming, to make it consistent with other test results
+  * added conditional delta PSI effect size measure
+  * order of events is now according to gene strand: A upstream of B
 
 ## [0.3.0]
 
@@ -76,7 +76,6 @@
 * added function to import samples from csv/gtf to import transcriptome reconstruction / quantification from other tools.
 * dropped requirement for gtf files to be tabix indexed.
 
-
 ## [0.2.10]
 
 * fixed get_overlap - important for correct assignment of mono exonic genes to reference
@@ -106,22 +105,20 @@
 * added colors parameter to plotting functions
 * various fixes of command line script run_isotools.py
 
-
 ## [0.2.7]
 
 * added command line script run_isotools.py
 * added test data for unit tests
 
-
 ## [0.2.6]
 
 * Added unit tests
 * Fixed bug in novel splicing subcategory assignment
 * new feature: rarefaction analysis
 * Changed filtering: expressions get evaluated during iteration
-    * Predefined filters are added automatically
-    * Add / remove filters one by one
-    * added optional progress bar to iter_genes/transcripts
+  * Predefined filters are added automatically
+  * Add / remove filters one by one
+  * added optional progress bar to iter_genes/transcripts
 
 ## [0.2.5]
 
@@ -174,7 +171,7 @@
 * fix: property of transcripts included {sample_name:0}
 * save the TSS and PAS positions
 * New: use_satag parameter for add_sample_from_bam
-* Change: use median TSS/PAS (of all reads with same splice pattern) as transcript start/end (e.g. exons[0][0]/exons[-1][1])
+* Change: use median TSS/PAS (of all reads with same splice pattern) as transcript start/end (e.g. exons\[0\]\[0\]/exons\[-1\]\[1\])
 * Fix: Novel exon skipping annotation now finds all exonic regions that are skipped.
 * change: Default filter of FRAGMENTS now only tags reads that do not use a reference TSS or PAS
 
@@ -206,8 +203,8 @@
 
 * Change: refactored SpliceGraph to SegmentGraph to better comply with common terms in literature
 * New: added a basic implementation of an actual SpliceGraph (as commonly defined in literature)
-    * based on sorted dict
-    * not used so far, but maybe useful in importing the long read bam files since it can be extended easily
+  * based on sorted dict
+  * not used so far, but maybe useful in importing the long read bam files since it can be extended easily
 * New: added decorators "experimental" and "deprecated" to mark unsafe functions
 * Change: in differential splicing changed the alternative fraction, to match the common PSI (% spliced in) definition
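-* Change: narrowed definition of mutually exclusive exons: the alternatives now need to to feature exactly one ME exon and rejoin at node C
+* Change: narrowed definition of mutually exclusive exons: the alternatives now need to feature exactly one ME exon and rejoin at node C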

diff --git a/README.md b/README.md
@@ -14,16 +14,19 @@ Key features:
 * Import of LRTS bam files (aligned full length transcripts).
 * Import of reference annotation in gff3/gtf format.
 * Computation of quality control metrics.
-* Annotation and classification of novel transcripts with biologically motivated classification scheme.
+* Annotation and classification of novel transcripts using the biologically motivated classification scheme SQANTI.
+* Evaluation of the coding potential of isoforms.
 * Definition of alternative splicing events based on segment graphs.
 * Detection of differential alternative splicing between samples and groups of samples.
-* Data visualization.
+* Gene modelling based on structural and expression variability.
+* Support for proteogenomic approaches at the interface of transcriptomics and proteomics.
+* Various data visualizations.
 
-## documentation:
+## Documentation
 
 The documentation, including tutorials with real-world case studies and the complete API reference is available at [readthedocs](https://isotools.readthedocs.io/en/latest/ "documentation")
 
-## installation:
+## Installation
 
 Isotools is available from PyPI, and can be installed with the pip command:
 
@@ -39,7 +42,7 @@ cd isotools
 python3 -m pip install .
 ```
 
-## usage:
+## Usage
 
 This code block demonstrates the basic file import with IsoTools.
 It uses a small test data set contained in this repository, and should run within seconds. The paths are relative to the root of the repository.
@@ -50,19 +53,19 @@ from isotools import Transcriptome
 import logging
 logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
 # import the reference annotation
-isoseq = Transcriptome.from_reference('tests/data/example.gff.gz')
-# import the isoseq data
+transcriptome = Transcriptome.from_reference('tests/data/example.gff.gz')
+# import the transcriptome data
 for sa in ('CTL', 'VPA'):
-    isoseq.add_sample_from_bam(f'../tests/data/example_1_{sa}.bam', sample_name=sa, group=sa, platform='SequelII')
+    transcriptome.add_sample_from_bam(f'tests/data/example_1_{sa}.bam', sample_name=sa, group=sa, platform='SequelII')
 # save the imported file as pkl file (for faster import)
-isoseq.add_qc_metrics('../tests/data/example.fa')
-isoseq.save('../tests/data/example_1_isotools.pkl')
+transcriptome.add_qc_metrics('tests/data/example.fa')
+transcriptome.save('tests/data/example_1_isotools.pkl')
 ```
 
-## Citation and feedback:
+## Citation and feedback
 
 * If you run into any issues, please use the [github issues report feature](https://github.com/HerwigLab/IsoTools2/issues).
 * For general feedback, please write us an email to [[email protected]](mailto:[email protected]) and [[email protected]](mailto:[email protected]).
 * If you use IsoTools in your publication, please cite the following paper in addition to this repository:
-  * Lienhard, Matthias et al. “IsoTools: a flexible workflow for long-read transcriptome sequencing analysis.” Bioinformatics (Oxford, England) vol. 39,6 (2023): btad364. [doi:10.1093/bioinformatics/btad364](https://doi.org/10.1093/bioinformatics/btad364)
-  * Bi, Yalan et al. “IsoTools 2.0: Software for Comprehensive Analysis of Long-read Transcriptome Sequencing Data.” Journal of molecular biology, 169049. 26 Feb. 2025, [doi:10.1016/j.jmb.2025.169049](https://doi.org/10.1016/j.jmb.2025.169049)
+  * Lienhard, Matthias et al. “**IsoTools: a flexible workflow for long-read transcriptome sequencing analysis**.” Bioinformatics (Oxford, England) vol. 39,6 (2023): btad364. [doi:10.1093/bioinformatics/btad364](https://doi.org/10.1093/bioinformatics/btad364)
+  * Bi, Yalan et al. “**IsoTools 2.0: Software for Comprehensive Analysis of Long-read Transcriptome Sequencing Data**.” Journal of molecular biology, 169049. 26 Feb. 2025, [doi:10.1016/j.jmb.2025.169049](https://doi.org/10.1016/j.jmb.2025.169049)
diff --git a/docs/conf.py b/docs/conf.py
@@ -25,7 +25,7 @@
 
 project = "isotools"
 copyright = "2021, Matthias Lienhard"
-author = "Matthias Lienhard"
+author = "Matthias Lienhard, Yalan Bi"
 
 # The short X.Y version
 version = ".".join(__version__.split(".")[:2])

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
@@ -7,10 +7,13 @@ Key features:
 * Import of LRTS bam files (aligned full length transcripts).
 * Import of reference annotation in gff3/gtf format.
 * Computation of quality control metrics.
-* Annotation and classification of novel transcripts with biologically motivated classification scheme.
+* Annotation and classification of novel transcripts using the biologically motivated classification scheme SQANTI.
+* Evaluation of the coding potential of isoforms.
 * Definition of alternative splicing events based on segment graphs.
 * Detection of differential alternative splicing between samples and groups of samples.
-* Data visualization.
+* Gene modelling based on structural and expression variability.
+* Support for proteogenomic approaches at the interface of transcriptomics and proteomics.
+* Various data visualizations.
 
 .. image:: notebooks/Isotools_overview_slide.png
   :width: 800
@@ -26,25 +29,28 @@ The package can be installed with pip:
 
 Usage
 -----
-This code block demonstrates the basic file import with isoseq. For a more comprehensive real world example see the tutorial.
+This code block demonstrates the basic file import with IsoTools.
+For more comprehensive real-world examples, see the `tutorials <https://isotools.readthedocs.io/en/latest/tutorials.html>`_.
 
 .. code-block:: python
 
     from isotools import Transcriptome
     import logging
     logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
-    isoseq=Transcriptome.from_reference('reference_file.gff3.gz')
+    transcriptome=Transcriptome.from_reference('reference_file.gff3.gz')
     isoseq_bam_fn={'sample1':'isoseq_fn_s1.bam', 'sample2':'isoseq_fn_s2.bam'}
     groups={'sample1':'control', 'sample2':'treatment'}
     for sa,bam in isoseq_bam_fn.items():
-        isoseq.add_sample_from_bam(bam, sample_name=sa, group=groups[sa])
-    isoseq.add_qc_metrics('genome.fa')
-    isoseq.make_index()
-    isoseq.add_filter()
-    isoseq.save('example_isotools.pkl')
+        transcriptome.add_sample_from_bam(bam, sample_name=sa, group=groups[sa])
+    transcriptome.add_qc_metrics('genome.fa')
+    transcriptome.make_index()
+    transcriptome.add_filter()
+    transcriptome.save('example_isotools.pkl')
 
 Citation and feedback
 ---------------------
-* If you run into any issues, please use the `github issues <https://github.com/MatthiasLienhard/isotools/issues>`_ report feature.
-* For feedback, please write me an email to `[email protected] <mailto:[email protected]>`_.
-* If you use isotools in your publication, please cite the following [paper](https://doi.org/10.1093/bioinformatics/btad364): Lienhard et al, Bioinformatics, 2023: IsoTools: a flexible workflow for long-read transcriptome sequencing analysis
+* If you run into any issues, please use the `github issues <https://github.com/HerwigLab/IsoTools2/issues>`_ report feature.
+* For feedback, please write us an email to `[email protected] <mailto:[email protected]>`_ and `[email protected] <mailto:[email protected]>`_.
+* If you use IsoTools in your publication, please cite the following paper in addition to this repository:
+  * Lienhard, Matthias et al. “**IsoTools: a flexible workflow for long-read transcriptome sequencing analysis**.” Bioinformatics (Oxford, England) vol. 39,6 (2023): btad364. `doi:10.1093/bioinformatics/btad364 <https://doi.org/10.1093/bioinformatics/btad364>`_
+  * Bi, Yalan et al. “**IsoTools 2.0: Software for Comprehensive Analysis of Long-read Transcriptome Sequencing Data**.” Journal of molecular biology, 169049. 26 Feb. 2025, `doi:10.1016/j.jmb.2025.169049 <https://doi.org/10.1016/j.jmb.2025.169049>`_
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,13 +1,13 @@
 # File: docs/requirements.txt
 
 # Defining the exact version will make sure things don't break
-sphinx>=4.2
-sphinx_rtd_theme==0.5.2
-sphinx-argparse==0.3.2
-readthedocs-sphinx-search==0.1.0
-myst_parser==0.15.1
-nbsphinx==0.8.6
-Jinja2<3.1
-IPython==7.27.0
-isotools
-
+sphinx==8.1.3
+sphinx_rtd_theme==3.0.2
+sphinx-argparse==0.5.2
+sphinx-notfound-page==1.1.0
+readthedocs-sphinx-search==0.3.2
+myst_parser==4.0.1
+nbsphinx==0.9.7
+Jinja2==3.1.6
+IPython==9.2.0
+isotools
diff --git a/notebooks/README.md b/notebooks/README.md
@@ -1,4 +1,5 @@
 # Notebooks
-This folder contains notebooks to replicate the results from the paper 
 
-IsoTools: IsoTools: a python toolbox for long-read transcriptome sequencing (in preparation)
+This folder contains notebooks to replicate the results described in the [paper](https://doi.org/10.1093/bioinformatics/btad364)
+
+Lienhard, Matthias et al. “**IsoTools: a flexible workflow for long-read transcriptome sequencing analysis**.” Bioinformatics (Oxford, England) vol. 39,6 (2023): btad364. [doi:10.1093/bioinformatics/btad364](https://doi.org/10.1093/bioinformatics/btad364)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,53 @@
 [build-system]
-requires = ["setuptools>=46.4", "wheel"]
+requires = ["setuptools >= 77.0.3", "wheel"]
 build-backend = "setuptools.build_meta"
 
+[project]
+name = "isotools"
+authors = [
+  { name="Matthias Lienhard", email="[email protected]" },
+  { name="Yalan Bi", email="[email protected]" },
+]
+description = "Framework for the analysis of long read transcriptome sequencing data"
+readme = "README.md"
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: MacOS",
+    "Operating System :: POSIX :: Linux",
+]
+license = "MIT"
+license-files = ["LICEN[CS]E*"]
+dynamic = ["version"]
+dependencies = [
+    "numpy",
+    "pandas",
+    "tqdm",
+    "intervaltree",
+    "matplotlib",
+    "seaborn",
+    "biopython",
+    "pysam",
+    "umap-learn",
+    "scikit-learn",
+    "scipy",
+    "statsmodels",
+    "pyhmmer",
+    "requests",
+    "CPAT",
+    "python-ternary",
+]
+
+[project.urls]
+Homepage = "https://github.com/HerwigLab/IsoTools2"
+Documentation = "https://isotools.readthedocs.io/"
+Repository = "https://github.com/HerwigLab/IsoTools2.git"
+Issues = "https://github.com/HerwigLab/IsoTools2/issues"
+Changelog = "https://github.com/HerwigLab/IsoTools2/blob/master/CHANGELOG.md"
+
+[tool.setuptools.dynamic]
+version = {file = ["VERSION.txt"]}
+
 [tool.pytest.ini_options]
 addopts = "--cov=isotools"
 testpaths = [

diff --git a/requirements.txt b/requirements.txt
@@ -10,8 +10,7 @@ umap-learn
 scikit-learn
 scipy
 statsmodels
-importlib-metadata
 pyhmmer
 requests
 CPAT
-ternary
+python-ternary
diff --git a/setup.cfg b/setup.cfg
@@ -13,7 +13,8 @@ project_urls =
 classifiers =
     Programming Language :: Python :: 3
     License :: OSI Approved :: MIT License
-    Operating System :: OS Independent
+    Operating System :: MacOS :: MacOS X
+    Operating System :: POSIX :: Linux
 
 [options]
 package_dir =
@@ -36,7 +37,7 @@ install_requires =
     pyhmmer
     requests
     CPAT
-    ternary
+    python-ternary
 
 [options.packages.find]
 where = src

diff --git a/src/isotools/_transcriptome_io.py b/src/isotools/_transcriptome_io.py
@@ -1757,13 +1757,16 @@ def _read_gff_file(file_name, chromosomes, progress_bar=True):
     cds_start = dict()
     cds_stop = dict()
     # takes quite some time... add a progress bar?
-    with tqdm(
-        total=path.getsize(file_name),
-        unit_scale=True,
-        unit="B",
-        unit_divisor=1024,
-        disable=not progress_bar,
-    ) as pbar, TabixFile(file_name) as gff:
+    with (
+        tqdm(
+            total=path.getsize(file_name),
+            unit_scale=True,
+            unit="B",
+            unit_divisor=1024,
+            disable=not progress_bar,
+        ) as pbar,
+        TabixFile(file_name) as gff,
+    ):
         chrom_ids = get_gff_chrom_dict(gff, chromosomes)
         for line in gff.fetch():
             file_pos = (