From 09a0c8092f3ab77a80fbebc59ce4e5b92d6d44b0 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Fri, 20 Dec 2024 11:40:39 -0800
Subject: [PATCH] chore: remove Python 3.8 support (#133)

---
 .github/workflows/pypi-publish.yml     |   8 +-
 .github/workflows/pypi-test.yml        |   6 +-
 .pre-commit-config.yaml                |  13 +-
 CHANGELOG.md                           |   5 +
 pyproject.toml                         |   4 +
 setup.cfg                              |   2 +-
 src/genomicranges/GenomicRanges.py     | 223 ++++++-------------------
 src/genomicranges/GenomicRangesList.py | 132 +++++++++------
 src/genomicranges/SeqInfo.py           |  12 +-
 src/genomicranges/io/gtf.py            |   4 +-
 src/genomicranges/io/ucsc.py           |   4 +-
 src/genomicranges/utils.py             |  15 +-
 12 files changed, 170 insertions(+), 258 deletions(-)

diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml
index 7b591a2..030cd10 100644
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -13,11 +13,11 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.9
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v5
       with:
-        python-version: 3.9
+        python-version: 3.11
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml
index 9dc019a..3766e8c 100644
--- a/.github/workflows/pypi-test.yml
+++ b/.github/workflows/pypi-test.yml
@@ -15,13 +15,13 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ]
+        python-version: [ '3.9', '3.10', '3.11', '3.12' ]
 
     name: Python ${{ matrix.python-version }}
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Setup Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
         cache: 'pip'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index eed031a..e60a5f4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,18 +18,18 @@ repos:
     args: ['--fix=auto']  # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows
 
 # - repo: https://github.com/PyCQA/docformatter
-#   rev: v1.7.5
+#   rev: master
 #   hooks:
 #     - id: docformatter
 #       additional_dependencies: [tomli]
 #       args: [--in-place, --wrap-descriptions=120, --wrap-summaries=120]
 #       # --config, ./pyproject.toml
 
-- repo: https://github.com/psf/black
-  rev: 24.8.0
-  hooks:
-  - id: black
-    language_version: python3
+# - repo: https://github.com/psf/black
+#   rev: 24.8.0
+#   hooks:
+#   - id: black
+#     language_version: python3
 
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
@@ -37,6 +37,7 @@ repos:
   hooks:
     - id: ruff
       args: [--fix, --exit-non-zero-on-fix]
+    - id: ruff-format
 
 ## If like to embrace black styles even in the docs:
 # - repo: https://github.com/asottile/blacken-docs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f259e9..f98a970 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## Version 0.5.0
+
+- chore: Remove Python 3.8 (EOL)
+- precommit: Replace black with ruff's formatter
+
 ## Version 0.4.32 - 0.4.33
 
 - Bump IRanges package version to fix coercion issues to pandas.
diff --git a/pyproject.toml b/pyproject.toml
index a7cea75..00aa968 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,10 @@ extend-ignore = ["F821"]
 [tool.ruff.pydocstyle]
 convention = "google"
 
+[tool.ruff.format]
+docstring-code-format = true
+docstring-code-line-length = 20
+
 [tool.ruff.per-file-ignores]
 "__init__.py" = ["E402", "F401"]
 
diff --git a/setup.cfg b/setup.cfg
index 64f5187..2d3326e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ package_dir =
     =src
 
 # Require a min/specific Python version (comma-separated conditions)
-python_requires = >=3.8
+python_requires = >=3.9
 
 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py
index b770afe..7d0101d 100644
--- a/src/genomicranges/GenomicRanges.py
+++ b/src/genomicranges/GenomicRanges.py
@@ -47,9 +47,7 @@ def _validate_seqnames(seqnames, seqinfo, num_ranges):
 
     _l = len(seqinfo)
     if (seqnames > _l).any():
-        raise ValueError(
-            "'seqnames' contains sequence name not represented in 'seqinfo'."
-        )
+        raise ValueError("'seqnames' contains sequence name not represented in 'seqinfo'.")
 
 
 def _validate_ranges(ranges, num_ranges):
@@ -107,11 +105,7 @@ def __iter__(self):
 
     def __next__(self):
         if self._current_index < len(self._gr):
-            iter_row_index = (
-                self._gr.names[self._current_index]
-                if self._gr.names is not None
-                else None
-            )
+            iter_row_index = self._gr.names[self._current_index] if self._gr.names is not None else None
 
             iter_slice = self._gr[self._current_index]
             self._current_index += 1
@@ -203,14 +197,10 @@ def __init__(
             _num_ranges = _guess_num_ranges(self._seqnames, self._ranges)
             _validate_ranges(self._ranges, _num_ranges)
             _validate_seqnames(self._seqnames, self._seqinfo, _num_ranges)
-            _validate_optional_attrs(
-                self._strand, self._mcols, self._names, _num_ranges
-            )
+            _validate_optional_attrs(self._strand, self._mcols, self._names, _num_ranges)
 
     def _build_reverse_seqindex(self, seqinfo: SeqInfo):
-        self._reverse_seqindex = ut.reverse_index.build_reverse_index(
-            seqinfo.get_seqnames()
-        )
+        self._reverse_seqindex = ut.reverse_index.build_reverse_index(seqinfo.get_seqnames())
 
     def _remove_reverse_seqindex(self):
         del self._reverse_seqindex
@@ -400,9 +390,7 @@ def __str__(self) -> str:
                     data = self._mcols.column(col)
                     showed = ut.show_as_cell(data, indices)
                     header = [col, "<" + ut.print_type(data) + ">"]
-                    showed = ut.truncate_strings(
-                        showed, width=max(40, len(header[0]), len(header[1]))
-                    )
+                    showed = ut.truncate_strings(showed, width=max(40, len(header[0]), len(header[1])))
                     if insert_ellipsis:
                         showed = showed[:3] + ["..."] + showed[3:]
                     columns.append(header + showed)
@@ -469,9 +457,7 @@ def get_seqnames(
         else:
             raise ValueError("Argument 'as_type' must be 'factor' or 'list'.")
 
-    def set_seqnames(
-        self, seqnames: Union[Sequence[str], np.ndarray], in_place: bool = False
-    ) -> "GenomicRanges":
+    def set_seqnames(self, seqnames: Union[Sequence[str], np.ndarray], in_place: bool = False) -> "GenomicRanges":
         """Set new sequence names.
 
         Args:
@@ -490,9 +476,7 @@ def set_seqnames(
         _validate_seqnames(seqnames, len(self))
 
         if not isinstance(seqnames, np.ndarray):
-            seqnames = np.asarray(
-                [self._seqinfo.get_seqnames().index(x) for x in list(seqnames)]
-            )
+            seqnames = np.asarray([self._seqinfo.get_seqnames().index(x) for x in list(seqnames)])
 
         output = self._define_output(in_place)
         output._seqnames = seqnames
@@ -873,9 +857,7 @@ def set_metadata(self, metadata: dict, in_place: bool = False) -> "GenomicRanges
             or as a reference to the (in-place-modified) original.
         """
         if not isinstance(metadata, dict):
-            raise TypeError(
-                f"`metadata` must be a dictionary, provided {type(metadata)}."
-            )
+            raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.")
         output = self._define_output(in_place)
         output._metadata = metadata
         return output
@@ -1127,9 +1109,7 @@ def from_pandas(cls, input: "pandas.DataFrame") -> "GenomicRanges":
         if input.index is not None:
             names = [str(i) for i in input.index.to_list()]
 
-        return cls(
-            ranges=ranges, seqnames=seqnames, strand=strand, names=names, mcols=mcols
-        )
+        return cls(ranges=ranges, seqnames=seqnames, strand=strand, names=names, mcols=mcols)
 
     ################################
     ######>> polars interop <<######
@@ -1144,9 +1124,7 @@ def to_polars(self) -> "polars.DataFrame":
         import polars as pl
 
         _rdf = self._ranges.to_polars()
-        _rdf = _rdf.with_columns(
-            seqnames=self.get_seqnames(), strand=self.get_strand(as_type="list")
-        )
+        _rdf = _rdf.with_columns(seqnames=self.get_seqnames(), strand=self.get_strand(as_type="list"))
 
         if self._names is not None:
             _rdf = _rdf.with_columns(rownames=self._names)
@@ -1210,9 +1188,7 @@ def from_polars(cls, input: "polars.DataFrame") -> "GenomicRanges":
 
         names = None
 
-        return cls(
-            ranges=ranges, seqnames=seqnames, strand=strand, names=names, mcols=mcols
-        )
+        return cls(ranges=ranges, seqnames=seqnames, strand=strand, names=names, mcols=mcols)
 
     #####################################
     ######>> intra-range methods <<######
@@ -1282,9 +1258,7 @@ def flank(
         # figure out which position to pin, start or end?
         start_flags = np.repeat(start, len(all_strands))
         if not ignore_strand:
-            start_flags = [
-                start != (all_strands[i] == -1) for i in range(len(all_strands))
-            ]
+            start_flags = [start != (all_strands[i] == -1) for i in range(len(all_strands))]
 
         new_starts = []
         new_widths = []
@@ -1295,9 +1269,7 @@ def flank(
             sf = start_flags[idx]
             tstart = 0
             if both is True:
-                tstart = (
-                    all_starts[idx] - abs(width) if sf else all_ends[idx] - abs(width)
-                )
+                tstart = all_starts[idx] - abs(width) if sf else all_ends[idx] - abs(width)
             else:
                 if width >= 0:
                     tstart = all_starts[idx] - abs(width) if sf else all_ends[idx]
@@ -1356,9 +1328,7 @@ def resize(
         output._ranges = self._ranges.resize(width=width, fix=fix)
         return output
 
-    def shift(
-        self, shift: Union[int, List[int], np.ndarray] = 0, in_place: bool = False
-    ) -> "GenomicRanges":
+    def shift(self, shift: Union[int, List[int], np.ndarray] = 0, in_place: bool = False) -> "GenomicRanges":
         """Shift all intervals.
 
         Args:
@@ -1382,9 +1352,7 @@ def shift(
         output._ranges = self._ranges.shift(shift=shift)
         return output
 
-    def promoters(
-        self, upstream: int = 2000, downstream: int = 200, in_place: bool = False
-    ) -> "GenomicRanges":
+    def promoters(self, upstream: int = 2000, downstream: int = 200, in_place: bool = False) -> "GenomicRanges":
         """Extend intervals to promoter regions.
 
         Generates promoter ranges relative to the transcription start site (TSS),
@@ -1419,21 +1387,13 @@ def promoters(
 
         new_starts = np.asarray(
             [
-                (
-                    all_starts[idx] - upstream
-                    if start_flags[idx]
-                    else all_ends[idx] - downstream
-                )
+                (all_starts[idx] - upstream if start_flags[idx] else all_ends[idx] - downstream)
                 for idx in range(len(start_flags))
             ]
         )
         new_ends = np.asarray(
             [
-                (
-                    all_starts[idx] + downstream
-                    if start_flags[idx]
-                    else all_ends[idx] + upstream
-                )
+                (all_starts[idx] + downstream if start_flags[idx] else all_ends[idx] + upstream)
                 for idx in range(len(start_flags))
             ]
         )
@@ -1471,9 +1431,7 @@ def restrict(
             (in-place-modified) original.
         """
 
-        restricted_ir = self._ranges.restrict(
-            start=start, end=end, keep_all_ranges=True
-        )
+        restricted_ir = self._ranges.restrict(start=start, end=end, keep_all_ranges=True)
         output = self._define_output(in_place)
         output._ranges = restricted_ir
 
@@ -1518,11 +1476,7 @@ def trim(self, in_place: bool = False) -> "GenomicRanges":
             _t_chr = all_chrs[i]
             _end = all_ends[i]
 
-            if (
-                is_circular is not None
-                and is_circular[_t_chr] is False
-                and _end > seqlengths[_t_chr]
-            ):
+            if is_circular is not None and is_circular[_t_chr] is False and _end > seqlengths[_t_chr]:
                 _end = seqlengths[_t_chr] + 1
 
             new_ends.append(_end)
@@ -1561,16 +1515,11 @@ def narrow(
             (in-place-modified) original.
         """
         if start is not None and end is not None and width is not None:
-            raise ValueError(
-                "Only provide two of the three parameters - `start`, "
-                "`end` and `width` but not all!"
-            )
+            raise ValueError("Only provide two of the three parameters - `start`, " "`end` and `width` but not all!")
 
         if width is not None:
             if start is None and end is None:
-                raise ValueError(
-                    "If width is provided, either start or end must be provided."
-                )
+                raise ValueError("If width is provided, either start or end must be provided.")
 
         narrow_ir = self._ranges.narrow(start=start, end=end, width=width)
         output = self._define_output(in_place)
@@ -1585,15 +1534,10 @@ def _group_indices_by_chrm(self, ignore_strand: bool = False) -> dict:
         #     __strand[__strand == 0] = 1
 
         _seqnames = [self._seqinfo._seqnames[i] for i in self._seqnames]
-        grp_keys = np.char.add(
-            np.char.add(_seqnames, f"{_granges_delim}"), __strand.astype(str)
-        )
+        grp_keys = np.char.add(np.char.add(_seqnames, f"{_granges_delim}"), __strand.astype(str))
         unique_grps, inverse_indices = np.unique(grp_keys, return_inverse=True)
 
-        chrm_grps = {
-            str(grp): np.where(inverse_indices == i)[0].tolist()
-            for i, grp in enumerate(unique_grps)
-        }
+        chrm_grps = {str(grp): np.where(inverse_indices == i)[0].tolist() for i, grp in enumerate(unique_grps)}
 
         return chrm_grps
 
@@ -1662,9 +1606,7 @@ def reduce(
         new_seqnames = [x[0] for x in splits]
         new_strand = np.asarray([int(x[1]) for x in splits])
 
-        output = GenomicRanges(
-            seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges
-        )
+        output = GenomicRanges(seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges)
 
         if with_reverse_map is True:
             output._mcols.set_column("revmap", rev_map, in_place=True)
@@ -1673,9 +1615,7 @@ def reduce(
 
         return output
 
-    def range(
-        self, with_reverse_map: bool = False, ignore_strand: bool = False
-    ) -> "GenomicRanges":
+    def range(self, with_reverse_map: bool = False, ignore_strand: bool = False) -> "GenomicRanges":
         """Calculate range bounds for each distinct (seqname, strand) pair.
 
         Args:
@@ -1713,9 +1653,7 @@ def range(
         new_seqnames = [x[0] for x in splits]
         new_strand = np.asarray([int(x[1]) for x in splits])
 
-        output = GenomicRanges(
-            seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges
-        )
+        output = GenomicRanges(seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges)
 
         if with_reverse_map is True:
             output._mcols.set_column("revmap", rev_map, in_place=True)
@@ -1784,15 +1722,11 @@ def gaps(
         new_seqnames = [x[0] for x in splits]
         new_strand = np.asarray([int(x[1]) for x in splits])
 
-        output = GenomicRanges(
-            seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges
-        )
+        output = GenomicRanges(seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges)
 
         return output
 
-    def disjoin(
-        self, with_reverse_map: bool = False, ignore_strand: bool = False
-    ) -> "GenomicRanges":
+    def disjoin(self, with_reverse_map: bool = False, ignore_strand: bool = False) -> "GenomicRanges":
         """Calculate disjoint genomic positions for each distinct (seqname, strand) pair.
 
         Args:
@@ -1834,18 +1768,14 @@ def disjoin(
         new_seqnames = [x[0] for x in splits]
         new_strand = np.asarray([int(x[1]) for x in splits])
 
-        output = GenomicRanges(
-            seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges
-        )
+        output = GenomicRanges(seqnames=new_seqnames, strand=new_strand, ranges=all_merged_ranges)
 
         if with_reverse_map is True:
             output._mcols.set_column("revmap", rev_map, in_place=True)
 
         return output
 
-    def coverage(
-        self, shift: int = 0, width: Optional[int] = None, weight: int = 1
-    ) -> Dict[str, np.ndarray]:
+    def coverage(self, shift: int = 0, width: Optional[int] = None, weight: int = 1) -> Dict[str, np.ndarray]:
         """Calculate coverage for each chromosome, For each position, counts the number of ranges that cover it.
 
         Args:
@@ -1873,10 +1803,7 @@ def coverage(
         for chrm, group in chrm_grps.items():
             _grp_subset = self[group]
 
-            all_intvals = [
-                (x[0], x[1])
-                for x in zip(_grp_subset._ranges._start, _grp_subset._ranges.end)
-            ]
+            all_intvals = [(x[0], x[1]) for x in zip(_grp_subset._ranges._start, _grp_subset._ranges.end)]
 
             cov, _ = create_np_vector(intervals=all_intvals, with_reverse_map=False)
 
@@ -2006,23 +1933,15 @@ def intersect_ncls(self, other: "GenomicRanges") -> "GenomicRanges":
         other_end = other.end
 
         other_ncls = NCLS(other.start, other_end, np.arange(len(other)))
-        _self_indexes, _other_indexes = other_ncls.all_overlaps_both(
-            self.start, self_end, np.arange(len(self))
-        )
+        _self_indexes, _other_indexes = other_ncls.all_overlaps_both(self.start, self_end, np.arange(len(self)))
 
-        other_chrms = np.array(
-            [other._seqinfo._seqnames[other._seqnames[i]] for i in _other_indexes]
-        )
-        self_chrms = np.array(
-            [self._seqinfo._seqnames[self._seqnames[i]] for i in _self_indexes]
-        )
+        other_chrms = np.array([other._seqinfo._seqnames[other._seqnames[i]] for i in _other_indexes])
+        self_chrms = np.array([self._seqinfo._seqnames[self._seqnames[i]] for i in _self_indexes])
 
         other_strands = other._strand[_other_indexes]
         self_strands = self._strand[_self_indexes]
 
-        filtered_indexes = np.logical_and(
-            other_chrms == self_chrms, other_strands == self_strands
-        )
+        filtered_indexes = np.logical_and(other_chrms == self_chrms, other_strands == self_strands)
 
         self_starts = self.start[_self_indexes][filtered_indexes]
         other_starts = other.start[_other_indexes][filtered_indexes]
@@ -2100,9 +2019,7 @@ def find_overlaps(
             raise TypeError("'query' is not a `GenomicRanges` object.")
 
         if query_type not in OVERLAP_QUERY_TYPES:
-            raise ValueError(
-                f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}."
-            )
+            raise ValueError(f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}.")
 
         rev_map = [[] for _ in range(len(query))]
         subject_chrm_grps = self._group_indices_by_chrm(ignore_strand=ignore_strand)
@@ -2192,9 +2109,7 @@ def count_overlaps(
             raise TypeError("'query' is not a `GenomicRanges` object.")
 
         if query_type not in OVERLAP_QUERY_TYPES:
-            raise ValueError(
-                f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}."
-            )
+            raise ValueError(f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}.")
 
         rev_map = [0 for _ in range(len(query))]
         subject_chrm_grps = self._group_indices_by_chrm(ignore_strand=ignore_strand)
@@ -2284,9 +2199,7 @@ def subset_by_overlaps(
             raise TypeError("'query' is not a `GenomicRanges` object.")
 
         if query_type not in OVERLAP_QUERY_TYPES:
-            raise ValueError(
-                f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}."
-            )
+            raise ValueError(f"'{query_type}' must be one of {', '.join(OVERLAP_QUERY_TYPES)}.")
 
         rev_map = []
         subject_chrm_grps = self._group_indices_by_chrm(ignore_strand=ignore_strand)
@@ -2387,9 +2300,7 @@ def nearest(
                 _sub_subset = self[_subset]
                 _query_subset = query[indices]
 
-                res_idx = _sub_subset._ranges.nearest(
-                    query=_query_subset._ranges, select=select, delete_index=False
-                )
+                res_idx = _sub_subset._ranges.nearest(query=_query_subset._ranges, select=select, delete_index=False)
 
                 for j, val in enumerate(res_idx):
                     _rev_map = [_subset[x] for x in val]
@@ -2450,9 +2361,7 @@ def precede(
                 _sub_subset = self[_subset]
                 _query_subset = query[indices]
 
-                res_idx = _sub_subset._ranges.precede(
-                    query=_query_subset._ranges, select=select, delete_index=False
-                )
+                res_idx = _sub_subset._ranges.precede(query=_query_subset._ranges, select=select, delete_index=False)
 
                 for j, val in enumerate(res_idx):
                     _rev_map = [_subset[x] for x in val]
@@ -2513,9 +2422,7 @@ def follow(
                 _sub_subset = self[_subset]
                 _query_subset = query[indices]
 
-                res_idx = _sub_subset._ranges.follow(
-                    query=_query_subset._ranges, select=select, delete_index=False
-                )
+                res_idx = _sub_subset._ranges.follow(query=_query_subset._ranges, select=select, delete_index=False)
 
                 for j, val in enumerate(res_idx):
                     _rev_map = [_subset[x] for x in val]
@@ -2725,9 +2632,7 @@ def invert_strand(self, in_place: bool = False) -> "GenomicRanges":
     ######>> window methods <<######
     ################################
 
-    def tile_by_range(
-        self, n: Optional[int] = None, width: Optional[int] = None
-    ) -> "GenomicRanges":
+    def tile_by_range(self, n: Optional[int] = None, width: Optional[int] = None) -> "GenomicRanges":
         """Split each sequence length into chunks by ``n`` (number of intervals) or ``width`` (intervals with equal
         width).
 
@@ -2768,22 +2673,16 @@ def tile_by_range(
             elif width is not None:
                 twidth = width
 
-            all_intervals = split_intervals(
-                val._ranges._start[0], val._ranges.end[0] - 1, twidth
-            )
+            all_intervals = split_intervals(val._ranges._start[0], val._ranges.end[0] - 1, twidth)
 
             seqnames.extend([val.get_seqnames()[0]] * len(all_intervals))
             strand.extend([int(val.strand[0])] * len(all_intervals))
             starts.extend([x[0] for x in all_intervals])
             widths.extend(x[1] for x in all_intervals)
 
-        return GenomicRanges(
-            seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths)
-        )
+        return GenomicRanges(seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths))
 
-    def tile(
-        self, n: Optional[int] = None, width: Optional[int] = None
-    ) -> "GenomicRanges":
+    def tile(self, n: Optional[int] = None, width: Optional[int] = None) -> "GenomicRanges":
         """Split each interval by ``n`` (number of sub intervals) or ``width`` (intervals with equal width).
 
         Note: Either ``n`` or ``width`` must be provided but not both.
@@ -2824,15 +2723,11 @@ def tile(
                 twidth = math.ceil((val._ranges._width + 1) / (n))
 
                 if twidth < 1:
-                    raise RuntimeError(
-                        f"'width' of region is less than 'n' for range in: {counter}."
-                    )
+                    raise RuntimeError(f"'width' of region is less than 'n' for range in: {counter}.")
             elif width is not None:
                 twidth = width
 
-            all_intervals = split_intervals(
-                val._ranges._start[0], val._ranges.end[0] - 1, twidth
-            )
+            all_intervals = split_intervals(val._ranges._start[0], val._ranges.end[0] - 1, twidth)
 
             seqnames.extend([val.get_seqnames()[0]] * len(all_intervals))
             strand.extend([int(val.strand[0])] * len(all_intervals))
@@ -2841,9 +2736,7 @@ def tile(
 
             counter += 1
 
-        return GenomicRanges(
-            seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths)
-        )
+        return GenomicRanges(seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths))
 
     def sliding_windows(self, width: int, step: int = 1) -> "GenomicRanges":
         """Slide along each range by ``width`` (intervals with equal ``width``) and ``step``.
@@ -2881,9 +2774,7 @@ def sliding_windows(self, width: int, step: int = 1) -> "GenomicRanges":
             starts.extend([x[0] for x in all_intervals])
             widths.extend(x[1] for x in all_intervals)
 
-        return GenomicRanges(
-            seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths)
-        )
+        return GenomicRanges(seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths))
 
     @classmethod
     def tile_genome(
@@ -2953,9 +2844,7 @@ def tile_genome(
             starts.extend([x[0] for x in all_intervals])
             widths.extend(x[1] for x in all_intervals)
 
-        return GenomicRanges(
-            seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths)
-        )
+        return GenomicRanges(seqnames=seqnames, strand=strand, ranges=IRanges(start=starts, width=widths))
 
     def binned_average(
         self,
@@ -3034,9 +2923,7 @@ def split(self, groups: list) -> "GenomicRangesList":
         """
 
         if len(groups) != len(self):
-            raise ValueError(
-                "Number of groups must match the number of genomic elements."
-            )
+            raise ValueError("Number of groups must match the number of genomic elements.")
 
         gdict = group_by_indices(groups=groups)
 
@@ -3068,9 +2955,7 @@ def empty(cls):
     ######>> subtract <<######
     ##########################
 
-    def subtract(
-        self, x: "GenomicRanges", min_overlap: int = 1, ignore_strand: bool = False
-    ) -> "GenomicRangesList":
+    def subtract(self, x: "GenomicRanges", min_overlap: int = 1, ignore_strand: bool = False) -> "GenomicRangesList":
         """Subtract searches for features in ``x`` that overlap ``self`` by at least the number of base pairs given by
         ``min_overlap``.
 
@@ -3091,9 +2976,7 @@ def subtract(
             the subtracted regions.
         """
         _x_reduce = x.reduce(ignore_strand=ignore_strand)
-        hits = self.find_overlaps(
-            _x_reduce, min_overlap=min_overlap, ignore_strand=ignore_strand
-        )
+        hits = self.find_overlaps(_x_reduce, min_overlap=min_overlap, ignore_strand=ignore_strand)
 
         gr_idxs = [[] for _ in range(len(self))]
         for ii, ix in enumerate(hits):
diff --git a/src/genomicranges/GenomicRangesList.py b/src/genomicranges/GenomicRangesList.py
index e3b8828..831ba80 100644
--- a/src/genomicranges/GenomicRangesList.py
+++ b/src/genomicranges/GenomicRangesList.py
@@ -16,12 +16,8 @@ def _validate_ranges(ranges, num_ranges):
     if ranges is None:
         raise ValueError("'ranges' cannot be None.")
 
-    if not (
-        isinstance(ranges, GenomicRanges) or ut.is_list_of_type(ranges, GenomicRanges)
-    ):
-        raise TypeError(
-            "`ranges` must be either a `GenomicRanges` or a list of `GenomicRanges`."
-        )
+    if not (isinstance(ranges, GenomicRanges) or ut.is_list_of_type(ranges, GenomicRanges)):
+        raise TypeError("`ranges` must be either a `GenomicRanges` or a list of `GenomicRanges`.")
 
     if isinstance(ranges, list) and sum([len(x) for x in ranges]) != num_ranges:
         raise ValueError(
@@ -40,15 +36,11 @@ def _validate_optional_attrs(mcols, names, num_ranges):
         raise TypeError("'mcols' is not a `BiocFrame` object.")
 
     if mcols.shape[0] != num_ranges:
-        raise ValueError(
-            "Length of 'mcols' does not match the number of genomic elements."
-        )
+        raise ValueError("Length of 'mcols' does not match the number of genomic elements.")
 
     if names is not None:
         if len(names) != num_ranges:
-            raise ValueError(
-                "Length of 'names' does not match the number of genomic elements."
-            )
+            raise ValueError("Length of 'names' does not match the number of genomic elements.")
 
         if any(x is None for x in names):
             raise ValueError("'names' cannot contain None values.")
@@ -79,11 +71,7 @@ def __iter__(self):
 
     def __next__(self):
         if self._current_index < len(self._grl):
-            iter_row_index = (
-                self._grl.names[self._current_index]
-                if self._grl.names is not None
-                else None
-            )
+            iter_row_index = self._grl.names[self._current_index] if self._grl.names is not None else None
 
             iter_slice = self._grl[self._current_index]
             self._current_index += 1
@@ -109,20 +97,84 @@ class GenomicRangesList:
     .. code-block:: python
 
         a = GenomicRanges(
-            seqnames=["chr1", "chr2", "chr1", "chr3"],
-            ranges=IRanges([1, 3, 2, 4], [10, 30, 50, 60]),
-            strand=["-", "+", "*", "+"],
-            mcols=BiocFrame({"score": [1, 2, 3, 4]}),
+            seqnames=[
+                "chr1",
+                "chr2",
+                "chr1",
+                "chr3",
+            ],
+            ranges=IRanges(
+                [
+                    1,
+                    3,
+                    2,
+                    4,
+                ],
+                [
+                    10,
+                    30,
+                    50,
+                    60,
+                ],
+            ),
+            strand=[
+                "-",
+                "+",
+                "*",
+                "+",
+            ],
+            mcols=BiocFrame(
+                {
+                    "score": [
+                        1,
+                        2,
+                        3,
+                        4,
+                    ]
+                }
+            ),
         )
 
         b = GenomicRanges(
-            seqnames=["chr2", "chr4", "chr5"],
-            ranges=IRanges([3, 6, 4], [30, 50, 60]),
-            strand=["-", "+", "*"],
-            mcols=BiocFrame({"score": [2, 3, 4]}),
+            seqnames=[
+                "chr2",
+                "chr4",
+                "chr5",
+            ],
+            ranges=IRanges(
+                [3, 6, 4],
+                [
+                    30,
+                    50,
+                    60,
+                ],
+            ),
+            strand=[
+                "-",
+                "+",
+                "*",
+            ],
+            mcols=BiocFrame(
+                {
+                    "score": [
+                        2,
+                        3,
+                        4,
+                    ]
+                }
+            ),
         )
 
-        grl = GenomicRangesList(ranges=[gr1, gr2], names=["gene1", "gene2"])
+        grl = GenomicRangesList(
+            ranges=[
+                a,
+                b,
+            ],
+            names=[
+                "gene1",
+                "gene2",
+            ],
+        )
 
     Additionally, you may also provide metadata about the genomic elements in the dictionary
     using mcols attribute.
@@ -280,9 +332,7 @@ def __str__(self) -> str:
         Returns:
             A pretty-printed string containing the contents of this ``GenomicRangesList``.
         """
-        output = (
-            f"GenomicRangesList with {len(self)} range{'s' if len(self) != 1 else ''}"
-        )
+        output = f"GenomicRangesList with {len(self)} range{'s' if len(self) != 1 else ''}"
         output += f" and {len(self._mcols.get_column_names())} metadata column{'s' if len(self._mcols.get_column_names()) != 1 else ''}\n"
 
         if isinstance(self._ranges, GenomicRanges) and len(self._ranges) == 0:
@@ -355,9 +405,7 @@ def get_ranges(self) -> Union[GenomicRanges, List[GenomicRanges]]:
 
         return self._ranges
 
-    def set_ranges(
-        self, ranges: Union[GenomicRanges, List[GenomicRanges]], in_place: bool = False
-    ) -> "GenomicRanges":
+    def set_ranges(self, ranges: Union[GenomicRanges, List[GenomicRanges]], in_place: bool = False) -> "GenomicRanges":
         """Set new genomic ranges.
 
         Args:
@@ -543,9 +591,7 @@ def set_metadata(self, metadata: dict, in_place: bool = False) -> "GenomicRanges
             or as a reference to the (in-place-modified) original.
         """
         if not isinstance(metadata, dict):
-            raise TypeError(
-                f"`metadata` must be a dictionary, provided {type(metadata)}."
-            )
+            raise TypeError(f"`metadata` must be a dictionary, provided {type(metadata)}.")
 
         output = self._define_output(in_place)
         output._metadata = metadata
@@ -587,9 +633,7 @@ def groups(self, group: Union[str, int]) -> "GenomicRangesList":
             group = self._names.map(group)
 
         if group < 0 or group > len(self):
-            raise ValueError(
-                "'group' must be less than the number of genomic elements."
-            )
+            raise ValueError("'group' must be less than the number of genomic elements.")
 
         return self[group]
 
@@ -768,9 +812,7 @@ def to_pandas(self) -> "pandas.DataFrame":
     ######>> slicers <<#########
     ############################
 
-    def __getitem__(
-        self, args: Union[str, int, tuple, list, slice]
-    ) -> Union[GenomicRanges, "GenomicRangesList"]:
+    def __getitem__(self, args: Union[str, int, tuple, list, slice]) -> Union[GenomicRanges, "GenomicRangesList"]:
         """Subset individual genomic elements.
 
         Args:
@@ -808,9 +850,7 @@ def __getitem__(
             if isinstance(idx, list):
                 if ut.is_list_of_type(idx, bool):
                     if len(idx) != len(self):
-                        raise ValueError(
-                            "`indices` is a boolean vector, length should match the size of the data."
-                        )
+                        raise ValueError("`indices` is a boolean vector, length should match the size of the data.")
 
                     idx = [i for i in range(len(idx)) if idx[i] is True]
 
@@ -825,9 +865,7 @@ def __getitem__(
                 if self.mcols is not None:
                     new_mcols = self.mcols[idx, :]
 
-                return GenomicRangesList(
-                    new_ranges, new_range_lengths, new_names, new_mcols, self._metadata
-                )
+                return GenomicRangesList(new_ranges, new_range_lengths, new_names, new_mcols, self._metadata)
             elif isinstance(idx, (slice, range)):
                 if isinstance(idx, range):
                     idx = slice(idx.start, idx.stop, idx.step)
diff --git a/src/genomicranges/SeqInfo.py b/src/genomicranges/SeqInfo.py
index 06e8be7..0b1c19f 100644
--- a/src/genomicranges/SeqInfo.py
+++ b/src/genomicranges/SeqInfo.py
@@ -320,9 +320,7 @@ def get_seqnames(self) -> List[str]:
         """
         return self._seqnames
 
-    def set_seqnames(
-        self, seqnames: Sequence[str], in_place: bool = False
-    ) -> "SeqInfo":
+    def set_seqnames(self, seqnames: Sequence[str], in_place: bool = False) -> "SeqInfo":
         """
         Args:
             seqnames:
@@ -409,9 +407,7 @@ def seqlengths(self) -> List[int]:
         return self.get_seqlengths()
 
     @seqlengths.setter
-    def seqlengths(
-        self, seqlengths: Optional[Union[int, Sequence[int], Dict[str, int]]]
-    ):
+    def seqlengths(self, seqlengths: Optional[Union[int, Sequence[int], Dict[str, int]]]):
         warn(
             "Setting property 'seqlengths' is an in-place operation, use 'set_seqlengths' instead",
             UserWarning,
@@ -471,9 +467,7 @@ def is_circular(self) -> List[bool]:
         return self.get_is_circular()
 
     @is_circular.setter
-    def is_circular(
-        self, is_circular: Optional[Union[bool, Sequence[bool], Dict[str, bool]]]
-    ):
+    def is_circular(self, is_circular: Optional[Union[bool, Sequence[bool], Dict[str, bool]]]):
         warn(
             "Setting property 'is_circular' is an in-place operation, use 'set_is_circular' instead",
             UserWarning,
diff --git a/src/genomicranges/io/gtf.py b/src/genomicranges/io/gtf.py
index 541d4f4..41f20c0 100644
--- a/src/genomicranges/io/gtf.py
+++ b/src/genomicranges/io/gtf.py
@@ -97,9 +97,7 @@ def parse_gtf(
             comment=comment,
         )
 
-    rows = Parallel(n_jobs=-2)(
-        delayed(_parse_all_attribute)(row) for _, row in df.iterrows()
-    )
+    rows = Parallel(n_jobs=-2)(delayed(_parse_all_attribute)(row) for _, row in df.iterrows())
     gtf = DataFrame.from_records(rows)
     gtf.drop(["group"], axis=1)
 
diff --git a/src/genomicranges/io/ucsc.py b/src/genomicranges/io/ucsc.py
index 929405d..7e03a48 100644
--- a/src/genomicranges/io/ucsc.py
+++ b/src/genomicranges/io/ucsc.py
@@ -31,9 +31,7 @@ def access_gtf_ucsc(
     base_path = f"http://hgdownload.cse.ucsc.edu/goldenPath/{genome}/bigZips/genes/"
 
     if type not in ["refGene", "ensGene", "knownGene", "ncbiRefSeq"]:
-        raise ValueError(
-            f"type must be one of refGene, ensGene, knownGene or ncbiRefSeq, provided {type}"
-        )
+        raise ValueError(f"type must be one of refGene, ensGene, knownGene or ncbiRefSeq, provided {type}")
 
     full_path = f"{base_path}/{genome}.{type}.gtf.gz"
 
diff --git a/src/genomicranges/utils.py b/src/genomicranges/utils.py
index b09dd94..fa5fda0 100644
--- a/src/genomicranges/utils.py
+++ b/src/genomicranges/utils.py
@@ -12,9 +12,7 @@
 REV_STRAND_MAP = {"1": "+", "-1": "-", "0": "*"}
 
 
-def sanitize_strand_vector(
-    strand: Union[Sequence[str], Sequence[int], np.ndarray]
-) -> np.ndarray:
+def sanitize_strand_vector(strand: Union[Sequence[str], Sequence[int], np.ndarray]) -> np.ndarray:
     """Create a numpy representation for ``strand``.
 
     Mapping: 1 for "+" (forward strand), 0 for "*" (any strand) and -1 for "-" (reverse strand).
@@ -54,9 +52,7 @@ def sanitize_strand_vector(
             )
         return np.asarray(strand, dtype=np.int8)
     else:
-        raise TypeError(
-            "'strand' must be either a numpy vector, a list of integers or strings representing strand."
-        )
+        raise TypeError("'strand' must be either a numpy vector, a list of integers or strings representing strand.")
 
 
 def _sanitize_vec(x: Sequence):
@@ -210,9 +206,17 @@ def create_np_vector(
 
 
 def group_by_indices(groups: list) -> dict:
-    return {
-        k: [x[0] for x in v]
-        for k, v in groupby(
-            sorted(enumerate(groups), key=lambda x: x[1]), lambda x: x[1]
-        )
-    }
+    """Map each distinct value in ``groups`` to the positions where it occurs.
+
+    The ``(index, value)`` pairs are sorted by value before grouping, so every
+    occurrence of a value ends up in one entry.
+
+    Args:
+        groups:
+            Values to group. They are sorted and used as dictionary keys.
+
+    Returns:
+        A dictionary mapping each distinct value to the list of indices at
+        which it appears in ``groups``.
+    """
+    return {k: [x[0] for x in v] for k, v in groupby(sorted(enumerate(groups), key=lambda x: x[1]), lambda x: x[1])}