Merge pull request #390 from monarch-initiative/minor-lint-fixes

Fix lints and typing annotations
monarch-initiative · Jan 14, 2025 · 6d4f65a
2 parents 1e31d7f + b06338a
commit 6d4f65a
Show file tree

Hide file tree

Showing 11 changed files with 57 additions and 46 deletions.
diff --git a/src/gpsea/model/_base.py b/src/gpsea/model/_base.py
@@ -1,7 +1,6 @@
 import enum
 import typing
 
-import hpotk
 
 
 class Sex(enum.Enum):

diff --git a/src/gpsea/model/_cohort.py b/src/gpsea/model/_cohort.py
@@ -13,7 +13,7 @@
 from ._variant import Variant, VariantInfo
 
 
-I = typing.TypeVar('I', bound=hpotk.model.Identified)
+IDENTIFIED = typing.TypeVar('IDENTIFIED', bound=hpotk.model.Identified)
 """
 Anything that extends `Identified` (e.g. `Disease`, `Phenotype`, `Measurement`).
 """
@@ -289,8 +289,8 @@ def _check_id(
     @staticmethod
     def _find_first_by_id(
         term_id: hpotk.TermId,
-        items: typing.Iterable[I],
-    ) -> typing.Optional[I]:
+        items: typing.Iterable[IDENTIFIED],
+    ) -> typing.Optional[IDENTIFIED]:
         for m in items:
             if m.identifier == term_id:
                 return m
@@ -299,13 +299,13 @@ def _find_first_by_id(
 
     @staticmethod
     def _unique_identifiers_of_identified(
-        items: typing.Iterable[I],
+        items: typing.Iterable[IDENTIFIED],
     ) -> typing.Collection[hpotk.TermId]:
         return set(item.identifier for item in items)
 
     @staticmethod
     def _count_unique_identifiers(
-        items: typing.Iterable[I],
+        items: typing.Iterable[IDENTIFIED],
     ) -> int:
         return len(Patient._unique_identifiers_of_identified(items))
 
@@ -668,8 +668,8 @@ def _count_individuals_with_condition(
 
     def _iterate_through_items(
         self,
-        extract_items: typing.Callable[[Patient,], typing.Iterable[I]],
-    ) -> typing.Iterator[I]:
+        extract_items: typing.Callable[[Patient,], typing.Iterable[IDENTIFIED]],
+    ) -> typing.Iterator[IDENTIFIED]:
         return itertools.chain(item for individual in self._members for item in extract_items(individual))
 
     def _get_most_common(
@@ -689,7 +689,7 @@ def _get_most_common(
 
     @staticmethod
     def _count_distinct_items(
-        items: typing.Iterable[I],
+        items: typing.Iterable[IDENTIFIED],
     ) -> int:
         return len(set(item.identifier for item in items))
 

diff --git a/src/gpsea/model/_protein.py b/src/gpsea/model/_protein.py
@@ -83,7 +83,7 @@ def __repr__(self) -> str:
 
 def _deprecation_warning():
     warnings.warn(
-            f"`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!",
+            "`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!",
             DeprecationWarning,
         )
 

diff --git a/src/gpsea/model/_test_gt.py b/src/gpsea/model/_test_gt.py
@@ -30,7 +30,7 @@ def test_iteration(self):
 
         assert len(labels) == len(gts) == len(genotypes)
 
-        assert all(l.label in ('A', 'C', 'D') for l in labels)
+        assert all(sample_labels.label in ('A', 'C', 'D') for sample_labels in labels)
         assert all(
             gt in (Genotype.HETEROZYGOUS, Genotype.HEMIZYGOUS, Genotype.HOMOZYGOUS_REFERENCE)
             for gt in gts

diff --git a/src/gpsea/preprocessing/_config.py b/src/gpsea/preprocessing/_config.py
@@ -401,7 +401,7 @@ def _configure_imprecise_sv_annotator(
 ):
     # Setup cache for SVs
     if cache_dir is not None:
-        sv_cache_dir = os.path.join(cache_dir, "sv_cache")
+        _sv_cache_dir = os.path.join(cache_dir, "sv_cache")
         # TODO: implement the cache.
         # os.makedirs(sv_cache_dir, exist_ok=True)
         # var_cache = VariantAnnotationCache(sv_cache_dir)

diff --git a/src/gpsea/preprocessing/_generic.py b/src/gpsea/preprocessing/_generic.py
@@ -41,7 +41,7 @@ def annotate(self, item: ImpreciseSvInfo) -> typing.Sequence[TranscriptAnnotatio
 
     def _map_to_variant_effects(
         self, 
-        variant_class: str,
+        variant_class: VariantClass,
     ) -> typing.Sequence[VariantEffect]:
         if variant_class == VariantClass.DEL:
             return (VariantEffect.TRANSCRIPT_ABLATION,)

diff --git a/src/gpsea/preprocessing/_vep.py b/src/gpsea/preprocessing/_vep.py
@@ -186,7 +186,7 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str:
             # TODO: Verify <INS> are working correctly
         else:
             if len(vc.ref) == 0 or len(vc.alt) == 0:
-                raise ValueError(f'Trimmed alleles are not yet supported!')
+                raise ValueError('Trimmed alleles are not yet supported!')
             if len(vc.ref) == 1 and len(vc.alt) != 1:
                 # INS/DUP
                 start = start + 1  # we must "trim"

diff --git a/src/gpsea/view/_draw_variants.py b/src/gpsea/view/_draw_variants.py
@@ -36,7 +36,7 @@ def _calc_aa_based_pos(pos_bases, tx_coordinates):
     :param exons: exon positions
     """
     print(f'{pos_bases=}')
-    exons, cds_start, cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end
+    exons, _cds_start, _cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end
 
     num_nt = 0
 
@@ -325,7 +325,7 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM
         # get minimum position on chromosome for all transcripts
         min_exon_limit = np.min(exon_limits)
         feature_limits = np.array([(feature.info.start, feature.info.end) for feature in protein_meta.protein_features])
-        feature_types = [pf.feature_type for pf in protein_meta.protein_features]
+        _feature_types = [pf.feature_type for pf in protein_meta.protein_features]
         feature_limits = (feature_limits * 3) - 2 + min_exon_limit  # to convert from codons to bases
         variant_locations = list()
         for ann in tx_anns:
@@ -334,11 +334,11 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM
                 if prot_eff_loc is not None:
                     variant_locations.append([prot_eff_loc.start, prot_eff_loc.end])
         variant_locations = np.array(variant_locations)
-        variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
+        _variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
         exon_labels = [f'{i + 1}' for i in range(len(exon_limits))]
 
         protein_track_x_min, protein_track_x_max = 0.15, 0.85
-        protein_track_y_min, protein_track_y_max = 0.492, 0.508
+        protein_track_y_min, _protein_track_y_max = 0.492, 0.508
         exon_y_min, exon_y_max = 0.39, 0.43
         font_size = 12
         text_padding = 0.004
@@ -364,7 +364,7 @@ def preprocess(x_absolute):
         # x_axis
         x_axis_y = protein_track_y_min - 0.02
         x_axis_min_x, x_axis_max_x = protein_track_x_min, protein_track_x_max
-        big_tick_length, small_tick_length = 0.01, 0.005
+        big_tick_length, _small_tick_length = 0.01, 0.005
         draw_line(x_axis_min_x, x_axis_y, x_axis_max_x, x_axis_y, line_color=self.axis_color,
                   line_width=1.0)  # main line
         draw_line(x_axis_min_x, x_axis_y - big_tick_length, x_axis_min_x, x_axis_y, line_color=self.axis_color,

diff --git a/src/gpsea/view/_protein_visualizable.py b/src/gpsea/view/_protein_visualizable.py
@@ -1,16 +1,24 @@
 import typing
 
-from gpsea.model import *
+from gpsea.model import (
+    Cohort,
+    ProteinMetadata,
+    TranscriptAnnotation,
+    TranscriptCoordinates,
+    Variant,
+    VariantEffect,
+)
 import numpy as np
 
+from gpsea.model.genome._genome import Region
 
-class ProteinVisualizable:
 
+class ProteinVisualizable:
     def __init__(
-            self,
-            tx_coordinates: TranscriptCoordinates,
-            protein_meta: ProteinMetadata,
-            cohort: Cohort,
+        self,
+        tx_coordinates: TranscriptCoordinates,
+        protein_meta: ProteinMetadata,
+        cohort: Cohort,
     ) -> None:
         self._tx_coordinates = tx_coordinates
         self._protein_meta = protein_meta
@@ -19,15 +27,15 @@ def __init__(
         transcript_annotations = ProteinVisualizable._get_tx_anns(
             cohort.all_variants(), self._tx_coordinates.identifier
         )
-        self._variant_regions_on_protein = list()
+        variant_regions_on_protein: typing.List[Region] = list()
         self._variant_effect = list()
         for tx_ann in transcript_annotations:
             variant_effects = tx_ann.variant_effects
             if len(variant_effects) == 0:
                 continue
             prot_eff_loc = tx_ann.protein_effect_location
             if prot_eff_loc is not None:
-                self._variant_regions_on_protein.append(prot_eff_loc)
+                variant_regions_on_protein.append(prot_eff_loc)
                 self._variant_effect.append(variant_effects[0])
 
         self._protein_feature_names = list()
@@ -40,13 +48,17 @@ def __init__(
             self._protein_feature_starts.append(feature.info.start)
             self._protein_feature_ends.append(feature.info.end)
 
-        self._variant_locations = np.array([item.start for item in self._variant_regions_on_protein])
+        self._variant_locations = np.array(
+            [item.start for item in variant_regions_on_protein]
+        )
 
-        #variant_locations = (variant_locations * 3) - 2 + min_exon_limit  # to convert from codons to bases
-        #variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
+        # variant_locations = (variant_locations * 3) - 2 + min_exon_limit  # to convert from codons to bases
+        # variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
         # count marker occurrences and remove duplicates
         self._variant_locations_counted_absolute, self._marker_counts = np.unique(
-            self._variant_locations, axis=0, return_counts=True,
+            self._variant_locations,
+            axis=0,
+            return_counts=True,
         )
 
         if protein_meta.protein_length > 0:
@@ -59,8 +71,8 @@ def __init__(
 
     @staticmethod
     def _get_tx_anns(
-            variants: typing.Iterable[Variant],
-            tx_id: str,
+        variants: typing.Iterable[Variant],
+        tx_id: str,
     ) -> typing.Sequence[TranscriptAnnotation]:
         """
         By default, the API returns transcript annotations for many transcripts.
@@ -74,7 +86,9 @@ def _get_tx_anns(
                     tx_ann = ann
                     break
             if tx_ann is None:
-                raise ValueError(f'The transcript annotation for {tx_id} was not found!')
+                raise ValueError(
+                    f"The transcript annotation for {tx_id} was not found!"
+                )
             else:
                 tx_anns.append(tx_ann)
 
@@ -103,7 +117,7 @@ def protein_feature_starts(self) -> typing.Sequence[int]:
     @property
     def protein_feature_ends(self) -> typing.Sequence[int]:
         return self._protein_feature_ends
-    
+
     @property
     def protein_feature_types(self) -> typing.Sequence[str]:
         return self._protein_feature_types
@@ -129,7 +143,7 @@ def protein_length(self) -> int:
     @property
     def protein_feature_names(self) -> typing.Sequence[str]:
         return self._protein_feature_names
-            
+
     @property
     def variant_effects(self) -> typing.Sequence[VariantEffect]:
         return self._variant_effect

diff --git a/src/gpsea/view/_txp.py b/src/gpsea/view/_txp.py
@@ -2,9 +2,7 @@
 from collections import defaultdict
 from matplotlib import pyplot as plt
 from matplotlib.patches import Rectangle
-from matplotlib.collections import PatchCollection
 from matplotlib.lines import Line2D
-import typing
 from gpsea.model import Variant, TranscriptCoordinates, ProteinMetadata
 
 
@@ -30,15 +28,15 @@ def draw_variants(self, variants: typing.Iterable[Variant],
                     tx: TranscriptCoordinates,
                     protein: ProteinMetadata):
         title = f"{protein.protein_id} ({protein.label})"
-        fig, ax = plt.subplots(1, figsize=(10, 10))
+        _, ax = plt.subplots(1, figsize=(10, 10))
         protein_domains = set()
-        THRESHOLD = 2
+        _THRESHOLD = 2
         BOTTOM_MARGIN = 20
         amino_acid_len = tx.get_codon_count()
         # draw a box that is ten aax tall, where aax is the dimension of one amino acid
         prot_start = get_interpolated_location_in_protein(1, amino_acid_len)
         prot_end = get_interpolated_location_in_protein(amino_acid_len, amino_acid_len)
-        box_height = 10/amino_acid_len
+        _box_height = 10/amino_acid_len
         prot_width = prot_end - prot_start + 1
         protein_height = prot_width/20
         #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height)
@@ -59,11 +57,11 @@ def draw_variants(self, variants: typing.Iterable[Variant],
                 hgvs_cdna = hgvs
             variant_effects = tx_annot.variant_effects
             if len(variant_effects) > 1:
-                var_effect = "MULTIPLE"
+                _var_effect = "MULTIPLE"
             elif len(variant_effects) == 0:
-                var_effect = "UNKNOWN"
+                _var_effect = "UNKNOWN"
             else:
-                var_effect = variant_effects[0].name
+                _var_effect = variant_effects[0].name
             for p in tx_annot.protein_affected:
                 for f in p.domains():
                     protein_domains.add(f.info)
@@ -92,7 +90,7 @@ def draw_variants(self, variants: typing.Iterable[Variant],
                 start = feature.start
                 end = feature.end
                 #print(name, start, end, box_color)
-                box_height = 10/amino_acid_len
+                _box_height = 10/amino_acid_len
                 prot_width = prot_end - prot_start + 1
                 protein_height = prot_width/20
                 #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height)

diff --git a/tests/analysis/pscore/test_de_vries_scorer.py b/tests/analysis/pscore/test_de_vries_scorer.py
@@ -4,7 +4,7 @@
 import pytest
 
 from gpsea.analysis.pscore import DeVriesPhenotypeScorer
-from gpsea.model import Patient, SampleLabels, Phenotype, Sex
+from gpsea.model import Patient, Phenotype
 
 intrauterine_growth_retardation = 'HP:0001511'
 small_for_gestational_age = 'HP:0001518'