Skip to content

Commit

Permalink
Merge pull request #390 from monarch-initiative/minor-lint-fixes
Browse files Browse the repository at this point in the history
Fix lints and typing annotations
  • Loading branch information
ielis authored Jan 14, 2025
2 parents 1e31d7f + b06338a commit 6d4f65a
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 46 deletions.
1 change: 0 additions & 1 deletion src/gpsea/model/_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import enum
import typing

import hpotk


class Sex(enum.Enum):
Expand Down
16 changes: 8 additions & 8 deletions src/gpsea/model/_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ._variant import Variant, VariantInfo


I = typing.TypeVar('I', bound=hpotk.model.Identified)
IDENTIFIED = typing.TypeVar('IDENTIFIED', bound=hpotk.model.Identified)
"""
Anything that extends `Identified` (e.g. `Disease`, `Phenotype`, `Measurement`).
"""
Expand Down Expand Up @@ -289,8 +289,8 @@ def _check_id(
@staticmethod
def _find_first_by_id(
term_id: hpotk.TermId,
items: typing.Iterable[I],
) -> typing.Optional[I]:
items: typing.Iterable[IDENTIFIED],
) -> typing.Optional[IDENTIFIED]:
for m in items:
if m.identifier == term_id:
return m
Expand All @@ -299,13 +299,13 @@ def _find_first_by_id(

@staticmethod
def _unique_identifiers_of_identified(
items: typing.Iterable[I],
items: typing.Iterable[IDENTIFIED],
) -> typing.Collection[hpotk.TermId]:
return set(item.identifier for item in items)

@staticmethod
def _count_unique_identifiers(
items: typing.Iterable[I],
items: typing.Iterable[IDENTIFIED],
) -> int:
return len(Patient._unique_identifiers_of_identified(items))

Expand Down Expand Up @@ -668,8 +668,8 @@ def _count_individuals_with_condition(

def _iterate_through_items(
self,
extract_items: typing.Callable[[Patient,], typing.Iterable[I]],
) -> typing.Iterator[I]:
extract_items: typing.Callable[[Patient,], typing.Iterable[IDENTIFIED]],
) -> typing.Iterator[IDENTIFIED]:
return itertools.chain(item for individual in self._members for item in extract_items(individual))

def _get_most_common(
Expand All @@ -689,7 +689,7 @@ def _get_most_common(

@staticmethod
def _count_distinct_items(
items: typing.Iterable[I],
items: typing.Iterable[IDENTIFIED],
) -> int:
return len(set(item.identifier for item in items))

Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/model/_protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def __repr__(self) -> str:

def _deprecation_warning():
warnings.warn(
f"`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!",
"`FeatureType` was deprecated and will be removed prior `v1.0.0`. Use a `str` instead!",
DeprecationWarning,
)

Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/model/_test_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_iteration(self):

assert len(labels) == len(gts) == len(genotypes)

assert all(l.label in ('A', 'C', 'D') for l in labels)
assert all(sample_labels.label in ('A', 'C', 'D') for sample_labels in labels)
assert all(
gt in (Genotype.HETEROZYGOUS, Genotype.HEMIZYGOUS, Genotype.HOMOZYGOUS_REFERENCE)
for gt in gts
Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/preprocessing/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def _configure_imprecise_sv_annotator(
):
# Setup cache for SVs
if cache_dir is not None:
sv_cache_dir = os.path.join(cache_dir, "sv_cache")
_sv_cache_dir = os.path.join(cache_dir, "sv_cache")
# TODO: implement the cache.
# os.makedirs(sv_cache_dir, exist_ok=True)
# var_cache = VariantAnnotationCache(sv_cache_dir)
Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/preprocessing/_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def annotate(self, item: ImpreciseSvInfo) -> typing.Sequence[TranscriptAnnotatio

def _map_to_variant_effects(
self,
variant_class: str,
variant_class: VariantClass,
) -> typing.Sequence[VariantEffect]:
if variant_class == VariantClass.DEL:
return (VariantEffect.TRANSCRIPT_ABLATION,)
Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/preprocessing/_vep.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def format_coordinates_for_vep_query(vc: VariantCoordinates) -> str:
# TODO: Verify <INS> are working correctly
else:
if len(vc.ref) == 0 or len(vc.alt) == 0:
raise ValueError(f'Trimmed alleles are not yet supported!')
raise ValueError('Trimmed alleles are not yet supported!')
if len(vc.ref) == 1 and len(vc.alt) != 1:
# INS/DUP
start = start + 1 # we must "trim"
Expand Down
10 changes: 5 additions & 5 deletions src/gpsea/view/_draw_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _calc_aa_based_pos(pos_bases, tx_coordinates):
:param exons: exon positions
"""
print(f'{pos_bases=}')
exons, cds_start, cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end
exons, _cds_start, _cds_end = tx_coordinates.exons, tx_coordinates.cds_start, tx_coordinates.cds_end

num_nt = 0

Expand Down Expand Up @@ -325,7 +325,7 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM
# get minimum position on chromosome for all transcripts
min_exon_limit = np.min(exon_limits)
feature_limits = np.array([(feature.info.start, feature.info.end) for feature in protein_meta.protein_features])
feature_types = [pf.feature_type for pf in protein_meta.protein_features]
_feature_types = [pf.feature_type for pf in protein_meta.protein_features]
feature_limits = (feature_limits * 3) - 2 + min_exon_limit # to convert from codons to bases
variant_locations = list()
for ann in tx_anns:
Expand All @@ -334,11 +334,11 @@ def draw_fig(self, tx_coordinates: TranscriptCoordinates, protein_meta: ProteinM
if prot_eff_loc is not None:
variant_locations.append([prot_eff_loc.start, prot_eff_loc.end])
variant_locations = np.array(variant_locations)
variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
_variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
exon_labels = [f'{i + 1}' for i in range(len(exon_limits))]

protein_track_x_min, protein_track_x_max = 0.15, 0.85
protein_track_y_min, protein_track_y_max = 0.492, 0.508
protein_track_y_min, _protein_track_y_max = 0.492, 0.508
exon_y_min, exon_y_max = 0.39, 0.43
font_size = 12
text_padding = 0.004
Expand All @@ -364,7 +364,7 @@ def preprocess(x_absolute):
# x_axis
x_axis_y = protein_track_y_min - 0.02
x_axis_min_x, x_axis_max_x = protein_track_x_min, protein_track_x_max
big_tick_length, small_tick_length = 0.01, 0.005
big_tick_length, _small_tick_length = 0.01, 0.005
draw_line(x_axis_min_x, x_axis_y, x_axis_max_x, x_axis_y, line_color=self.axis_color,
line_width=1.0) # main line
draw_line(x_axis_min_x, x_axis_y - big_tick_length, x_axis_min_x, x_axis_y, line_color=self.axis_color,
Expand Down
48 changes: 31 additions & 17 deletions src/gpsea/view/_protein_visualizable.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import typing

from gpsea.model import *
from gpsea.model import (
Cohort,
ProteinMetadata,
TranscriptAnnotation,
TranscriptCoordinates,
Variant,
VariantEffect,
)
import numpy as np

from gpsea.model.genome._genome import Region

class ProteinVisualizable:

class ProteinVisualizable:
def __init__(
self,
tx_coordinates: TranscriptCoordinates,
protein_meta: ProteinMetadata,
cohort: Cohort,
self,
tx_coordinates: TranscriptCoordinates,
protein_meta: ProteinMetadata,
cohort: Cohort,
) -> None:
self._tx_coordinates = tx_coordinates
self._protein_meta = protein_meta
Expand All @@ -19,15 +27,15 @@ def __init__(
transcript_annotations = ProteinVisualizable._get_tx_anns(
cohort.all_variants(), self._tx_coordinates.identifier
)
self._variant_regions_on_protein = list()
variant_regions_on_protein: typing.List[Region] = list()
self._variant_effect = list()
for tx_ann in transcript_annotations:
variant_effects = tx_ann.variant_effects
if len(variant_effects) == 0:
continue
prot_eff_loc = tx_ann.protein_effect_location
if prot_eff_loc is not None:
self._variant_regions_on_protein.append(prot_eff_loc)
variant_regions_on_protein.append(prot_eff_loc)
self._variant_effect.append(variant_effects[0])

self._protein_feature_names = list()
Expand All @@ -40,13 +48,17 @@ def __init__(
self._protein_feature_starts.append(feature.info.start)
self._protein_feature_ends.append(feature.info.end)

self._variant_locations = np.array([item.start for item in self._variant_regions_on_protein])
self._variant_locations = np.array(
[item.start for item in variant_regions_on_protein]
)

#variant_locations = (variant_locations * 3) - 2 + min_exon_limit # to convert from codons to bases
#variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
# variant_locations = (variant_locations * 3) - 2 + min_exon_limit # to convert from codons to bases
# variant_effects = np.array([(ann.variant_effects[0]) for ann in tx_anns])
# count marker occurrences and remove duplicates
self._variant_locations_counted_absolute, self._marker_counts = np.unique(
self._variant_locations, axis=0, return_counts=True,
self._variant_locations,
axis=0,
return_counts=True,
)

if protein_meta.protein_length > 0:
Expand All @@ -59,8 +71,8 @@ def __init__(

@staticmethod
def _get_tx_anns(
variants: typing.Iterable[Variant],
tx_id: str,
variants: typing.Iterable[Variant],
tx_id: str,
) -> typing.Sequence[TranscriptAnnotation]:
"""
By default, the API returns transcript annotations for many transcripts.
Expand All @@ -74,7 +86,9 @@ def _get_tx_anns(
tx_ann = ann
break
if tx_ann is None:
raise ValueError(f'The transcript annotation for {tx_id} was not found!')
raise ValueError(
f"The transcript annotation for {tx_id} was not found!"
)
else:
tx_anns.append(tx_ann)

Expand Down Expand Up @@ -103,7 +117,7 @@ def protein_feature_starts(self) -> typing.Sequence[int]:
@property
def protein_feature_ends(self) -> typing.Sequence[int]:
return self._protein_feature_ends

@property
def protein_feature_types(self) -> typing.Sequence[str]:
return self._protein_feature_types
Expand All @@ -129,7 +143,7 @@ def protein_length(self) -> int:
@property
def protein_feature_names(self) -> typing.Sequence[str]:
return self._protein_feature_names

@property
def variant_effects(self) -> typing.Sequence[VariantEffect]:
return self._variant_effect
Expand Down
16 changes: 7 additions & 9 deletions src/gpsea/view/_txp.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
from collections import defaultdict
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from matplotlib.lines import Line2D
import typing
from gpsea.model import Variant, TranscriptCoordinates, ProteinMetadata


Expand All @@ -30,15 +28,15 @@ def draw_variants(self, variants: typing.Iterable[Variant],
tx: TranscriptCoordinates,
protein: ProteinMetadata):
title = f"{protein.protein_id} ({protein.label})"
fig, ax = plt.subplots(1, figsize=(10, 10))
_, ax = plt.subplots(1, figsize=(10, 10))
protein_domains = set()
THRESHOLD = 2
_THRESHOLD = 2
BOTTOM_MARGIN = 20
amino_acid_len = tx.get_codon_count()
# draw a box that is ten aax tall, where aax is the dimension of one amino acid
prot_start = get_interpolated_location_in_protein(1, amino_acid_len)
prot_end = get_interpolated_location_in_protein(amino_acid_len, amino_acid_len)
box_height = 10/amino_acid_len
_box_height = 10/amino_acid_len
prot_width = prot_end - prot_start + 1
protein_height = prot_width/20
#rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height)
Expand All @@ -59,11 +57,11 @@ def draw_variants(self, variants: typing.Iterable[Variant],
hgvs_cdna = hgvs
variant_effects = tx_annot.variant_effects
if len(variant_effects) > 1:
var_effect = "MULTIPLE"
_var_effect = "MULTIPLE"
elif len(variant_effects) == 0:
var_effect = "UNKNOWN"
_var_effect = "UNKNOWN"
else:
var_effect = variant_effects[0].name
_var_effect = variant_effects[0].name
for p in tx_annot.protein_affected:
for f in p.domains():
protein_domains.add(f.info)
Expand Down Expand Up @@ -92,7 +90,7 @@ def draw_variants(self, variants: typing.Iterable[Variant],
start = feature.start
end = feature.end
#print(name, start, end, box_color)
box_height = 10/amino_acid_len
_box_height = 10/amino_acid_len
prot_width = prot_end - prot_start + 1
protein_height = prot_width/20
#rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height)
Expand Down
2 changes: 1 addition & 1 deletion tests/analysis/pscore/test_de_vries_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest

from gpsea.analysis.pscore import DeVriesPhenotypeScorer
from gpsea.model import Patient, SampleLabels, Phenotype, Sex
from gpsea.model import Patient, Phenotype

intrauterine_growth_retardation = 'HP:0001511'
small_for_gestational_age = 'HP:0001518'
Expand Down

0 comments on commit 6d4f65a

Please sign in to comment.