From 57ca4d3c5d490cc0a3140b476b288543967f9255 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:01:39 +1000 Subject: [PATCH 1/3] Address mypy errors (#546) * Address mypy errors * More linting updates * Two tests to fix checks * Rejig lint to build package to mypy scripts * Add strawberry[debug-server] requirement * Revert to the OpenApiGenNoneType for tests * Add extra import --------- Co-authored-by: Michael Franklin --- .github/workflows/lint.yaml | 38 +++- .pre-commit-config.yaml | 174 ++++++++++--------- api/routes/analysis.py | 86 +++++---- db/python/connect.py | 4 +- db/python/enum_tables/enums.py | 10 +- db/python/layers/seqr.py | 14 +- db/python/layers/sequencing_group.py | 13 +- db/python/layers/web.py | 15 +- db/python/tables/analysis.py | 12 +- db/python/tables/project.py | 15 +- db/python/tables/sequencing_group.py | 22 +-- etl/endpoint/main.py | 6 +- metamist/parser/generic_parser.py | 42 ++--- models/base.py | 2 +- models/models/__init__.py | 41 ++--- models/models/analysis.py | 2 +- models/models/assay.py | 19 +- models/models/participant.py | 22 +-- models/models/sample.py | 26 ++- models/models/sequencing_group.py | 24 +-- mypy.ini | 7 + regenerate_api.py | 4 +- requirements-dev.txt | 2 + requirements.txt | 2 +- scripts/20230420_sequencinggroupmigration.py | 10 +- scripts/create_test_subset.py | 26 ++- scripts/parse_ont_sheet.py | 18 +- scripts/parse_ped.py | 10 +- scripts/sync_seqr.py | 27 +-- test/test_analysis.py | 17 +- test/test_assay.py | 30 +++- test/test_generic_auditor.py | 64 ++++--- test/test_generic_filters.py | 6 - test/test_graphql.py | 46 +++-- test/test_import_individual_metadata.py | 20 ++- test/test_parse_generic_metadata.py | 38 ++-- test/test_parse_ont_processor.py | 7 +- test/test_parse_ont_sheet.py | 11 +- test/test_pedigree.py | 11 +- test/test_sample.py | 7 +- test/test_search.py | 98 +++++++---- test/test_web.py | 72 ++++---- web/src/pages/project/ProjectGrid.tsx | 2 +- 43 files changed, 605 insertions(+), 517 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index d4863658a..68f65dc86 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,22 +4,52 @@ on: push jobs: lint: runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: 1 + BUILDKIT_PROGRESS: plain + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + # used for generating API + SM_DOCKER: samplemetadata:dev defaults: run: shell: bash -eo pipefail -l {0} - steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@main - uses: actions/setup-python@v2 with: python-version: "3.10" - cache: "pip" - - name: Install packages + - uses: actions/setup-java@v2 + with: + distribution: "temurin" # See 'Supported distributions' for available options + java-version: "17" + + - name: Setup build env + run: | + set -euxo pipefail + + pip install -r requirements-dev.txt + pip install -r requirements.txt + + # openapi-generator + wget https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/5.3.0/openapi-generator-cli-5.3.0.jar -O openapi-generator-cli.jar + + - name: "build image" + run: | + docker build \ + --build-arg SM_ENVIRONMENT=local \ + --tag $SM_DOCKER \ + -f deploy/api/Dockerfile \ + . + + - name: Build + install packages run: | + export OPENAPI_COMMAND="java -jar openapi-generator-cli.jar" + python regenerate_api.py pip install -r requirements-dev.txt pip install . 
+ mkdir .mypy_cache - name: pre-commit run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 155d7cda1..c4d4ec406 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,92 +1,98 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - exclude: '\.*conda/.*' - - id: end-of-file-fixer - - id: trailing-whitespace - exclude: '\.txt$|\.tsv$' - - id: check-case-conflict - - id: check-merge-conflict - - id: detect-private-key - - id: debug-statements - - id: check-added-large-files + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + exclude: '\.*conda/.*' + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: '\.txt$|\.tsv$' + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: debug-statements + - id: check-added-large-files - - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.33.0 - hooks: - - id: markdownlint - args: ["--config", ".markdownlint.json"] + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.33.0 + hooks: + - id: markdownlint + args: ["--config", ".markdownlint.json"] - - repo: https://github.com/ambv/black - rev: 23.3.0 - hooks: - - id: black - args: [.] - pass_filenames: false - always_run: true - exclude: ^metamist/ + - repo: https://github.com/ambv/black + rev: 23.3.0 + hooks: + - id: black + args: [.] + pass_filenames: false + always_run: true + exclude: ^metamist/ - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear, flake8-quotes] + - repo: https://github.com/PyCQA/flake8 + rev: "6.0.0" + hooks: + - id: flake8 + additional_dependencies: [flake8-bugbear, flake8-quotes] - # Using system installation of pylint to support checking python module imports - - repo: local - hooks: - - id: pylint - name: pylint - entry: pylint - language: system - types: [python] + # Using system installation of pylint to support checking python module imports + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] - # mypy - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 - hooks: - - id: mypy - args: - [ - --pretty, - --show-error-codes, - --no-strict-optional, - --ignore-missing-imports, - --install-types, - --non-interactive, - ] - additional_dependencies: - - strawberry-graphql[fastapi]==0.138.1 + # mypy + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.5.1 + hooks: + - id: mypy + args: + [ + --pretty, + --show-error-codes, + --no-strict-optional, + --ignore-missing-imports, + --install-types, + --non-interactive, + --show-error-context, + --check-untyped-defs, + --explicit-package-bases, + --disable-error-code, + operator, + ] + additional_dependencies: + - strawberry-graphql[fastapi]==0.206.0 + - types-PyMySQL==1.1.0.1 - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.0-alpha.4" - hooks: - - id: prettier - # I'm not exactly sure why it changes behaviour, but - # calling `cd web`, then calling `ls src/**/*.tsx` - # returns different results to `cd web && ls src/**/*.tsx` - # so just include both patterns here - entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.0-alpha.4" + hooks: + - id: prettier + # I'm not exactly sure why it changes behaviour, but + 
# calling `cd web`, then calling `ls src/**/*.tsx` + # returns different results to `cd web && ls src/**/*.tsx` + # so just include both patterns here + entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' - - repo: https://github.com/pre-commit/mirrors-eslint - rev: "v8.33.0" - hooks: - - id: eslint - entry: bash -c 'cd web && eslint' - files: \.[jt]sx?$ - types: [file] - additional_dependencies: - - eslint@^7.32.0 - - eslint-config-airbnb@^19.0.4 - - eslint-config-airbnb-base@^15.0.0 - - eslint-config-airbnb-typescript@^17.0.0 - - eslint-config-prettier@^8.6.0 - - eslint-plugin-import@^2.26.0 - - eslint-plugin-jsx-a11y@^6.6.1 - - eslint-plugin-prettier@^4.2.1 - - eslint-plugin-react@^7.31.11 - - eslint-plugin-react-hooks@^4.6.0 - - "@typescript-eslint/eslint-plugin@^5.48.0" - - "@typescript-eslint/parser@^5.48.0" + - repo: https://github.com/pre-commit/mirrors-eslint + rev: "v8.33.0" + hooks: + - id: eslint + entry: bash -c 'cd web && eslint' + files: \.[jt]sx?$ + types: [file] + additional_dependencies: + - eslint@^7.32.0 + - eslint-config-airbnb@^19.0.4 + - eslint-config-airbnb-base@^15.0.0 + - eslint-config-airbnb-typescript@^17.0.0 + - eslint-config-prettier@^8.6.0 + - eslint-plugin-import@^2.26.0 + - eslint-plugin-jsx-a11y@^6.6.1 + - eslint-plugin-prettier@^4.2.1 + - eslint-plugin-react@^7.31.11 + - eslint-plugin-react-hooks@^4.6.0 + - "@typescript-eslint/eslint-plugin@^5.48.0" + - "@typescript-eslint/parser@^5.48.0" diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 0af9ade7b..1a0057dcc 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -8,12 +8,11 @@ from pydantic import BaseModel from starlette.responses import StreamingResponse -from api.utils.dates import parse_date_only_string from api.utils.db import ( - get_projectless_db_connection, + Connection, get_project_readonly_connection, get_project_write_connection, - Connection, + get_projectless_db_connection, ) from api.utils.export import ExportType from db.python.layers.analysis import AnalysisLayer @@ -22,20 +21,17 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models.analysis import ( + Analysis, AnalysisInternal, ProjectSizeModel, - SequencingGroupSizeModel, - DateSizeModel, - Analysis, ) from models.utils.sample_id_format import ( sample_id_transform_to_raw_list, - sample_id_format, ) from models.utils.sequencing_group_id_format import ( + sequencing_group_id_format, sequencing_group_id_format_list, sequencing_group_id_transform_to_raw_list, - sequencing_group_id_format, ) router = APIRouter(prefix='/analysis', tags=['analysis']) @@ -326,40 +322,42 @@ async def get_sequencing_group_file_sizes( """ Get the per sample file size by type over the given projects and date range """ - atable = AnalysisLayer(connection) - - # Check access to projects - project_ids = None - pt = ProjectPermissionsTable(connection=connection.connection) - project_ids = await pt.get_project_ids_from_names_and_user( - connection.author, project_names, readonly=True - ) - - # Map from internal pids to project name - prj_name_map = dict(zip(project_ids, project_names)) - - # Convert dates - start = parse_date_only_string(start_date) - end = parse_date_only_string(end_date) - - # Get results with internal ids as keys - results = await atable.get_sequencing_group_file_sizes( - project_ids=project_ids, start_date=start, end_date=end - ) - - # Convert to the correct output type, converting internal ids to external - 
fixed_pids: list[Any] = [ - ProjectSizeModel( - project=prj_name_map[project_data['project']], - samples=[ - SequencingGroupSizeModel( - sample=sample_id_format(s['sample']), - dates=[DateSizeModel(**d) for d in s['dates']], - ) - for s in project_data['samples'] - ], - ) - for project_data in results - ] - return fixed_pids + raise NotImplementedError('This route is broken, and not properly implemented yet') + # atable = AnalysisLayer(connection) + + # # Check access to projects + # project_ids = None + # pt = ProjectPermissionsTable(connection=connection.connection) + # project_ids = await pt.get_project_ids_from_names_and_user( + # connection.author, project_names, readonly=True + # ) + + # # Map from internal pids to project name + # prj_name_map = dict(zip(project_ids, project_names)) + + # # Convert dates + # start = parse_date_only_string(start_date) + # end = parse_date_only_string(end_date) + + # # Get results with internal ids as keys + # results = await atable.get_sequencing_group_file_sizes( + # project_ids=project_ids, start_date=start, end_date=end + # ) + + # # Convert to the correct output type, converting internal ids to external + # fixed_pids: list[Any] = [ + # ProjectSizeModel( + # project=prj_name_map[project_data['project']], + # samples=[ + # SequencingGroupSizeModel( + # sample=sample_id_format(s['sample']), + # dates=[DateSizeModel(**d) for d in s['dates']], + # ) + # for s in project_data['samples'] + # ], + # ) + # for project_data in results + # ] + + # return fixed_pids diff --git a/db/python/connect.py b/db/python/connect.py index 0b9acf57a..8a5a7811e 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -121,7 +121,9 @@ def get_connection_string(self): if self.port: _host += f':{self.port}' - options = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} + options: dict[ + str, str | int + ] = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} _options = '&'.join(f'{k}={v}' for k, v in options.items()) url = f'mysql://{u_p}@{_host}/{self.dbname}?{_options}' diff --git a/db/python/enum_tables/enums.py b/db/python/enum_tables/enums.py index e6419b98b..113daf3d6 100644 --- a/db/python/enum_tables/enums.py +++ b/db/python/enum_tables/enums.py @@ -1,6 +1,7 @@ -import re import abc +import re from functools import lru_cache + from async_lru import alru_cache from db.python.connect import DbBase @@ -36,7 +37,8 @@ def _get_table_name(cls): matcher = table_name_matcher.match(tn) if not matcher: raise ValueError( - f'The tablename {tn} is not valid (must match {table_name_matcher.pattern})' + f'The tablename {tn} is not valid (must match ' + f'{table_name_matcher.pattern})' ) return tn @@ -47,9 +49,9 @@ async def get(self) -> list[str]: """ _query = f'SELECT DISTINCT name FROM {self._get_table_name()}' rows = await self.connection.fetch_all(_query) - rows = [r['name'] for r in rows] + nrows = [r['name'] for r in rows] - return rows + return nrows async def insert(self, value: str): """ diff --git a/db/python/layers/seqr.py b/db/python/layers/seqr.py index 8c203979a..32ef5f5e2 100644 --- a/db/python/layers/seqr.py +++ b/db/python/layers/seqr.py @@ -1,8 +1,8 @@ # pylint: disable=unnecessary-lambda-assignment,too-many-locals,broad-exception-caught +import asyncio import os import re -import asyncio import traceback from collections import defaultdict from datetime import datetime @@ -15,13 +15,14 @@ from cpg_utils.cloud import get_google_identity_token from api.settings import ( - SEQR_URL, SEQR_AUDIENCE, SEQR_MAP_LOCATION, 
SEQR_SLACK_NOTIFICATION_CHANNEL, + SEQR_URL, get_slack_token, ) from db.python.connect import Connection +from db.python.enum_tables import SequencingTypeTable from db.python.layers.analysis import AnalysisLayer from db.python.layers.base import BaseLayer from db.python.layers.family import FamilyLayer @@ -29,15 +30,14 @@ from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.analysis import AnalysisFilter from db.python.tables.project import ProjectPermissionsTable -from db.python.enum_tables import SequencingTypeTable -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.enums import AnalysisStatus # literally the most temporary thing ever, but for complete # automation need to have sample inclusion / exclusion from models.utils.sequencing_group_id_format import ( - sequencing_group_id_format_list, sequencing_group_id_format, + sequencing_group_id_format_list, ) SEQUENCING_GROUPS_TO_IGNORE = {22735, 22739} @@ -421,9 +421,9 @@ async def update_es_index( ) if len(es_index_analyses) == 0: - return [f'No ES index to synchronise'] + return ['No ES index to synchronise'] - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) es_index = es_index_analyses[-1].output diff --git a/db/python/layers/sequencing_group.py b/db/python/layers/sequencing_group.py index 6d6a02a55..5ff5b133b 100644 --- a/db/python/layers/sequencing_group.py +++ b/db/python/layers/sequencing_group.py @@ -6,13 +6,13 @@ from db.python.tables.assay import AssayTable, NoOpAenter from db.python.tables.sample import SampleTable from db.python.tables.sequencing_group import ( - SequencingGroupTable, SequencingGroupFilter, + SequencingGroupTable, ) from db.python.utils import ProjectId from models.models.sequencing_group import ( - SequencingGroupUpsertInternal, SequencingGroupInternal, + SequencingGroupUpsertInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format @@ -133,7 +133,7 @@ async def get_participant_ids_sequencing_group_ids_for_sequencing_type( ( projects, pids, - ) = await self.seqgt.get_participant_ids_and_sequence_group_ids_for_sequencing_type( + ) = await self.seqgt.get_participant_ids_and_sequencing_group_ids_for_sequencing_type( sequencing_type ) if not pids: @@ -209,7 +209,7 @@ async def create_sequencing_group_from_assays( type_=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, + assay_ids=assay_ids, meta=meta, ) return SequencingGroupInternal( @@ -217,7 +217,6 @@ async def create_sequencing_group_from_assays( type=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, sample_id=next(iter(sample_ids)), meta=meta, assays=assays, @@ -249,7 +248,7 @@ async def recreate_sequencing_group_with_new_assays( technology=seqgroup.technology, platform=seqgroup.platform, meta={**seqgroup.meta, **meta}, - sequence_ids=assays, + assay_ids=assays, author=self.author, open_transaction=False, ) @@ -324,7 +323,7 @@ async def upsert_sequencing_groups( technology=sg.technology, platform=sg.platform, meta=sg.meta, - sequence_ids=assay_ids, + assay_ids=assay_ids, open_transaction=False, ) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 6fffe2e20..95979457e 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ 
-16,12 +16,12 @@ from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable from models.models import ( + AssayInternal, + FamilySimpleInternal, NestedParticipantInternal, NestedSampleInternal, NestedSequencingGroupInternal, - AssayInternal, SearchItem, - FamilySimpleInternal, ) from models.models.web import ProjectSummaryInternal, WebProject @@ -82,7 +82,7 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # the query to determine the total count, then take the selection of samples # for the current page. This is more efficient than doing 2 queries separately. sample_query = f""" - SELECT s.id, s.external_id, s.type, s.meta, s.participant_id + SELECT s.id, s.external_id, s.type, s.meta, s.participant_id, s.active FROM sample s LEFT JOIN assay a ON s.id = a.sample_id LEFT JOIN participant p ON p.id = s.participant_id @@ -189,6 +189,7 @@ def _project_summary_process_sample_rows( created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), + active=bool(ord(s['active'])), ) for s in sample_rows ] @@ -402,8 +403,8 @@ async def get_project_summary( sg_models_by_sample_id=seq_group_models_by_sample_id, sample_id_start_times=sample_id_start_times, ) - # the pydantic model is casting to the id to a str, as that makes sense on the front end - # but cast back here to do the lookup + # the pydantic model is casting to the id to a str, as that makes sense on + # the front end but cast back here to do the lookup sid_to_pid = {s['id']: s['participant_id'] for s in sample_rows} smodels_by_pid = group_by(smodels, lambda s: sid_to_pid[int(s.id)]) @@ -429,7 +430,7 @@ async def get_project_summary( reported_sex=None, reported_gender=None, karyotype=None, - project=self.project, + # project=self.project, ) ) elif pid not in pid_seen: @@ -445,7 +446,7 @@ async def get_project_summary( reported_sex=p['reported_sex'], reported_gender=p['reported_gender'], karyotype=p['karyotype'], - project=self.project, + # project=self.project, ) ) diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index d6d2fe41d..a1d4a4c82 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -2,15 +2,15 @@ import dataclasses from collections import defaultdict from datetime import datetime -from typing import List, Optional, Set, Tuple, Dict, Any +from typing import Any, Dict, List, Optional, Set, Tuple from db.python.connect import DbBase, NotFoundError from db.python.tables.project import ProjectId from db.python.utils import ( - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + to_db_json, ) from models.enums import AnalysisStatus from models.models.analysis import AnalysisInternal @@ -285,7 +285,7 @@ async def get_incomplete_analyses( """ Gets details of analysis with status queued or in-progress """ - _query = f""" + _query = """ SELECT a.id as id, a.type as type, a.status as status, a.output as output, a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.meta as meta @@ -339,7 +339,7 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( if row['sequencing_group_id'] in seen_sequencing_group_ids: continue seen_sequencing_group_ids.add(row['sequencing_group_id']) - analyses.append(AnalysisInternal.from_db(**row)) + analyses.append(AnalysisInternal.from_db(**dict(row))) # reverse after 
timestamp_completed return analyses[::-1] @@ -439,7 +439,7 @@ async def get_sample_cram_path_map_for_seqr( seq_check = 'IN :seq_types' values['seq_types'] = sequencing_types - filters.append(f'JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) + filters.append('JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) if participant_ids: filters.append('p.id IN :pids') diff --git a/db/python/tables/project.py b/db/python/tables/project.py index d7c002c28..ac01e18bb 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -1,22 +1,21 @@ # pylint: disable=global-statement import asyncio -from typing import Dict, List, Set, Iterable, Optional, Tuple, Any - import json from datetime import datetime, timedelta +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from cpg_utils.cloud import get_cached_group_members from databases import Database from google.cloud import secretmanager -from cpg_utils.cloud import get_cached_group_members from api.settings import MEMBERS_CACHE_LOCATION, is_all_access from db.python.utils import ( - ProjectId, Forbidden, + InternalError, NoProjectAccess, + ProjectId, get_logger, to_db_json, - InternalError, ) from models.models.project import Project @@ -440,9 +439,9 @@ async def get_seqr_projects(self) -> list[dict[str, Any]]: projects = [] for r in await self.connection.fetch_all(_query): - r = dict(r) - r['meta'] = json.loads(r['meta'] or '{}') - projects.append(r) + row = dict(r) + row['meta'] = json.loads(row['meta'] or '{}') + projects.append(row) return projects diff --git a/db/python/tables/sequencing_group.py b/db/python/tables/sequencing_group.py index a3a83e864..fefa36116 100644 --- a/db/python/tables/sequencing_group.py +++ b/db/python/tables/sequencing_group.py @@ -6,11 +6,11 @@ from db.python.connect import DbBase, NoOpAenter, NotFoundError from db.python.utils import ( - ProjectId, - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + ProjectId, + to_db_json, ) from models.models.sequencing_group import SequencingGroupInternal @@ -125,10 +125,10 @@ async def get_sequencing_groups_by_ids( f'Couldn\'t find sequencing groups with internal id {ids})' ) - rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] - projects = set(r.project for r in rows) + sg_rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] + projects = set(r.project for r in sg_rows) - return projects, rows + return projects, sg_rows async def get_assay_ids_by_sequencing_group_ids( self, ids: list[int] @@ -172,7 +172,7 @@ async def get_all_sequencing_group_ids_by_sample_ids_by_type( return sequencing_group_ids_by_sample_ids_by_type - async def get_participant_ids_and_sequence_group_ids_for_sequencing_type( + async def get_participant_ids_and_sequencing_group_ids_for_sequencing_type( self, sequencing_type: str ) -> tuple[set[ProjectId], dict[int, list[int]]]: """ @@ -252,7 +252,7 @@ async def create_sequencing_group( type_: str, technology: str, platform: str, - sequence_ids: list[int], + assay_ids: list[int], meta: dict = None, author: str = None, open_transaction=True, @@ -319,16 +319,16 @@ async def create_sequencing_group( _query, {**values, 'author': author or self.author}, ) - sequence_insert_values = [ + assay_id_insert_values = [ { 'seqgroup': id_of_seq_group, 'assayid': s, 'author': author or self.author, } - for s in sequence_ids + for s in assay_ids ] await self.connection.execute_many( - _seqg_linker_query, sequence_insert_values + _seqg_linker_query, assay_id_insert_values ) 
return id_of_seq_group diff --git a/etl/endpoint/main.py b/etl/endpoint/main.py index e56679bd0..7a53fca68 100644 --- a/etl/endpoint/main.py +++ b/etl/endpoint/main.py @@ -3,12 +3,12 @@ import logging import os import uuid -import functions_framework + import flask +import functions_framework import google.cloud.bigquery as bq -from google.cloud import pubsub_v1 - from cpg_utils.cloud import email_from_id_token +from google.cloud import pubsub_v1 # type: ignore BIGQUERY_TABLE = os.getenv('BIGQUERY_TABLE') PUBSUB_TOPIC = os.getenv('PUBSUB_TOPIC') diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index cfdf6926f..a31fea483 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -1,47 +1,45 @@ # pylint: disable=too-many-lines,too-many-instance-attributes,too-many-locals,unused-argument,assignment-from-none,invalid-name,ungrouped-imports -import json -import sys import asyncio import csv +import json import logging import os import re +import sys from abc import abstractmethod from collections import defaultdict +from functools import wraps from io import StringIO from typing import ( - List, + Any, + Coroutine, Dict, - Union, - Optional, - Tuple, + Hashable, + Iterable, + Iterator, + List, Match, - Any, + Optional, Sequence, - TypeVar, - Iterator, - Coroutine, Set, - Iterable, - Hashable, + Tuple, + TypeVar, + Union, ) -from functools import wraps from cloudpathlib import AnyPath -from metamist.graphql import query_async, gql -from metamist.parser.cloudhelper import CloudHelper, group_by - -from metamist.apis import SampleApi, AssayApi, AnalysisApi, ParticipantApi +from metamist.apis import AnalysisApi, AssayApi, ParticipantApi, SampleApi +from metamist.graphql import gql, query_async from metamist.models import ( Analysis, AnalysisStatus, + AssayUpsert, ParticipantUpsert, SampleUpsert, SequencingGroupUpsert, - AssayUpsert, ) - +from metamist.parser.cloudhelper import CloudHelper, group_by # https://mypy.readthedocs.io/en/stable/runtime_troubles.html#using-new-additions-to-the-typing-module if sys.version_info >= (3, 8): @@ -322,8 +320,8 @@ def __init__( def to_sm(self) -> AssayUpsert: """Convert to SM upsert model""" return AssayUpsert( - type=self.assay_type, id=self.internal_id, + type=self.assay_type, external_ids=self.external_ids, # sample_id=self.s, meta=self.meta, @@ -1080,7 +1078,9 @@ async def add_analyses(self, analyses_to_add, external_to_internal_id_map): for external_id, analysis in chunked_analysis: # TODO: resolve this external_to_internal_id_map # this one is going to be slightly harder : - analysis.sequence_group_ids = [external_to_internal_id_map[external_id]] + analysis.sequencing_group_ids = [ + external_to_internal_id_map[external_id] + ] promises.append( analysisapi.create_analysis_async( project=proj, analysis_model=analysis diff --git a/models/base.py b/models/base.py index 133671761..389c38a56 100644 --- a/models/base.py +++ b/models/base.py @@ -2,7 +2,7 @@ # annotate any external objects that must be instantiated with this # type to force openapi generator to allow for Nones (it will actually allow Any) -OpenApiGenNoneType = bytes +OpenApiGenNoneType = bytes | None class SMBase(BaseModel): diff --git a/models/models/__init__.py b/models/models/__init__.py index 24069f708..37f1068a9 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -1,60 +1,61 @@ from models.models.analysis import ( - AnalysisInternal, Analysis, + AnalysisInternal, DateSizeModel, - SequencingGroupSizeModel, 
ProjectSizeModel, + SequencingGroupSizeModel, ) from models.models.assay import ( - AssayInternal, - AssayUpsertInternal, Assay, + AssayInternal, AssayUpsert, + AssayUpsertInternal, ) from models.models.family import ( - FamilySimpleInternal, + Family, FamilyInternal, FamilySimple, - Family, + FamilySimpleInternal, PedRowInternal, ) from models.models.participant import ( - ParticipantInternal, + NestedParticipant, NestedParticipantInternal, - ParticipantUpsertInternal, Participant, - NestedParticipant, + ParticipantInternal, ParticipantUpsert, + ParticipantUpsertInternal, ) from models.models.project import Project from models.models.sample import ( - SampleInternal, + NestedSample, NestedSampleInternal, - SampleUpsertInternal, Sample, - NestedSample, + SampleInternal, SampleUpsert, + SampleUpsertInternal, ) from models.models.search import ( - SearchResponseData, + ErrorResponse, FamilySearchResponseData, ParticipantSearchResponseData, SampleSearchResponseData, - ErrorResponse, - SearchResponse, SearchItem, + SearchResponse, + SearchResponseData, + SequencingGroupSearchResponseData, ) from models.models.sequencing_group import ( - SequencingGroupInternal, + NestedSequencingGroup, NestedSequencingGroupInternal, - SequencingGroupUpsertInternal, SequencingGroup, - NestedSequencingGroup, + SequencingGroupInternal, SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) from models.models.web import ( + PagingLinks, + ProjectSummary, ProjectSummaryInternal, WebProject, - ProjectSummary, - PagingLinks, ) diff --git a/models/models/analysis.py b/models/models/analysis.py index a88d6dd99..f35e95a48 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -15,7 +15,7 @@ class AnalysisInternal(SMBase): """Model for Analysis""" - id: int | None + id: int | None = None type: str status: AnalysisStatus output: str = None diff --git a/models/models/assay.py b/models/models/assay.py index 6e17a6ef7..04658ad44 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,11 +1,8 @@ import json from typing import Any -from models.base import SMBase, OpenApiGenNoneType -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.base import OpenApiGenNoneType, SMBase +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class AssayInternal(SMBase): @@ -104,12 +101,12 @@ def to_internal(self): _sample_id = None if self.sample_id: # but may be provided directly when inserting directly - _sample_id = sample_id_transform_to_raw(self.sample_id) + _sample_id = sample_id_transform_to_raw(self.sample_id) # type: ignore return AssayUpsertInternal( - id=self.id, - type=self.type, - external_ids=self.external_ids, - sample_id=_sample_id, - meta=self.meta, + id=self.id, # type: ignore + type=self.type, # type: ignore + external_ids=self.external_ids, # type: ignore + sample_id=_sample_id, # type: ignore + meta=self.meta, # type: ignore ) diff --git a/models/models/participant.py b/models/models/participant.py index c72451274..ab843e343 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,14 +1,14 @@ import json from db.python.utils import ProjectId -from models.base import SMBase, OpenApiGenNoneType +from models.base import OpenApiGenNoneType, SMBase +from models.models.family import FamilySimple, FamilySimpleInternal from models.models.sample import ( - SampleUpsertInternal, - SampleUpsert, - NestedSampleInternal, NestedSample, + NestedSampleInternal, + SampleUpsert, + 
SampleUpsertInternal, ) -from models.models.family import FamilySimple, FamilySimpleInternal class ParticipantInternal(SMBase): @@ -135,12 +135,12 @@ class ParticipantUpsert(SMBase): def to_internal(self): """Convert to internal model, doesn't really do much""" p = ParticipantUpsertInternal( - id=self.id, - external_id=self.external_id, - reported_sex=self.reported_sex, - reported_gender=self.reported_gender, - karyotype=self.karyotype, - meta=self.meta, + id=self.id, # type: ignore + external_id=self.external_id, # type: ignore + reported_sex=self.reported_sex, # type: ignore + reported_gender=self.reported_gender, # type: ignore + karyotype=self.karyotype, # type: ignore + meta=self.meta, # type: ignore ) if self.samples: diff --git a/models/models/sample.py b/models/models/sample.py index 94fd3a901..a41c06463 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,17 +1,14 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsertInternal, AssayUpsert, AssayInternal, Assay +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.models.sequencing_group import ( - SequencingGroupUpsert, NestedSequencingGroup, - SequencingGroupUpsertInternal, NestedSequencingGroupInternal, + SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class SampleInternal(SMBase): @@ -143,6 +140,7 @@ def to_internal(self): type=self.type, participant_id=self.participant_id, active=self.active, + author='', ) @@ -180,12 +178,12 @@ def to_internal(self) -> SampleUpsertInternal: sample_upsert = SampleUpsertInternal( id=_id, - external_id=self.external_id, - meta=self.meta, - project=self.project, - type=self.type, - participant_id=self.participant_id, - active=self.active, + external_id=self.external_id, # type: ignore + meta=self.meta, # type: ignore + project=self.project, # type: ignore + type=self.type, # type: ignore + participant_id=self.participant_id, # type: ignore + active=self.active, # type: ignore ) if self.sequencing_groups: diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index 36e250c73..1b0bbd3f4 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,11 +1,11 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsert, AssayUpsertInternal, Assay, AssayInternal -from models.utils.sample_id_format import sample_id_transform_to_raw, sample_id_format +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) @@ -60,6 +60,7 @@ def to_external(self): type=self.type, technology=self.technology, platform=self.platform, + external_ids=self.external_ids, meta=self.meta, sample_id=sample_id_format(self.sample_id), assays=[a.to_external() for a in self.assays or []], @@ -141,6 +142,7 @@ class SequencingGroup(SMBase): sample_id: str external_ids: dict[str, str] archived: bool + assays: list[Assay] class 
NestedSequencingGroup(SMBase): @@ -169,7 +171,7 @@ class SequencingGroupUpsert(SMBase): sample_id: str | OpenApiGenNoneType = None external_ids: dict[str, str] | OpenApiGenNoneType = None - assays: list[AssayUpsert] | None = None + assays: list[AssayUpsert] | OpenApiGenNoneType = None def to_internal(self) -> SequencingGroupUpsertInternal: """ @@ -185,15 +187,15 @@ def to_internal(self) -> SequencingGroupUpsertInternal: sg_internal = SequencingGroupUpsertInternal( id=_id, - type=self.type, - technology=self.technology, - platform=self.platform.lower() if self.platform else None, - meta=self.meta, + type=self.type, # type: ignore + technology=self.technology, # type: ignore + platform=self.platform.lower() if self.platform else None, # type: ignore + meta=self.meta, # type: ignore sample_id=_sample_id, - external_ids=self.external_ids or {}, + external_ids=self.external_ids or {}, # type: ignore ) if self.assays is not None: - sg_internal.assays = [a.to_internal() for a in self.assays] + sg_internal.assays = [a.to_internal() for a in self.assays] # type: ignore return sg_internal diff --git a/mypy.ini b/mypy.ini index 3403e0acb..418e757ed 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,15 @@ python_version = 3.10 ; warn_return_any = True ; warn_unused_configs = True + exclude = (build|update_sample_status) + # Per-module options: plugins = strawberry.ext.mypy_plugin +[mypy.db] +disable_error_code = operator + [mypy-sample_metadata.*] ignore_errors = true @@ -44,3 +49,5 @@ ignore_missing_imports=True ignore_missing_imports = True [mypy-graphql] ignore_missing_imports = True +[mypy-strawberry] +ignore_errors = True diff --git a/regenerate_api.py b/regenerate_api.py index 07112163e..8e6a9fece 100755 --- a/regenerate_api.py +++ b/regenerate_api.py @@ -184,7 +184,7 @@ def generate_schema_file(): Generate schema file and place in the metamist/graphql/ directory """ command = ['strawberry', 'export-schema', 'api.graphql.schema:schema'] - schema = subprocess.check_output(command, stderr=subprocess.STDOUT).decode() + schema = subprocess.check_output(command).decode() with open(os.path.join(MODULE_DIR, 'graphql/schema.graphql'), 'w+') as f: f.write(schema) @@ -317,7 +317,7 @@ def main(): while (not check_if_server_is_accessible()) and startup_tries > 0: startup_tries -= 1 logger.info( - f'Dockerised API server is not ready yet. ' + 'Dockerised API server is not ready yet. ' + f'Retrying in {wait_time_in_seconds} seconds. 
' + f'Remaining tries: {startup_tries}' ) diff --git a/requirements-dev.txt b/requirements-dev.txt index 1113f5753..0202733d1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,5 @@ pre-commit pylint testcontainers[mariadb] types-PyMySQL +# some strawberry dependency +strawberry-graphql[debug-server]==0.206.0 diff --git a/requirements.txt b/requirements.txt index b5cc04e02..2affefbb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ google-cloud-logging==2.7.0 google-cloud-storage==1.43.0 uvicorn==0.18.3 fastapi[all]==0.85.1 -strawberry-graphql[fastapi]==0.177.1 +strawberry-graphql[fastapi]==0.206.0 python-multipart==0.0.5 databases[mysql]==0.6.1 SQLAlchemy==1.4.41 diff --git a/scripts/20230420_sequencinggroupmigration.py b/scripts/20230420_sequencinggroupmigration.py index a3d2a705a..3feeecdd5 100644 --- a/scripts/20230420_sequencinggroupmigration.py +++ b/scripts/20230420_sequencinggroupmigration.py @@ -27,7 +27,7 @@ import json from collections import defaultdict from textwrap import dedent -from typing import Any, List, Dict, Tuple +from typing import Any, Dict, List, Tuple import click from databases import Database @@ -338,7 +338,7 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ) analysis_samples = await connection.fetch_all(analyses_query) - sequence_group_ids_of_duplicate_samples_query = dedent( + sequencing_group_ids_of_duplicate_samples_query = dedent( """ SELECT sg.sample_id, sg.id, sg.type FROM sequencing_group sg @@ -352,13 +352,13 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ORDER BY sg.sample_id DESC; """ ) - sequence_group_ids_of_duplicate_samples = await connection.fetch_all( - sequence_group_ids_of_duplicate_samples_query + sequencing_group_ids_of_duplicate_samples = await connection.fetch_all( + sequencing_group_ids_of_duplicate_samples_query ) duplicate_sg_id_map: Dict[ SampleId, Dict[SequenceType, SequenceGroupId] ] = defaultdict(dict) - for row in sequence_group_ids_of_duplicate_samples: + for row in sequencing_group_ids_of_duplicate_samples: duplicate_sg_id_map[row['sample_id']][row['type']] = row['id'] values_to_insert: List[Tuple[int, SequenceGroupId]] = [] diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index fd1e74381..c14f13d79 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 -# pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments +# type: ignore +# pylint: skip-file + + +# # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments """ Example Invocation @@ -11,7 +15,7 @@ This example will populate acute-care-test with the metamist data for 4 families. 
""" -from typing import Optional +import csv import logging import os import random @@ -19,25 +23,19 @@ import traceback import typing from collections import Counter -import csv +from typing import Optional import click from google.cloud import storage from metamist import exceptions -from metamist.apis import ( - AnalysisApi, - AssayApi, - SampleApi, - FamilyApi, - ParticipantApi, -) +from metamist.apis import AnalysisApi, AssayApi, FamilyApi, ParticipantApi, SampleApi from metamist.models import ( - BodyGetAssaysByCriteria, - AssayUpsert, - SampleUpsert, Analysis, AnalysisStatus, + AssayUpsert, + BodyGetAssaysByCriteria, + SampleUpsert, ) logger = logging.getLogger(__file__) @@ -313,7 +311,7 @@ def main( ) logger.info(f'Creating {a_type} analysis entry in test') aapi.create_analysis(project=target_project, analysis=am) - logger.info(f'-') + logger.info('-') def transfer_families( diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py index 82998215d..a157e0ff6 100644 --- a/scripts/parse_ont_sheet.py +++ b/scripts/parse_ont_sheet.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument -from typing import List import logging +from typing import List import click -from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup from metamist.parser.generic_metadata_parser import ( - run_as_sync, GenericMetadataParser, + run_as_sync, ) +from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup logger = logging.getLogger(__file__) logger.addHandler(logging.StreamHandler()) @@ -97,14 +97,14 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: return [fastqs] async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: - sequence_groups = await super().group_assays(sample) + sequencing_groups = await super().group_assays(sample) - for sequence_group in sequence_groups: + for sequencing_group in sequencing_groups: failed_fastqs: list[str] = [] - for r in sequence_group.rows: + for r in sequencing_group.rows: parsed_failed_fastqs = await self.parse_files( - sequence_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] + sequencing_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] ) if 'reads' not in parsed_failed_fastqs: raise ValueError( @@ -120,9 +120,9 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup ) failed_fastqs.extend(parsed_failed_fastq_reads['fastq']) - sequence_group.meta['failed_reads'] = failed_fastqs + sequencing_group.meta['failed_reads'] = failed_fastqs - return sequence_groups + return sequencing_groups @click.command() diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index 894d0c241..f587e483b 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -1,12 +1,9 @@ """ A really simple script to import a pedigree file """ import click +from cloudpathlib import AnyPath -from cloudpathlib import CloudPath - -from metamist.apis import ( - FamilyApi, -) +from metamist.apis import FamilyApi @click.command() @@ -17,7 +14,8 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() - with CloudPath(ped_file_path).open() as ped_file: + # pylint: disable=no-member + with AnyPath(ped_file_path).open() as ped_file: # type: ignore fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/scripts/sync_seqr.py b/scripts/sync_seqr.py index 67967e437..12be0fd93 100644 --- a/scripts/sync_seqr.py +++ b/scripts/sync_seqr.py @@ -1,27 +1,28 @@ # 
pylint: disable=missing-timeout,unnecessary-lambda-assignment,import-outside-toplevel,too-many-locals import asyncio -import os -import re -import json import datetime +import json import logging +import os +import re import traceback from collections import defaultdict -from typing import Any from io import StringIO +from typing import Any import aiohttp import yaml from cloudpathlib import AnyPath -from metamist.graphql import query_async -from metamist.model.analysis_status import AnalysisStatus -from metamist.model.export_type import ExportType -from metamist.model.analysis_query_model import AnalysisQueryModel + from metamist.apis import ( - SeqrApi, - ProjectApi, AnalysisApi, + ProjectApi, + SeqrApi, ) +from metamist.graphql import query_async +from metamist.model.analysis_query_model import AnalysisQueryModel +from metamist.model.analysis_status import AnalysisStatus +from metamist.model.export_type import ExportType from metamist.parser.generic_parser import chunk loggers_to_silence = [ @@ -410,7 +411,7 @@ async def update_es_index( fn_path = os.path.join(MAP_LOCATION, filename) # pylint: disable=no-member - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) if check_metamist: # len(es_index_analyses) > 0: @@ -678,7 +679,9 @@ def sync_all_datasets(sequencing_type: str, ignore: set[str] = None): continue try: el.run_until_complete( - sync_dataset_async(project_name, seqr_guid, sequencing_type=sequencing_type) + sync_dataset_async( + project_name, seqr_guid, sequencing_type=sequencing_type + ) ) except Exception as e: # pylint: disable=broad-exception-caught print( diff --git a/test/test_analysis.py b/test/test_analysis.py index ec03f0702..ef7e1acac 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -1,21 +1,20 @@ # pylint: disable=invalid-overridden-method -from datetime import timedelta, datetime - +from datetime import datetime, timedelta from test.testbase import DbIsolatedTest, run_as_sync -from db.python.tables.analysis import AnalysisFilter -from db.python.utils import GenericFilter -from db.python.layers.assay import AssayLayer from db.python.layers.analysis import AnalysisLayer +from db.python.layers.assay import AssayLayer from db.python.layers.sample import SampleLayer from db.python.layers.sequencing_group import SequencingGroupLayer +from db.python.tables.analysis import AnalysisFilter +from db.python.utils import GenericFilter +from models.enums import AnalysisStatus from models.models import ( AnalysisInternal, AssayUpsertInternal, - SequencingGroupUpsertInternal, SampleUpsertInternal, + SequencingGroupUpsertInternal, ) -from models.enums import AnalysisStatus class TestAnalysis(DbIsolatedTest): @@ -132,7 +131,7 @@ async def test_get_analysis(self): AnalysisInternal( type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], meta={}, ) ) @@ -148,7 +147,7 @@ async def test_get_analysis(self): id=a_id, type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], output=None, timestamp_completed=None, project=1, diff --git a/test/test_assay.py b/test/test_assay.py index af641a278..cde5f242a 100644 --- a/test/test_assay.py +++ b/test/test_assay.py @@ -1,10 +1,11 @@ from test.testbase import DbIsolatedTest, run_as_sync + from pymysql.err import IntegrityError from db.python.connect import NotFoundError -from db.python.layers.sample import SampleLayer -from db.python.layers.assay import 
AssayLayer from db.python.enum_tables import AssayTypeTable +from db.python.layers.assay import AssayLayer +from db.python.layers.sample import SampleLayer from db.python.tables.assay import AssayFilter from db.python.utils import GenericFilter from models.models.assay import AssayUpsertInternal @@ -200,11 +201,20 @@ async def test_getting_assay_by_external_id(self): ) ) - fquery_1 = AssayFilter(external_id='SEQ01', project=self.project_id) + fquery_1 = AssayFilter( + external_id=GenericFilter(eq='SEQ01'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_1))[0].id) - fquery_2 = AssayFilter(external_id='EXT_SEQ1', project=self.project_id) + fquery_2 = AssayFilter( + external_id=GenericFilter(eq='EXT_SEQ1'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_2))[0].id) - fquery_3 = AssayFilter(external_id='SEQ02', project=self.project_id) + fquery_3 = AssayFilter( + external_id=GenericFilter(eq='SEQ02'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq2.id, (await self.assaylayer.query(fquery_3))[0].id) @run_as_sync @@ -285,20 +295,22 @@ async def search_result_to_ids(filter_: AssayFilter): ) self.assertSetEqual( {seq1_id, seq2_id}, - await search_result_to_ids(AssayFilter(meta={'common': 'common'})), + await search_result_to_ids( + AssayFilter(meta={'common': GenericFilter(eq='common')}) + ), ) # sample meta self.assertSetEqual( {seq1_id, seq2_id}, await search_result_to_ids( - AssayFilter(sample_meta={'collection-year': '2022'}) + AssayFilter(sample_meta={'collection-year': GenericFilter(eq='2022')}) ), ) self.assertSetEqual( set(), await search_result_to_ids( - AssayFilter(sample_meta={'unknown_key': '2022'}) + AssayFilter(sample_meta={'unknown_key': GenericFilter(eq='2022')}) ), ) @@ -315,7 +327,7 @@ async def search_result_to_ids(filter_: AssayFilter): {seq2_id}, await search_result_to_ids( AssayFilter( - sample_meta={'collection-year': '2022'}, + sample_meta={'collection-year': GenericFilter(eq='2022')}, external_id=GenericFilter(in_=['SEQ02']), ) ), diff --git a/test/test_generic_auditor.py b/test/test_generic_auditor.py index 4fb526dc9..21ef3f6b5 100644 --- a/test/test_generic_auditor.py +++ b/test/test_generic_auditor.py @@ -1,16 +1,16 @@ -from collections import namedtuple import unittest -from unittest.mock import MagicMock, patch +import unittest.mock +from collections import namedtuple + from metamist.audit.generic_auditor import GenericAuditor -# pylint: disable=dangerous-default-value # noqa: B006 class TestGenericAuditor(unittest.TestCase): """Test the audit helper functions""" - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_get_participant_data_for_dataset(self, mock_query): """Only participants with a non-empty samples field should be returned""" auditor = GenericAuditor( @@ -360,7 +360,7 @@ def test_get_sequence_mapping_warning_logging(self): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_analyses_crams(self, mock_query): """Test that only the genome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -412,7 +412,7 @@ def test_query_genome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + 
@unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_and_exome_analyses_crams(self, mock_query): """Test that both the genome and exome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -472,7 +472,7 @@ def test_query_genome_and_exome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_broken_analyses_crams(self, mock_query): """ All analysis crams must have 'sequencing_type' meta field, @@ -506,7 +506,7 @@ def test_query_broken_analyses_crams(self, mock_query): assay_sg_id_map={1: 'CPG123'} ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_analyses_crams_warning(self, mock_query): """Warn if the sample_ids field is absent and the sample meta field is used instead""" auditor = GenericAuditor( @@ -541,7 +541,7 @@ def test_query_analyses_crams_warning(self, mock_query): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_analyses_for_sgs_without_crams(self, mock_query): """Log any analyses found for samples without completed CRAMs""" auditor = GenericAuditor( @@ -569,7 +569,9 @@ def test_analyses_for_sgs_without_crams(self, mock_query): } with self.assertLogs(level='WARNING') as log: - _ = auditor.analyses_for_sgs_without_crams(sgs_without_crams) + # catch the warning logs from here and check below + auditor.analyses_for_sgs_without_crams(sgs_without_crams) + self.assertEqual(len(log.output), 8) # 8 analysis types checked self.assertEqual(len(log.records), 8) self.assertIn( @@ -577,7 +579,21 @@ def test_analyses_for_sgs_without_crams(self, mock_query): log.output[0], ) - def test_get_complete_and_incomplete_sgs(self): + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.get_gcs_bucket_subdirs_to_search' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_files_in_gcs_buckets_subdirs' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.analyses_for_sgs_without_crams' + ) + def test_get_complete_and_incomplete_sgs( + self, + mock_analyses_for_sgs_without_crams, + mock_find_files_in_gcs_buckets_subdirs, + mock_get_gcs_bucket_subdirs, + ): """Report on samples that have completed CRAMs and those that dont""" assay_sg_id_map = { # noqa: B006 1: 'CPG123', @@ -591,17 +607,15 @@ def test_get_complete_and_incomplete_sgs(self): auditor = GenericAuditor( dataset='dev', sequencing_type=['genome', 'exome'], file_types=('fastq',) ) - auditor.get_gcs_bucket_subdirs_to_search = MagicMock() - auditor.find_files_in_gcs_buckets_subdirs = MagicMock() - auditor.analyses_for_sgs_without_crams = MagicMock() - auditor.get_gcs_bucket_subdirs_to_search.return_value = { + mock_get_gcs_bucket_subdirs.return_value = { 'cpg-dataset-main': ['cram', 'exome/cram'] } - auditor.find_files_in_gcs_buckets_subdirs.return_value = [ + mock_find_files_in_gcs_buckets_subdirs.return_value = [ 'gs://cpg-dataset-main/cram/CPG123.cram', 'gs://cpg-dataset-main/exome/cram/CPG456.cram', ] + mock_analyses_for_sgs_without_crams.return_value = None result = auditor.get_complete_and_incomplete_sgs( assay_sg_id_map=assay_sg_id_map, @@ -615,8 +629,16 @@ def test_get_complete_and_incomplete_sgs(self): self.assertDictEqual(result, expected_result) - async def 
test_check_for_uningested_or_moved_assays(self): - """Test 2 ingested reads, one ingested and moved read, and one uningested read""" + @unittest.mock.patch('metamist.audit.generic_auditor.GenericAuditor.file_size') + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_sequence_files_in_gcs_bucket' + ) + async def test_check_for_uningested_or_moved_assays( + self, mock_find_sequence_files_in_gcs_bucket, mock_file_size + ): + """ + Test 2 ingested reads, one ingested and moved read, and one uningested read + """ auditor = GenericAuditor( dataset='dev', sequencing_type=['genome'], file_types=('fastq',) ) @@ -627,16 +649,14 @@ async def test_check_for_uningested_or_moved_assays(self): sg_sample_id_map = {'CPG123': 'EXT123'} assay_sg_id_map = {1: 'CPG123'} sample_internal_external_id_map = {'CPG123': 'EXT123'} - auditor.find_sequence_files_in_gcs_bucket = MagicMock() - auditor.find_sequence_files_in_gcs_bucket.return_value = [ + mock_find_sequence_files_in_gcs_bucket.return_value = [ 'read1.fq', 'read2.fq', 'dir2/read3.fq', 'read4.fq', ] - auditor.file_size = MagicMock() - auditor.file_size.return_value = 12 + mock_file_size.return_value = 12 ( uningested_sequence_paths, diff --git a/test/test_generic_filters.py b/test/test_generic_filters.py index 343047c96..cc01e7f80 100644 --- a/test/test_generic_filters.py +++ b/test/test_generic_filters.py @@ -15,12 +15,6 @@ class GenericFilterTest(GenericFilterModel): class TestGenericFilters(unittest.TestCase): """Test generic filters SQL generation""" - def test_post_init_correction(self): - """Test that the post init correction works""" - filter_ = GenericFilterTest(test_string='test') - self.assertIsInstance(filter_.test_string, GenericFilter) - self.assertEqual(filter_.test_string.eq, 'test') - def test_basic_no_override(self): """Test that the basic filter converts to SQL as expected""" filter_ = GenericFilterTest(test_string=GenericFilter(eq='test')) diff --git a/test/test_graphql.py b/test/test_graphql.py index 95b817931..c61eed238 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -1,20 +1,19 @@ from test.testbase import DbIsolatedTest, run_as_sync + from graphql.error import GraphQLError, GraphQLSyntaxError import api.graphql.schema -from db.python.layers import ParticipantLayer, AnalysisLayer +from db.python.layers import AnalysisLayer, ParticipantLayer +from metamist.graphql import configure_sync_client, gql, validate +from models.enums import AnalysisStatus from models.models import ( - SampleUpsertInternal, + AnalysisInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, - AnalysisInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format -from models.enums import AnalysisStatus - -from metamist.graphql import gql, validate, configure_sync_client - default_assay_meta = { 'sequencing_type': 'genome', @@ -24,7 +23,6 @@ def _get_single_participant_upsert(): - return ParticipantUpsertInternal( external_id='Demeter', meta={}, @@ -43,20 +41,20 @@ def _get_single_participant_upsert(): type='sequencing', meta={ 'reads': [ - { - 'basename': 'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 
'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -114,7 +112,7 @@ def test_validate_provided_schema(self): (strawberry has an as_str() method) """ client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(TEST_QUERY, client=client) diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index ae51f7a20..adb65622e 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -1,5 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync +from databases.interfaces import Record + from db.python.layers.participant import ParticipantLayer from models.models.participant import ParticipantUpsertInternal @@ -21,20 +23,22 @@ async def test_import_many_hpo_terms(self): 'HPO Term 3', 'HPO Term 20', ] - rows = [['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004']] + rows_to_insert = [ + ['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004'] + ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) - rows = list( + db_rows: list[Record] = list( await self.connection.connection.fetch_all( 'SELECT participant_id, description, value FROM participant_phenotypes' ) ) - self.assertEqual(1, len(rows)) - self.assertEqual('HPO Terms (present)', rows[0]['description']) + self.assertEqual(1, len(db_rows)) + self.assertEqual('HPO Terms (present)', db_rows[0]['description']) self.assertEqual( - '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', rows[0]['value'] + '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', db_rows[0]['value'] ) @run_as_sync @@ -50,12 +54,12 @@ async def test_import_basic_metadata(self): ) headers = ['Individual ID', 'HPO Term 20', 'Age of Onset'] - rows = [ + rows_to_insert = [ ['TP01', 'HP:0000020', 'Congenital'], ['TP02', 'HP:00000021; HP:023', 'Infantile'], ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) rows = list( await self.connection.connection.fetch_all( diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index ed95a4fbc..ebcc5ec75 100644 --- a/test/test_parse_generic_metadata.py +++ b/test/test_parse_generic_metadata.py @@ -1,33 +1,31 @@ import unittest from datetime import datetime from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - import api.graphql.schema from db.python.layers import ParticipantLayer +from metamist.graphql import configure_sync_client, validate +from metamist.parser.generic_metadata_parser import GenericMetadataParser +from metamist.parser.generic_parser import ( + QUERY_MATCH_ASSAYS, + QUERY_MATCH_PARTICIPANTS, + QUERY_MATCH_SAMPLES, + QUERY_MATCH_SEQUENCING_GROUPS, + ParsedParticipant, + ParsedSample, + ParsedSequencingGroup, +) from models.models import ( + AssayUpsertInternal, ParticipantUpsertInternal, SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, ) from 
models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import sequencing_group_id_format -from metamist.graphql import validate, configure_sync_client -from metamist.parser.generic_parser import ( - ParsedParticipant, - ParsedSample, - QUERY_MATCH_PARTICIPANTS, - QUERY_MATCH_SAMPLES, - QUERY_MATCH_SEQUENCING_GROUPS, - QUERY_MATCH_ASSAYS, - ParsedSequencingGroup, -) -from metamist.parser.generic_metadata_parser import GenericMetadataParser - def _get_basic_participant_to_upsert(): default_assay_meta = { @@ -96,7 +94,7 @@ def test_queries(self): # only need to apply schema to the first client to create, then it gets cached client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(QUERY_MATCH_PARTICIPANTS) validate(QUERY_MATCH_SAMPLES, client=client) @@ -332,11 +330,11 @@ async def test_rows_with_participants(self, mock_graphql_query): # Call generic parser file_contents = '\n'.join(rows) - summary, participants = await parser.parse_manifest( + summary, prows = await parser.parse_manifest( StringIO(file_contents), delimiter='\t', dry_run=True ) - participants: list[ParsedParticipant] = participants + participants: list[ParsedParticipant] = prows self.assertEqual(3, summary['participants']['insert']) self.assertEqual(0, summary['participants']['update']) @@ -749,7 +747,9 @@ async def test_matching_sequencing_groups_and_assays( mock_datetime_added.return_value = datetime.fromisoformat('2022-02-02T22:22:22') player = ParticipantLayer(self.connection) - participant = await player.upsert_participant(_get_basic_participant_to_upsert()) + participant = await player.upsert_participant( + _get_basic_participant_to_upsert() + ) filenames = [ 'sample_id001.filename-R1.fastq.gz', diff --git a/test/test_parse_ont_processor.py b/test/test_parse_ont_processor.py index c14d95e73..a3d301754 100644 --- a/test/test_parse_ont_processor.py +++ b/test/test_parse_ont_processor.py @@ -1,8 +1,7 @@ import unittest from io import StringIO -from unittest.mock import patch - from test.testbase import run_as_sync +from unittest.mock import patch from scripts.process_ont_products import OntProductParser @@ -36,7 +35,7 @@ async def test_single_row_all_files_exist( dry_run=True, ) - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True fs = [ 'Sample01.bam', 'Sample01.sv.vcf.gz', @@ -44,7 +43,7 @@ async def test_single_row_all_files_exist( 'Sample01.indels.vcf.gz', ] parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs} - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True file_contents = '\n'.join(rows) analyses = await parser.parse_manifest( diff --git a/test/test_parse_ont_sheet.py b/test/test_parse_ont_sheet.py index 72b1acdc0..534bb50f1 100644 --- a/test/test_parse_ont_sheet.py +++ b/test/test_parse_ont_sheet.py @@ -1,11 +1,10 @@ from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - from db.python.layers import ParticipantLayer -from models.models import ParticipantUpsertInternal, SampleUpsertInternal from metamist.parser.generic_parser import ParsedParticipant +from models.models import ParticipantUpsertInternal, SampleUpsertInternal from scripts.parse_ont_sheet import OntParser @@ -125,6 +124,6 @@ async def test_simple_sheet(self, mock_graphql_query): ], } self.maxDiff 
= None - sequence_group = participants[0].samples[0].sequencing_groups[0] - self.assertDictEqual(seqgroup_meta, sequence_group.meta) - self.assertDictEqual(meta_dict, sequence_group.assays[0].meta) + sequencing_group = participants[0].samples[0].sequencing_groups[0] + self.assertDictEqual(seqgroup_meta, sequencing_group.meta) + self.assertDictEqual(meta_dict, sequencing_group.assays[0].meta) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 77d395863..8966c8cc3 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -1,9 +1,8 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.participant import ParticipantUpsertInternal - from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer +from models.models.participant import ParticipantUpsertInternal class TestPedigree(DbIsolatedTest): @@ -14,10 +13,10 @@ async def test_import_get_pedigree(self): """Test import + get pedigree""" fl = FamilyLayer(self.connection) - rows = [ - ['FAM01', 'EX01_father', '', '', 1, 1], - ['FAM01', 'EX01_mother', '', '', 2, 1], - ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', 1, 2], + rows: list[list[str]] = [ + ['FAM01', 'EX01_father', '', '', '1', '1'], + ['FAM01', 'EX01_mother', '', '', '2', '1'], + ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', '1', '2'], ] await fl.import_pedigree( diff --git a/test/test_sample.py b/test/test_sample.py index b256a2ae0..e5b8639b7 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.sample import SampleUpsertInternal from db.python.layers.sample import SampleLayer +from models.models.sample import SampleUpsertInternal class TestSample(DbIsolatedTest): @@ -17,7 +17,7 @@ async def setUp(self) -> None: @run_as_sync async def test_add_sample(self): """Test inserting a sample""" - s = await self.slayer.upsert_sample( + sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='Test01', type='blood', @@ -30,8 +30,7 @@ async def test_add_sample(self): 'SELECT id, type, meta, project FROM sample' ) self.assertEqual(1, len(samples)) - s = samples[0] - self.assertEqual(1, s['id']) + self.assertEqual(sample.id, samples[0]['id']) @run_as_sync async def test_get_sample(self): diff --git a/test/test_search.py b/test/test_search.py index fa92cf80f..ca6718ec5 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -1,18 +1,25 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer from db.python.layers.search import SearchLayer -from db.python.layers.family import FamilyLayer from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.family_participant import FamilyParticipantTable - from models.enums import SearchResponseType -from models.models.family import PedRowInternal -from models.models.sample import sample_id_format, SampleUpsertInternal -from models.models.participant import ParticipantUpsertInternal -from models.models.sequencing_group import SequencingGroupUpsertInternal, sequencing_group_id_format -from models.models.assay import AssayUpsertInternal +from models.models import ( + AssayUpsertInternal, + FamilySearchResponseData, + ParticipantSearchResponseData, + ParticipantUpsertInternal, + PedRowInternal, + SampleSearchResponseData, + SampleUpsertInternal, + 
SequencingGroupSearchResponseData, + SequencingGroupUpsertInternal, +) +from models.models.sample import sample_id_format +from models.models.sequencing_group import sequencing_group_id_format class TestSample(DbIsolatedTest): @@ -68,7 +75,11 @@ async def test_search_isolated_sample_by_id(self): self.assertEqual(1, len(results)) self.assertEqual(cpg_id, results[0].title) self.assertEqual(cpg_id, results[0].data.id) - self.assertListEqual(['EX001'], results[0].data.sample_external_ids) + + result_data = results[0].data + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertListEqual(['EX001'], result_data.sample_external_ids) @run_as_sync async def test_search_isolated_sequencing_group_by_id(self): @@ -97,19 +108,24 @@ async def test_search_isolated_sequencing_group_by_id(self): 'sequencing_type': 'transcriptome', 'sequencing_technology': 'long-read', 'sequencing_platform': 'illumina', - } + }, ) - ] + ], ) ] ) cpg_sg_id = sequencing_group_id_format(sg[0].id) - results = await self.schlay.search(query=cpg_sg_id, project_ids=[self.project_id]) + results = await self.schlay.search( + query=cpg_sg_id, project_ids=[self.project_id] + ) self.assertEqual(1, len(results)) self.assertEqual(cpg_sg_id, results[0].title) - self.assertEqual(cpg_sg_id, results[0].data.id) - self.assertEqual(cpg_sg_id, results[0].data.sg_external_id) + result_data = results[0].data + assert isinstance(result_data, SequencingGroupSearchResponseData) + self.assertIsInstance(result_data, SequencingGroupSearchResponseData) + self.assertEqual(cpg_sg_id, result_data.id) + self.assertEqual(cpg_sg_id, result_data.sg_external_id) @run_as_sync async def test_search_isolated_sample_by_external_id(self): @@ -125,12 +141,16 @@ async def test_search_isolated_sample_by_external_id(self): cpg_id = sample_id_format(sample.id) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(cpg_id, result.title) - self.assertEqual(cpg_id, result.data.id) - self.assertListEqual(['EX001'], result.data.sample_external_ids) - self.assertListEqual([], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) + + self.assertEqual(cpg_id, results[0].title) + result_data = results[0].data + + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertEqual(cpg_id, result_data.id) + self.assertListEqual(['EX001'], result_data.sample_external_ids) + self.assertListEqual([], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def test_search_participant_isolated(self): @@ -145,12 +165,13 @@ async def test_search_participant_isolated(self): query='PART01', project_ids=[self.project_id] ) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(p.id, result.data.id) - self.assertEqual('PART01', result.title) - self.assertListEqual(['PART01'], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + + self.assertEqual('PART01', results[0].title) + result_data = results[0].data + assert isinstance(result_data, ParticipantSearchResponseData) + self.assertEqual(p.id, result_data.id) + self.assertListEqual(['PART01'], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def 
test_search_family(self): @@ -164,11 +185,11 @@ async def test_search_family(self): ) self.assertEqual(1, len(results)) result = results[0] - self.assertEqual(f_id, result.data.id) self.assertEqual('FAMXX01', result.title) - self.assertListEqual(['FAMXX01'], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.participant_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + result_data = result.data + assert isinstance(result_data, FamilySearchResponseData) + self.assertEqual(f_id, result_data.id) + self.assertListEqual(['FAMXX01'], result_data.family_external_ids) @run_as_sync async def test_search_mixed(self): @@ -195,7 +216,7 @@ async def test_search_mixed(self): sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='X:SAM001', - sample_type='blood', + type='blood', participant_id=p.id, ) ) @@ -214,19 +235,26 @@ async def test_search_mixed(self): sample_result = next( r for r in all_results if r.type == SearchResponseType.SAMPLE ) + family_result_data = family_result.data + participant_result_data = participant_result.data + sample_result_data = sample_result.data + + assert isinstance(family_result_data, FamilySearchResponseData) + assert isinstance(participant_result_data, ParticipantSearchResponseData) + assert isinstance(sample_result_data, SampleSearchResponseData) # linked family matches self.assertEqual('X:FAM01', family_result.title) # linked participant matches self.assertEqual('X:PART01', participant_result.title) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) # linked sample matches cpg_id = sample_id_format(sample.id) - self.assertEqual(cpg_id, sample_result.data.id) - self.assertListEqual(['X:SAM001'], sample_result.data.sample_external_ids) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertEqual(cpg_id, sample_result_data.id) + self.assertListEqual(['X:SAM001'], sample_result_data.sample_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) self.assertListEqual( - ['X:PART01'], participant_result.data.participant_external_ids + ['X:PART01'], participant_result_data.participant_external_ids ) diff --git a/test/test_web.py b/test/test_web.py index 4e6cb53f6..cc5a0bb63 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -1,27 +1,27 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers import ( + AssayLayer, + ParticipantLayer, + SampleLayer, + SequencingGroupLayer, + WebLayer, +) from models.enums import MetaSearchEntityPrefix from models.models import ( + Assay, + AssayInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + ProjectSummaryInternal, SampleUpsertInternal, + SearchItem, SequencingGroupUpsertInternal, - AssayUpsertInternal, - ProjectSummaryInternal, - AssayInternal, - Assay, + WebProject, ) -from models.models import WebProject, SearchItem from models.utils.sample_id_format import sample_id_transform_to_raw from models.utils.sequencing_group_id_format import sequencing_group_id_transform_to_raw -from db.python.layers import ( - AssayLayer, - SequencingGroupLayer, - SampleLayer, - ParticipantLayer, - WebLayer, -) - default_assay_meta = { 'sequencing_type': 'genome', 'sequencing_technology': 'short-read', @@ -71,20 +71,20 @@ def get_test_participant(): type='sequencing', meta={ 'reads': [ - { - 'basename': 
'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -204,9 +204,7 @@ async def test_project_summary_empty(self): # Expect an empty project expected = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -308,9 +306,7 @@ async def project_summary_with_filter_no_results(self): ], ) empty_result = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -455,12 +451,10 @@ async def test_field_with_space(self): token=0, grid_filter=[ SearchItem( - **{ - 'model_type': MetaSearchEntityPrefix.ASSAY, - 'query': 'field wi', - 'field': 'field with spaces', - 'is_meta': True, - } + model_type=MetaSearchEntityPrefix.ASSAY, + query='field wi', + field='field with spaces', + is_meta=True, ) ], ) diff --git a/web/src/pages/project/ProjectGrid.tsx b/web/src/pages/project/ProjectGrid.tsx index 52dbeaa64..83a9fda74 100644 --- a/web/src/pages/project/ProjectGrid.tsx +++ b/web/src/pages/project/ProjectGrid.tsx @@ -372,7 +372,7 @@ const ProjectGrid: React.FunctionComponent = ({ : '1px solid var(--color-border-default)', backgroundColor, }} - key={`${s.id}sequence_group.${k}`} + key={`${s.id}sequencing_group.${k}`} rowSpan={(seq.assays ?? []).length} > {k === 'id' ? 
( From 78cafc7970ef1adeafd7e4a5005a7f7b7d31e500 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:27:09 +1000 Subject: [PATCH 2/3] Add participant phenotypes to graphql (#545) * Add participant phenotypes to graphql * Add test for graphql phenotypes * Fix unrelated linting issues * Slight linting updates * PR cleanup --------- Co-authored-by: Michael Franklin --- api/graphql/loaders.py | 38 +++++++++++++++++++++++---------- api/graphql/schema.py | 33 ++++++++++++++-------------- db/python/layers/participant.py | 30 ++++++++++++++++++++++++-- scripts/parse_ped.py | 2 +- test/test_graphql.py | 27 +++++++++++++++++++++++ 5 files changed, 99 insertions(+), 31 deletions(-) diff --git a/api/graphql/loaders.py b/api/graphql/loaders.py index 2a54fc514..905297009 100644 --- a/api/graphql/loaders.py +++ b/api/graphql/loaders.py @@ -13,26 +13,26 @@ from db.python.connect import NotFoundError from db.python.layers import ( AnalysisLayer, - SampleLayer, AssayLayer, + FamilyLayer, ParticipantLayer, + SampleLayer, SequencingGroupLayer, - FamilyLayer, ) from db.python.tables.analysis import AnalysisFilter from db.python.tables.assay import AssayFilter from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sample import SampleFilter from db.python.tables.sequencing_group import SequencingGroupFilter -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.models import ( - AssayInternal, - SampleInternal, - SequencingGroupInternal, AnalysisInternal, - ParticipantInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, Project, + SampleInternal, + SequencingGroupInternal, ) @@ -53,6 +53,8 @@ class LoaderKeys(enum.Enum): SAMPLES_FOR_PARTICIPANTS = 'samples_for_participants' SAMPLES_FOR_PROJECTS = 'samples_for_projects' + PHENOTYPES_FOR_PARTICIPANTS = 'phenotypes_for_participants' + PARTICIPANTS_FOR_IDS = 'participants_for_ids' PARTICIPANTS_FOR_FAMILIES = 'participants_for_families' PARTICIPANTS_FOR_PROJECTS = 'participants_for_projects' @@ -291,9 +293,7 @@ async def load_participants_for_ids( p_by_id = {p.id: p for p in persons} missing_pids = set(participant_ids) - set(p_by_id.keys()) if missing_pids: - raise NotFoundError( - f'Could not find participants with ids {missing_pids}' - ) + raise NotFoundError(f'Could not find participants with ids {missing_pids}') return [p_by_id.get(p) for p in participant_ids] @@ -400,7 +400,23 @@ async def load_analyses_for_sequencing_groups( return by_sg_id -async def get_context(request: Request, connection=get_projectless_db_connection): # pylint: disable=unused-argument +@connected_data_loader(LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS) +async def load_phenotypes_for_participants( + participant_ids: list[int], connection +) -> list[dict]: + """ + Data loader for phenotypes for participants + """ + player = ParticipantLayer(connection) + participant_phenotypes = await player.get_phenotypes_for_participants( + participant_ids=participant_ids + ) + return [participant_phenotypes.get(pid, {}) for pid in participant_ids] + + +async def get_context( + request: Request, connection=get_projectless_db_connection +): # pylint: disable=unused-argument """Get loaders / cache context for strawberyy GraphQL""" mapped_loaders = {k: fn(connection) for k, fn in loaders.items()} return { diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 8befa8867..7255821ed 100644 --- a/api/graphql/schema.py +++ 
b/api/graphql/schema.py @@ -14,16 +14,10 @@ from strawberry.fastapi import GraphQLRouter from strawberry.types import Info -from api.graphql.filters import ( - GraphQLFilter, - GraphQLMetaFilter, -) -from api.graphql.loaders import ( - get_context, - LoaderKeys, -) +from api.graphql.filters import GraphQLFilter, GraphQLMetaFilter +from api.graphql.loaders import LoaderKeys, get_context from db.python import enum_tables -from db.python.layers import AnalysisLayer, SequencingGroupLayer, SampleLayer +from db.python.layers import AnalysisLayer, SampleLayer, SequencingGroupLayer from db.python.layers.assay import AssayLayer from db.python.layers.family import FamilyLayer from db.python.tables.analysis import AnalysisFilter @@ -34,21 +28,19 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( - SampleInternal, - ParticipantInternal, - Project, AnalysisInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, + Project, + SampleInternal, SequencingGroupInternal, - AssayInternal, ) from models.models.sample import sample_id_transform_to_raw -from models.utils.sample_id_format import ( - sample_id_format, -) +from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) enum_methods = {} @@ -336,6 +328,13 @@ async def samples( samples = await info.context[LoaderKeys.SAMPLES_FOR_PARTICIPANTS].load(q) return [GraphQLSample.from_internal(s) for s in samples] + @strawberry.field + async def phenotypes( + self, info: Info, root: 'GraphQLParticipant' + ) -> strawberry.scalars.JSON: + loader = info.context[LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS] + return await loader.load(root.id) + @strawberry.field async def families( self, info: Info, root: 'GraphQLParticipant' diff --git a/db/python/layers/participant.py b/db/python/layers/participant.py index 216427e31..44d6d4db2 100644 --- a/db/python/layers/participant.py +++ b/db/python/layers/participant.py @@ -2,9 +2,9 @@ import re from collections import defaultdict from enum import Enum -from typing import Dict, List, Tuple, Optional, Any +from typing import Any, Dict, List, Optional, Tuple -from db.python.connect import NotFoundError, NoOpAenter +from db.python.connect import NoOpAenter, NotFoundError from db.python.layers.base import BaseLayer from db.python.layers.sample import SampleLayer from db.python.tables.family import FamilyTable @@ -335,6 +335,21 @@ async def fill_in_missing_participants(self): return f'Updated {len(sample_ids_to_update)} records' + async def insert_participant_phenotypes( + self, participant_phenotypes: dict[int, dict] + ): + """ + Insert participant phenotypes, with format: {pid: {key: value}} + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.add_key_value_rows( + [ + (pid, pk, pv) + for pid, phenotypes in participant_phenotypes.items() + for pk, pv in phenotypes.items() + ] + ) + async def generic_individual_metadata_importer( self, headers: List[str], @@ -653,6 +668,17 @@ async def update_many_participant_external_ids( # region PHENOTYPES / SEQR + async def get_phenotypes_for_participants( + self, participant_ids: list[int] + ) -> dict[int, dict[str, Any]]: + """ + Get phenotypes for participants keyed by by pid + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.get_key_value_rows_for_participant_ids( + participant_ids=participant_ids 
+ ) + async def get_seqr_individual_template( self, project: int, diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index f587e483b..831ddd402 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -15,7 +15,7 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() # pylint: disable=no-member - with AnyPath(ped_file_path).open() as ped_file: # type: ignore + with AnyPath(ped_file_path).open() as ped_file: fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/test/test_graphql.py b/test/test_graphql.py index c61eed238..a52a5f947 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -229,3 +229,30 @@ async def test_sg_analyses_query(self): self.assertIn('id', analyses[0]) self.assertIn('meta', analyses[0]) self.assertIn('output', analyses[0]) + + @run_as_sync + async def test_participant_phenotypes(self): + """ + Test getting participant phenotypes in graphql + """ + # insert participant + p = await self.player.upsert_participant( + ParticipantUpsertInternal(external_id='Demeter', meta={}, samples=[]) + ) + + phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} + # insert participant_phenotypes + await self.player.insert_participant_phenotypes({p.id: phenotypes}) + + q = """ +query MyQuery($pid: Int!) { + participant(id: $pid) { + phenotypes + } +}""" + + resp = await self.run_graphql_query_async(q, {'pid': p.id}) + + self.assertIn('participant', resp) + self.assertIn('phenotypes', resp['participant']) + self.assertDictEqual(phenotypes, resp['participant']['phenotypes']) From f128b70ca71d33537c9d0922f196901dc3f23d2f Mon Sep 17 00:00:00 2001 From: John Marshall Date: Mon, 18 Sep 2023 15:33:57 +1200 Subject: [PATCH 3/3] Use current GitHub Actions versions (and avoid ::set-output) (#520) Prevent "The following action(s) uses node12 which is deprecated" warnings by updating to the current releases of the Actions used. (Node.js 12 is scheduled for removal from Actions runners next month.) Use the current releases instead of `@main` too, as we don't want to be affected by bleeding-edge bugs. Rewrite ::set-output as a write to $GITHUB_OUTPUT instead; the ::save-state and ::set-output commands are also deprecated.
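For example, the test workflow step that previously published the unit-test exit code with

    echo "::set-output name=rc::$rc"

now appends it to the step's output file instead:

    echo "rc=$rc" >> $GITHUB_OUTPUT

Only the producing side changes; later steps still read the value as ${{ steps.runtests.outputs.rc }}.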
--- .github/workflows/deploy.yaml | 8 ++++---- .github/workflows/lint.yaml | 4 ++-- .github/workflows/test.yaml | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index ca5c077f1..997b1129d 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -21,10 +21,10 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - name: "gcloud setup" - uses: google-github-actions/setup-gcloud@v0 + uses: google-github-actions/setup-gcloud@v1 with: project_id: sample-metadata service_account_key: ${{ secrets.GCP_SERVER_DEPLOY_KEY }} @@ -33,11 +33,11 @@ jobs: run: | gcloud auth configure-docker australia-southeast1-docker.pkg.dev - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 68f65dc86..71c330c5a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -14,9 +14,9 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aaffc014b..e15e52f8e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,13 +16,13 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" @@ -70,10 +70,10 @@ jobs: rc=$? coverage xml - echo "::set-output name=rc::$rc" + echo "rc=$rc" >> $GITHUB_OUTPUT - name: "Upload coverage report" - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: files: ./coverage.xml @@ -89,7 +89,7 @@ jobs: - name: Fail if tests are not passing if: ${{ steps.runtests.outputs.rc != 0 }} - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: script: | core.setFailed('Unit tests failed with rc = ${{ steps.runtests.outputs.rc }}')