diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index ca5c077f1..997b1129d 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -21,10 +21,10 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - name: "gcloud setup" - uses: google-github-actions/setup-gcloud@v0 + uses: google-github-actions/setup-gcloud@v1 with: project_id: sample-metadata service_account_key: ${{ secrets.GCP_SERVER_DEPLOY_KEY }} @@ -33,11 +33,11 @@ jobs: run: | gcloud auth configure-docker australia-southeast1-docker.pkg.dev - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index d4863658a..71c330c5a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,22 +4,52 @@ on: push jobs: lint: runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: 1 + BUILDKIT_PROGRESS: plain + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + # used for generating API + SM_DOCKER: samplemetadata:dev defaults: run: shell: bash -eo pipefail -l {0} - steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - cache: "pip" - - name: Install packages + - uses: actions/setup-java@v2 + with: + distribution: "temurin" # See 'Supported distributions' for available options + java-version: "17" + + - name: Setup build env + run: | + set -euxo pipefail + + pip install -r requirements-dev.txt + pip install -r requirements.txt + + # openapi-generator + wget https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/5.3.0/openapi-generator-cli-5.3.0.jar -O openapi-generator-cli.jar + + - name: "build image" + run: | + docker build \ + --build-arg SM_ENVIRONMENT=local \ + --tag $SM_DOCKER \ + -f deploy/api/Dockerfile \ + . + + - name: Build + install packages run: | + export OPENAPI_COMMAND="java -jar openapi-generator-cli.jar" + python regenerate_api.py pip install -r requirements-dev.txt pip install . + mkdir .mypy_cache - name: pre-commit run: pre-commit run --all-files diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c73329824..1061ddc50 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,13 +16,13 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" @@ -70,10 +70,10 @@ jobs: rc=$? 
coverage xml - echo "::set-output name=rc::$rc" + echo "rc=$rc" >> $GITHUB_OUTPUT - name: "Upload coverage report" - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: files: ./coverage.xml @@ -89,7 +89,7 @@ jobs: - name: Fail if tests are not passing if: ${{ steps.runtests.outputs.rc != 0 }} - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: script: | core.setFailed('Unit tests failed with rc = ${{ steps.runtests.outputs.rc }}') diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 155d7cda1..c4d4ec406 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,92 +1,98 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - exclude: '\.*conda/.*' - - id: end-of-file-fixer - - id: trailing-whitespace - exclude: '\.txt$|\.tsv$' - - id: check-case-conflict - - id: check-merge-conflict - - id: detect-private-key - - id: debug-statements - - id: check-added-large-files + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + exclude: '\.*conda/.*' + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: '\.txt$|\.tsv$' + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: debug-statements + - id: check-added-large-files - - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.33.0 - hooks: - - id: markdownlint - args: ["--config", ".markdownlint.json"] + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.33.0 + hooks: + - id: markdownlint + args: ["--config", ".markdownlint.json"] - - repo: https://github.com/ambv/black - rev: 23.3.0 - hooks: - - id: black - args: [.] - pass_filenames: false - always_run: true - exclude: ^metamist/ + - repo: https://github.com/ambv/black + rev: 23.3.0 + hooks: + - id: black + args: [.] 
+ pass_filenames: false + always_run: true + exclude: ^metamist/ - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear, flake8-quotes] + - repo: https://github.com/PyCQA/flake8 + rev: "6.0.0" + hooks: + - id: flake8 + additional_dependencies: [flake8-bugbear, flake8-quotes] - # Using system installation of pylint to support checking python module imports - - repo: local - hooks: - - id: pylint - name: pylint - entry: pylint - language: system - types: [python] + # Using system installation of pylint to support checking python module imports + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] - # mypy - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 - hooks: - - id: mypy - args: - [ - --pretty, - --show-error-codes, - --no-strict-optional, - --ignore-missing-imports, - --install-types, - --non-interactive, - ] - additional_dependencies: - - strawberry-graphql[fastapi]==0.138.1 + # mypy + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.5.1 + hooks: + - id: mypy + args: + [ + --pretty, + --show-error-codes, + --no-strict-optional, + --ignore-missing-imports, + --install-types, + --non-interactive, + --show-error-context, + --check-untyped-defs, + --explicit-package-bases, + --disable-error-code, + operator, + ] + additional_dependencies: + - strawberry-graphql[fastapi]==0.206.0 + - types-PyMySQL==1.1.0.1 - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.0-alpha.4" - hooks: - - id: prettier - # I'm not exactly sure why it changes behaviour, but - # calling `cd web`, then calling `ls src/**/*.tsx` - # returns different results to `cd web && ls src/**/*.tsx` - # so just include both patterns here - entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.0-alpha.4" + hooks: + - id: prettier + # I'm not exactly sure why it changes behaviour, but + # calling `cd web`, then calling `ls src/**/*.tsx` + # returns different results to `cd web && ls src/**/*.tsx` + # so just include both patterns here + entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' - - repo: https://github.com/pre-commit/mirrors-eslint - rev: "v8.33.0" - hooks: - - id: eslint - entry: bash -c 'cd web && eslint' - files: \.[jt]sx?$ - types: [file] - additional_dependencies: - - eslint@^7.32.0 - - eslint-config-airbnb@^19.0.4 - - eslint-config-airbnb-base@^15.0.0 - - eslint-config-airbnb-typescript@^17.0.0 - - eslint-config-prettier@^8.6.0 - - eslint-plugin-import@^2.26.0 - - eslint-plugin-jsx-a11y@^6.6.1 - - eslint-plugin-prettier@^4.2.1 - - eslint-plugin-react@^7.31.11 - - eslint-plugin-react-hooks@^4.6.0 - - "@typescript-eslint/eslint-plugin@^5.48.0" - - "@typescript-eslint/parser@^5.48.0" + - repo: https://github.com/pre-commit/mirrors-eslint + rev: "v8.33.0" + hooks: + - id: eslint + entry: bash -c 'cd web && eslint' + files: \.[jt]sx?$ + types: [file] + additional_dependencies: + - eslint@^7.32.0 + - eslint-config-airbnb@^19.0.4 + - eslint-config-airbnb-base@^15.0.0 + - eslint-config-airbnb-typescript@^17.0.0 + - eslint-config-prettier@^8.6.0 + - eslint-plugin-import@^2.26.0 + - eslint-plugin-jsx-a11y@^6.6.1 + - eslint-plugin-prettier@^4.2.1 + - eslint-plugin-react@^7.31.11 + - eslint-plugin-react-hooks@^4.6.0 + - "@typescript-eslint/eslint-plugin@^5.48.0" + - 
"@typescript-eslint/parser@^5.48.0" diff --git a/api/graphql/loaders.py b/api/graphql/loaders.py index 2a54fc514..905297009 100644 --- a/api/graphql/loaders.py +++ b/api/graphql/loaders.py @@ -13,26 +13,26 @@ from db.python.connect import NotFoundError from db.python.layers import ( AnalysisLayer, - SampleLayer, AssayLayer, + FamilyLayer, ParticipantLayer, + SampleLayer, SequencingGroupLayer, - FamilyLayer, ) from db.python.tables.analysis import AnalysisFilter from db.python.tables.assay import AssayFilter from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sample import SampleFilter from db.python.tables.sequencing_group import SequencingGroupFilter -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.models import ( - AssayInternal, - SampleInternal, - SequencingGroupInternal, AnalysisInternal, - ParticipantInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, Project, + SampleInternal, + SequencingGroupInternal, ) @@ -53,6 +53,8 @@ class LoaderKeys(enum.Enum): SAMPLES_FOR_PARTICIPANTS = 'samples_for_participants' SAMPLES_FOR_PROJECTS = 'samples_for_projects' + PHENOTYPES_FOR_PARTICIPANTS = 'phenotypes_for_participants' + PARTICIPANTS_FOR_IDS = 'participants_for_ids' PARTICIPANTS_FOR_FAMILIES = 'participants_for_families' PARTICIPANTS_FOR_PROJECTS = 'participants_for_projects' @@ -291,9 +293,7 @@ async def load_participants_for_ids( p_by_id = {p.id: p for p in persons} missing_pids = set(participant_ids) - set(p_by_id.keys()) if missing_pids: - raise NotFoundError( - f'Could not find participants with ids {missing_pids}' - ) + raise NotFoundError(f'Could not find participants with ids {missing_pids}') return [p_by_id.get(p) for p in participant_ids] @@ -400,7 +400,23 @@ async def load_analyses_for_sequencing_groups( return by_sg_id -async def get_context(request: Request, connection=get_projectless_db_connection): # pylint: disable=unused-argument +@connected_data_loader(LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS) +async def load_phenotypes_for_participants( + participant_ids: list[int], connection +) -> list[dict]: + """ + Data loader for phenotypes for participants + """ + player = ParticipantLayer(connection) + participant_phenotypes = await player.get_phenotypes_for_participants( + participant_ids=participant_ids + ) + return [participant_phenotypes.get(pid, {}) for pid in participant_ids] + + +async def get_context( + request: Request, connection=get_projectless_db_connection +): # pylint: disable=unused-argument """Get loaders / cache context for strawberyy GraphQL""" mapped_loaders = {k: fn(connection) for k, fn in loaders.items()} return { diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 8befa8867..7255821ed 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -14,16 +14,10 @@ from strawberry.fastapi import GraphQLRouter from strawberry.types import Info -from api.graphql.filters import ( - GraphQLFilter, - GraphQLMetaFilter, -) -from api.graphql.loaders import ( - get_context, - LoaderKeys, -) +from api.graphql.filters import GraphQLFilter, GraphQLMetaFilter +from api.graphql.loaders import LoaderKeys, get_context from db.python import enum_tables -from db.python.layers import AnalysisLayer, SequencingGroupLayer, SampleLayer +from db.python.layers import AnalysisLayer, SampleLayer, SequencingGroupLayer from db.python.layers.assay import AssayLayer from db.python.layers.family import FamilyLayer from db.python.tables.analysis import 
AnalysisFilter @@ -34,21 +28,19 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( - SampleInternal, - ParticipantInternal, - Project, AnalysisInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, + Project, + SampleInternal, SequencingGroupInternal, - AssayInternal, ) from models.models.sample import sample_id_transform_to_raw -from models.utils.sample_id_format import ( - sample_id_format, -) +from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) enum_methods = {} @@ -336,6 +328,13 @@ async def samples( samples = await info.context[LoaderKeys.SAMPLES_FOR_PARTICIPANTS].load(q) return [GraphQLSample.from_internal(s) for s in samples] + @strawberry.field + async def phenotypes( + self, info: Info, root: 'GraphQLParticipant' + ) -> strawberry.scalars.JSON: + loader = info.context[LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS] + return await loader.load(root.id) + @strawberry.field async def families( self, info: Info, root: 'GraphQLParticipant' diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 0af9ade7b..1a0057dcc 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -8,12 +8,11 @@ from pydantic import BaseModel from starlette.responses import StreamingResponse -from api.utils.dates import parse_date_only_string from api.utils.db import ( - get_projectless_db_connection, + Connection, get_project_readonly_connection, get_project_write_connection, - Connection, + get_projectless_db_connection, ) from api.utils.export import ExportType from db.python.layers.analysis import AnalysisLayer @@ -22,20 +21,17 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models.analysis import ( + Analysis, AnalysisInternal, ProjectSizeModel, - SequencingGroupSizeModel, - DateSizeModel, - Analysis, ) from models.utils.sample_id_format import ( sample_id_transform_to_raw_list, - sample_id_format, ) from models.utils.sequencing_group_id_format import ( + sequencing_group_id_format, sequencing_group_id_format_list, sequencing_group_id_transform_to_raw_list, - sequencing_group_id_format, ) router = APIRouter(prefix='/analysis', tags=['analysis']) @@ -326,40 +322,42 @@ async def get_sequencing_group_file_sizes( """ Get the per sample file size by type over the given projects and date range """ - atable = AnalysisLayer(connection) - - # Check access to projects - project_ids = None - pt = ProjectPermissionsTable(connection=connection.connection) - project_ids = await pt.get_project_ids_from_names_and_user( - connection.author, project_names, readonly=True - ) - - # Map from internal pids to project name - prj_name_map = dict(zip(project_ids, project_names)) - - # Convert dates - start = parse_date_only_string(start_date) - end = parse_date_only_string(end_date) - - # Get results with internal ids as keys - results = await atable.get_sequencing_group_file_sizes( - project_ids=project_ids, start_date=start, end_date=end - ) - - # Convert to the correct output type, converting internal ids to external - fixed_pids: list[Any] = [ - ProjectSizeModel( - project=prj_name_map[project_data['project']], - samples=[ - SequencingGroupSizeModel( - sample=sample_id_format(s['sample']), - dates=[DateSizeModel(**d) for d in s['dates']], - ) - for s in project_data['samples'] - ], - ) - for project_data in results - ] - return 
fixed_pids + raise NotImplementedError('This route is broken, and not properly implemented yet') + # atable = AnalysisLayer(connection) + + # # Check access to projects + # project_ids = None + # pt = ProjectPermissionsTable(connection=connection.connection) + # project_ids = await pt.get_project_ids_from_names_and_user( + # connection.author, project_names, readonly=True + # ) + + # # Map from internal pids to project name + # prj_name_map = dict(zip(project_ids, project_names)) + + # # Convert dates + # start = parse_date_only_string(start_date) + # end = parse_date_only_string(end_date) + + # # Get results with internal ids as keys + # results = await atable.get_sequencing_group_file_sizes( + # project_ids=project_ids, start_date=start, end_date=end + # ) + + # # Convert to the correct output type, converting internal ids to external + # fixed_pids: list[Any] = [ + # ProjectSizeModel( + # project=prj_name_map[project_data['project']], + # samples=[ + # SequencingGroupSizeModel( + # sample=sample_id_format(s['sample']), + # dates=[DateSizeModel(**d) for d in s['dates']], + # ) + # for s in project_data['samples'] + # ], + # ) + # for project_data in results + # ] + + # return fixed_pids diff --git a/db/python/connect.py b/db/python/connect.py index 0b9acf57a..8a5a7811e 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -121,7 +121,9 @@ def get_connection_string(self): if self.port: _host += f':{self.port}' - options = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} + options: dict[ + str, str | int + ] = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} _options = '&'.join(f'{k}={v}' for k, v in options.items()) url = f'mysql://{u_p}@{_host}/{self.dbname}?{_options}' diff --git a/db/python/enum_tables/enums.py b/db/python/enum_tables/enums.py index e6419b98b..113daf3d6 100644 --- a/db/python/enum_tables/enums.py +++ b/db/python/enum_tables/enums.py @@ -1,6 +1,7 @@ -import re import abc +import re from functools import lru_cache + from async_lru import alru_cache from db.python.connect import DbBase @@ -36,7 +37,8 @@ def _get_table_name(cls): matcher = table_name_matcher.match(tn) if not matcher: raise ValueError( - f'The tablename {tn} is not valid (must match {table_name_matcher.pattern})' + f'The tablename {tn} is not valid (must match ' + f'{table_name_matcher.pattern})' ) return tn @@ -47,9 +49,9 @@ async def get(self) -> list[str]: """ _query = f'SELECT DISTINCT name FROM {self._get_table_name()}' rows = await self.connection.fetch_all(_query) - rows = [r['name'] for r in rows] + nrows = [r['name'] for r in rows] - return rows + return nrows async def insert(self, value: str): """ diff --git a/db/python/layers/participant.py b/db/python/layers/participant.py index 216427e31..44d6d4db2 100644 --- a/db/python/layers/participant.py +++ b/db/python/layers/participant.py @@ -2,9 +2,9 @@ import re from collections import defaultdict from enum import Enum -from typing import Dict, List, Tuple, Optional, Any +from typing import Any, Dict, List, Optional, Tuple -from db.python.connect import NotFoundError, NoOpAenter +from db.python.connect import NoOpAenter, NotFoundError from db.python.layers.base import BaseLayer from db.python.layers.sample import SampleLayer from db.python.tables.family import FamilyTable @@ -335,6 +335,21 @@ async def fill_in_missing_participants(self): return f'Updated {len(sample_ids_to_update)} records' + async def insert_participant_phenotypes( + self, participant_phenotypes: dict[int, dict] + ): + """ + Insert 
participant phenotypes, with format: {pid: {key: value}} + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.add_key_value_rows( + [ + (pid, pk, pv) + for pid, phenotypes in participant_phenotypes.items() + for pk, pv in phenotypes.items() + ] + ) + async def generic_individual_metadata_importer( self, headers: List[str], @@ -653,6 +668,17 @@ async def update_many_participant_external_ids( # region PHENOTYPES / SEQR + async def get_phenotypes_for_participants( + self, participant_ids: list[int] + ) -> dict[int, dict[str, Any]]: + """ + Get phenotypes for participants keyed by by pid + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.get_key_value_rows_for_participant_ids( + participant_ids=participant_ids + ) + async def get_seqr_individual_template( self, project: int, diff --git a/db/python/layers/seqr.py b/db/python/layers/seqr.py index 8c203979a..32ef5f5e2 100644 --- a/db/python/layers/seqr.py +++ b/db/python/layers/seqr.py @@ -1,8 +1,8 @@ # pylint: disable=unnecessary-lambda-assignment,too-many-locals,broad-exception-caught +import asyncio import os import re -import asyncio import traceback from collections import defaultdict from datetime import datetime @@ -15,13 +15,14 @@ from cpg_utils.cloud import get_google_identity_token from api.settings import ( - SEQR_URL, SEQR_AUDIENCE, SEQR_MAP_LOCATION, SEQR_SLACK_NOTIFICATION_CHANNEL, + SEQR_URL, get_slack_token, ) from db.python.connect import Connection +from db.python.enum_tables import SequencingTypeTable from db.python.layers.analysis import AnalysisLayer from db.python.layers.base import BaseLayer from db.python.layers.family import FamilyLayer @@ -29,15 +30,14 @@ from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.analysis import AnalysisFilter from db.python.tables.project import ProjectPermissionsTable -from db.python.enum_tables import SequencingTypeTable -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.enums import AnalysisStatus # literally the most temporary thing ever, but for complete # automation need to have sample inclusion / exclusion from models.utils.sequencing_group_id_format import ( - sequencing_group_id_format_list, sequencing_group_id_format, + sequencing_group_id_format_list, ) SEQUENCING_GROUPS_TO_IGNORE = {22735, 22739} @@ -421,9 +421,9 @@ async def update_es_index( ) if len(es_index_analyses) == 0: - return [f'No ES index to synchronise'] + return ['No ES index to synchronise'] - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) es_index = es_index_analyses[-1].output diff --git a/db/python/layers/sequencing_group.py b/db/python/layers/sequencing_group.py index 6d6a02a55..5ff5b133b 100644 --- a/db/python/layers/sequencing_group.py +++ b/db/python/layers/sequencing_group.py @@ -6,13 +6,13 @@ from db.python.tables.assay import AssayTable, NoOpAenter from db.python.tables.sample import SampleTable from db.python.tables.sequencing_group import ( - SequencingGroupTable, SequencingGroupFilter, + SequencingGroupTable, ) from db.python.utils import ProjectId from models.models.sequencing_group import ( - SequencingGroupUpsertInternal, SequencingGroupInternal, + SequencingGroupUpsertInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format @@ -133,7 +133,7 @@ async def get_participant_ids_sequencing_group_ids_for_sequencing_type( ( 
projects, pids, - ) = await self.seqgt.get_participant_ids_and_sequence_group_ids_for_sequencing_type( + ) = await self.seqgt.get_participant_ids_and_sequencing_group_ids_for_sequencing_type( sequencing_type ) if not pids: @@ -209,7 +209,7 @@ async def create_sequencing_group_from_assays( type_=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, + assay_ids=assay_ids, meta=meta, ) return SequencingGroupInternal( @@ -217,7 +217,6 @@ async def create_sequencing_group_from_assays( type=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, sample_id=next(iter(sample_ids)), meta=meta, assays=assays, @@ -249,7 +248,7 @@ async def recreate_sequencing_group_with_new_assays( technology=seqgroup.technology, platform=seqgroup.platform, meta={**seqgroup.meta, **meta}, - sequence_ids=assays, + assay_ids=assays, author=self.author, open_transaction=False, ) @@ -324,7 +323,7 @@ async def upsert_sequencing_groups( technology=sg.technology, platform=sg.platform, meta=sg.meta, - sequence_ids=assay_ids, + assay_ids=assay_ids, open_transaction=False, ) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 6fffe2e20..95979457e 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -16,12 +16,12 @@ from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable from models.models import ( + AssayInternal, + FamilySimpleInternal, NestedParticipantInternal, NestedSampleInternal, NestedSequencingGroupInternal, - AssayInternal, SearchItem, - FamilySimpleInternal, ) from models.models.web import ProjectSummaryInternal, WebProject @@ -82,7 +82,7 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # the query to determine the total count, then take the selection of samples # for the current page. This is more efficient than doing 2 queries separately. 
sample_query = f""" - SELECT s.id, s.external_id, s.type, s.meta, s.participant_id + SELECT s.id, s.external_id, s.type, s.meta, s.participant_id, s.active FROM sample s LEFT JOIN assay a ON s.id = a.sample_id LEFT JOIN participant p ON p.id = s.participant_id @@ -189,6 +189,7 @@ def _project_summary_process_sample_rows( created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), + active=bool(ord(s['active'])), ) for s in sample_rows ] @@ -402,8 +403,8 @@ async def get_project_summary( sg_models_by_sample_id=seq_group_models_by_sample_id, sample_id_start_times=sample_id_start_times, ) - # the pydantic model is casting to the id to a str, as that makes sense on the front end - # but cast back here to do the lookup + # the pydantic model is casting to the id to a str, as that makes sense on + # the front end but cast back here to do the lookup sid_to_pid = {s['id']: s['participant_id'] for s in sample_rows} smodels_by_pid = group_by(smodels, lambda s: sid_to_pid[int(s.id)]) @@ -429,7 +430,7 @@ async def get_project_summary( reported_sex=None, reported_gender=None, karyotype=None, - project=self.project, + # project=self.project, ) ) elif pid not in pid_seen: @@ -445,7 +446,7 @@ async def get_project_summary( reported_sex=p['reported_sex'], reported_gender=p['reported_gender'], karyotype=p['karyotype'], - project=self.project, + # project=self.project, ) ) diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index d6d2fe41d..a1d4a4c82 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -2,15 +2,15 @@ import dataclasses from collections import defaultdict from datetime import datetime -from typing import List, Optional, Set, Tuple, Dict, Any +from typing import Any, Dict, List, Optional, Set, Tuple from db.python.connect import DbBase, NotFoundError from db.python.tables.project import ProjectId from db.python.utils import ( - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + to_db_json, ) from models.enums import AnalysisStatus from models.models.analysis import AnalysisInternal @@ -285,7 +285,7 @@ async def get_incomplete_analyses( """ Gets details of analysis with status queued or in-progress """ - _query = f""" + _query = """ SELECT a.id as id, a.type as type, a.status as status, a.output as output, a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.meta as meta @@ -339,7 +339,7 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( if row['sequencing_group_id'] in seen_sequencing_group_ids: continue seen_sequencing_group_ids.add(row['sequencing_group_id']) - analyses.append(AnalysisInternal.from_db(**row)) + analyses.append(AnalysisInternal.from_db(**dict(row))) # reverse after timestamp_completed return analyses[::-1] @@ -439,7 +439,7 @@ async def get_sample_cram_path_map_for_seqr( seq_check = 'IN :seq_types' values['seq_types'] = sequencing_types - filters.append(f'JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) + filters.append('JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) if participant_ids: filters.append('p.id IN :pids') diff --git a/db/python/tables/project.py b/db/python/tables/project.py index d7c002c28..ac01e18bb 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -1,22 +1,21 @@ # pylint: disable=global-statement import asyncio -from typing import Dict, List, Set, Iterable, 
Optional, Tuple, Any - import json from datetime import datetime, timedelta +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from cpg_utils.cloud import get_cached_group_members from databases import Database from google.cloud import secretmanager -from cpg_utils.cloud import get_cached_group_members from api.settings import MEMBERS_CACHE_LOCATION, is_all_access from db.python.utils import ( - ProjectId, Forbidden, + InternalError, NoProjectAccess, + ProjectId, get_logger, to_db_json, - InternalError, ) from models.models.project import Project @@ -440,9 +439,9 @@ async def get_seqr_projects(self) -> list[dict[str, Any]]: projects = [] for r in await self.connection.fetch_all(_query): - r = dict(r) - r['meta'] = json.loads(r['meta'] or '{}') - projects.append(r) + row = dict(r) + row['meta'] = json.loads(row['meta'] or '{}') + projects.append(row) return projects diff --git a/db/python/tables/sequencing_group.py b/db/python/tables/sequencing_group.py index a3a83e864..fefa36116 100644 --- a/db/python/tables/sequencing_group.py +++ b/db/python/tables/sequencing_group.py @@ -6,11 +6,11 @@ from db.python.connect import DbBase, NoOpAenter, NotFoundError from db.python.utils import ( - ProjectId, - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + ProjectId, + to_db_json, ) from models.models.sequencing_group import SequencingGroupInternal @@ -125,10 +125,10 @@ async def get_sequencing_groups_by_ids( f'Couldn\'t find sequencing groups with internal id {ids})' ) - rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] - projects = set(r.project for r in rows) + sg_rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] + projects = set(r.project for r in sg_rows) - return projects, rows + return projects, sg_rows async def get_assay_ids_by_sequencing_group_ids( self, ids: list[int] @@ -172,7 +172,7 @@ async def get_all_sequencing_group_ids_by_sample_ids_by_type( return sequencing_group_ids_by_sample_ids_by_type - async def get_participant_ids_and_sequence_group_ids_for_sequencing_type( + async def get_participant_ids_and_sequencing_group_ids_for_sequencing_type( self, sequencing_type: str ) -> tuple[set[ProjectId], dict[int, list[int]]]: """ @@ -252,7 +252,7 @@ async def create_sequencing_group( type_: str, technology: str, platform: str, - sequence_ids: list[int], + assay_ids: list[int], meta: dict = None, author: str = None, open_transaction=True, @@ -319,16 +319,16 @@ async def create_sequencing_group( _query, {**values, 'author': author or self.author}, ) - sequence_insert_values = [ + assay_id_insert_values = [ { 'seqgroup': id_of_seq_group, 'assayid': s, 'author': author or self.author, } - for s in sequence_ids + for s in assay_ids ] await self.connection.execute_many( - _seqg_linker_query, sequence_insert_values + _seqg_linker_query, assay_id_insert_values ) return id_of_seq_group diff --git a/etl/extract/main.py b/etl/extract/main.py index 9de820e46..6b05cbbaf 100644 --- a/etl/extract/main.py +++ b/etl/extract/main.py @@ -8,7 +8,8 @@ import functions_framework import google.cloud.bigquery as bq from cpg_utils.cloud import email_from_id_token -from google.cloud import pubsub_v1 + +from google.cloud import pubsub_v1 # type: ignore BIGQUERY_TABLE = os.getenv('BIGQUERY_TABLE') PUBSUB_TOPIC = os.getenv('PUBSUB_TOPIC') @@ -77,9 +78,7 @@ def etl_extract(request: flask.Request): bq_client = bq.Client() pubsub_client = pubsub_v1.PublisherClient() - errors = bq_client.insert_rows_json( - BIGQUERY_TABLE, [bq_obj | 
{'body': jbody_str}] - ) + errors = bq_client.insert_rows_json(BIGQUERY_TABLE, [bq_obj | {'body': jbody_str}]) if errors: return { diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index 6c7c8f4ff..fbef07cc9 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -1,47 +1,45 @@ # pylint: disable=too-many-lines,too-many-instance-attributes,too-many-locals,unused-argument,assignment-from-none,invalid-name,ungrouped-imports -import json -import sys import asyncio import csv +import json import logging import os import re +import sys from abc import abstractmethod from collections import defaultdict +from functools import wraps from io import StringIO from typing import ( - List, + Any, + Coroutine, Dict, - Union, - Optional, - Tuple, + Hashable, + Iterable, + Iterator, + List, Match, - Any, + Optional, Sequence, - TypeVar, - Iterator, - Coroutine, Set, - Iterable, - Hashable, + Tuple, + TypeVar, + Union, ) -from functools import wraps from cloudpathlib import AnyPath -from metamist.graphql import query_async, gql -from metamist.parser.cloudhelper import CloudHelper, group_by - -from metamist.apis import SampleApi, AssayApi, AnalysisApi, ParticipantApi +from metamist.apis import AnalysisApi, AssayApi, ParticipantApi, SampleApi +from metamist.graphql import gql, query_async from metamist.models import ( Analysis, AnalysisStatus, + AssayUpsert, ParticipantUpsert, SampleUpsert, SequencingGroupUpsert, - AssayUpsert, ) - +from metamist.parser.cloudhelper import CloudHelper, group_by # https://mypy.readthedocs.io/en/stable/runtime_troubles.html#using-new-additions-to-the-typing-module if sys.version_info >= (3, 8): @@ -322,8 +320,8 @@ def __init__( def to_sm(self) -> AssayUpsert: """Convert to SM upsert model""" return AssayUpsert( - type=self.assay_type, id=self.internal_id, + type=self.assay_type, external_ids=self.external_ids, # sample_id=self.s, meta=self.meta, @@ -1084,7 +1082,9 @@ async def add_analyses(self, analyses_to_add, external_to_internal_id_map): for external_id, analysis in chunked_analysis: # TODO: resolve this external_to_internal_id_map # this one is going to be slightly harder : - analysis.sequence_group_ids = [external_to_internal_id_map[external_id]] + analysis.sequencing_group_ids = [ + external_to_internal_id_map[external_id] + ] promises.append( analysisapi.create_analysis_async( project=proj, analysis_model=analysis diff --git a/models/base.py b/models/base.py index 133671761..389c38a56 100644 --- a/models/base.py +++ b/models/base.py @@ -2,7 +2,7 @@ # annotate any external objects that must be instantiated with this # type to force openapi generator to allow for Nones (it will actually allow Any) -OpenApiGenNoneType = bytes +OpenApiGenNoneType = bytes | None class SMBase(BaseModel): diff --git a/models/models/__init__.py b/models/models/__init__.py index 24069f708..37f1068a9 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -1,60 +1,61 @@ from models.models.analysis import ( - AnalysisInternal, Analysis, + AnalysisInternal, DateSizeModel, - SequencingGroupSizeModel, ProjectSizeModel, + SequencingGroupSizeModel, ) from models.models.assay import ( - AssayInternal, - AssayUpsertInternal, Assay, + AssayInternal, AssayUpsert, + AssayUpsertInternal, ) from models.models.family import ( - FamilySimpleInternal, + Family, FamilyInternal, FamilySimple, - Family, + FamilySimpleInternal, PedRowInternal, ) from models.models.participant import ( - ParticipantInternal, + NestedParticipant, 
NestedParticipantInternal, - ParticipantUpsertInternal, Participant, - NestedParticipant, + ParticipantInternal, ParticipantUpsert, + ParticipantUpsertInternal, ) from models.models.project import Project from models.models.sample import ( - SampleInternal, + NestedSample, NestedSampleInternal, - SampleUpsertInternal, Sample, - NestedSample, + SampleInternal, SampleUpsert, + SampleUpsertInternal, ) from models.models.search import ( - SearchResponseData, + ErrorResponse, FamilySearchResponseData, ParticipantSearchResponseData, SampleSearchResponseData, - ErrorResponse, - SearchResponse, SearchItem, + SearchResponse, + SearchResponseData, + SequencingGroupSearchResponseData, ) from models.models.sequencing_group import ( - SequencingGroupInternal, + NestedSequencingGroup, NestedSequencingGroupInternal, - SequencingGroupUpsertInternal, SequencingGroup, - NestedSequencingGroup, + SequencingGroupInternal, SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) from models.models.web import ( + PagingLinks, + ProjectSummary, ProjectSummaryInternal, WebProject, - ProjectSummary, - PagingLinks, ) diff --git a/models/models/analysis.py b/models/models/analysis.py index a88d6dd99..f35e95a48 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -15,7 +15,7 @@ class AnalysisInternal(SMBase): """Model for Analysis""" - id: int | None + id: int | None = None type: str status: AnalysisStatus output: str = None diff --git a/models/models/assay.py b/models/models/assay.py index 6e17a6ef7..04658ad44 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,11 +1,8 @@ import json from typing import Any -from models.base import SMBase, OpenApiGenNoneType -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.base import OpenApiGenNoneType, SMBase +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class AssayInternal(SMBase): @@ -104,12 +101,12 @@ def to_internal(self): _sample_id = None if self.sample_id: # but may be provided directly when inserting directly - _sample_id = sample_id_transform_to_raw(self.sample_id) + _sample_id = sample_id_transform_to_raw(self.sample_id) # type: ignore return AssayUpsertInternal( - id=self.id, - type=self.type, - external_ids=self.external_ids, - sample_id=_sample_id, - meta=self.meta, + id=self.id, # type: ignore + type=self.type, # type: ignore + external_ids=self.external_ids, # type: ignore + sample_id=_sample_id, # type: ignore + meta=self.meta, # type: ignore ) diff --git a/models/models/participant.py b/models/models/participant.py index c72451274..ab843e343 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,14 +1,14 @@ import json from db.python.utils import ProjectId -from models.base import SMBase, OpenApiGenNoneType +from models.base import OpenApiGenNoneType, SMBase +from models.models.family import FamilySimple, FamilySimpleInternal from models.models.sample import ( - SampleUpsertInternal, - SampleUpsert, - NestedSampleInternal, NestedSample, + NestedSampleInternal, + SampleUpsert, + SampleUpsertInternal, ) -from models.models.family import FamilySimple, FamilySimpleInternal class ParticipantInternal(SMBase): @@ -135,12 +135,12 @@ class ParticipantUpsert(SMBase): def to_internal(self): """Convert to internal model, doesn't really do much""" p = ParticipantUpsertInternal( - id=self.id, - external_id=self.external_id, - reported_sex=self.reported_sex, - reported_gender=self.reported_gender, - 
karyotype=self.karyotype, - meta=self.meta, + id=self.id, # type: ignore + external_id=self.external_id, # type: ignore + reported_sex=self.reported_sex, # type: ignore + reported_gender=self.reported_gender, # type: ignore + karyotype=self.karyotype, # type: ignore + meta=self.meta, # type: ignore ) if self.samples: diff --git a/models/models/sample.py b/models/models/sample.py index 94fd3a901..a41c06463 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,17 +1,14 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsertInternal, AssayUpsert, AssayInternal, Assay +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.models.sequencing_group import ( - SequencingGroupUpsert, NestedSequencingGroup, - SequencingGroupUpsertInternal, NestedSequencingGroupInternal, + SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class SampleInternal(SMBase): @@ -143,6 +140,7 @@ def to_internal(self): type=self.type, participant_id=self.participant_id, active=self.active, + author='', ) @@ -180,12 +178,12 @@ def to_internal(self) -> SampleUpsertInternal: sample_upsert = SampleUpsertInternal( id=_id, - external_id=self.external_id, - meta=self.meta, - project=self.project, - type=self.type, - participant_id=self.participant_id, - active=self.active, + external_id=self.external_id, # type: ignore + meta=self.meta, # type: ignore + project=self.project, # type: ignore + type=self.type, # type: ignore + participant_id=self.participant_id, # type: ignore + active=self.active, # type: ignore ) if self.sequencing_groups: diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index 36e250c73..1b0bbd3f4 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,11 +1,11 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsert, AssayUpsertInternal, Assay, AssayInternal -from models.utils.sample_id_format import sample_id_transform_to_raw, sample_id_format +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) @@ -60,6 +60,7 @@ def to_external(self): type=self.type, technology=self.technology, platform=self.platform, + external_ids=self.external_ids, meta=self.meta, sample_id=sample_id_format(self.sample_id), assays=[a.to_external() for a in self.assays or []], @@ -141,6 +142,7 @@ class SequencingGroup(SMBase): sample_id: str external_ids: dict[str, str] archived: bool + assays: list[Assay] class NestedSequencingGroup(SMBase): @@ -169,7 +171,7 @@ class SequencingGroupUpsert(SMBase): sample_id: str | OpenApiGenNoneType = None external_ids: dict[str, str] | OpenApiGenNoneType = None - assays: list[AssayUpsert] | None = None + assays: list[AssayUpsert] | OpenApiGenNoneType = None def to_internal(self) -> SequencingGroupUpsertInternal: """ @@ -185,15 +187,15 @@ def to_internal(self) -> SequencingGroupUpsertInternal: 
sg_internal = SequencingGroupUpsertInternal( id=_id, - type=self.type, - technology=self.technology, - platform=self.platform.lower() if self.platform else None, - meta=self.meta, + type=self.type, # type: ignore + technology=self.technology, # type: ignore + platform=self.platform.lower() if self.platform else None, # type: ignore + meta=self.meta, # type: ignore sample_id=_sample_id, - external_ids=self.external_ids or {}, + external_ids=self.external_ids or {}, # type: ignore ) if self.assays is not None: - sg_internal.assays = [a.to_internal() for a in self.assays] + sg_internal.assays = [a.to_internal() for a in self.assays] # type: ignore return sg_internal diff --git a/mypy.ini b/mypy.ini index 3403e0acb..418e757ed 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,15 @@ python_version = 3.10 ; warn_return_any = True ; warn_unused_configs = True + exclude = (build|update_sample_status) + # Per-module options: plugins = strawberry.ext.mypy_plugin +[mypy.db] +disable_error_code = operator + [mypy-sample_metadata.*] ignore_errors = true @@ -44,3 +49,5 @@ ignore_missing_imports=True ignore_missing_imports = True [mypy-graphql] ignore_missing_imports = True +[mypy-strawberry] +ignore_errors = True diff --git a/regenerate_api.py b/regenerate_api.py index 07112163e..8e6a9fece 100755 --- a/regenerate_api.py +++ b/regenerate_api.py @@ -184,7 +184,7 @@ def generate_schema_file(): Generate schema file and place in the metamist/graphql/ directory """ command = ['strawberry', 'export-schema', 'api.graphql.schema:schema'] - schema = subprocess.check_output(command, stderr=subprocess.STDOUT).decode() + schema = subprocess.check_output(command).decode() with open(os.path.join(MODULE_DIR, 'graphql/schema.graphql'), 'w+') as f: f.write(schema) @@ -317,7 +317,7 @@ def main(): while (not check_if_server_is_accessible()) and startup_tries > 0: startup_tries -= 1 logger.info( - f'Dockerised API server is not ready yet. ' + 'Dockerised API server is not ready yet. ' + f'Retrying in {wait_time_in_seconds} seconds. 
' + f'Remaining tries: {startup_tries}' ) diff --git a/requirements.txt b/requirements.txt index b5cc04e02..2affefbb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ google-cloud-logging==2.7.0 google-cloud-storage==1.43.0 uvicorn==0.18.3 fastapi[all]==0.85.1 -strawberry-graphql[fastapi]==0.177.1 +strawberry-graphql[fastapi]==0.206.0 python-multipart==0.0.5 databases[mysql]==0.6.1 SQLAlchemy==1.4.41 diff --git a/scripts/20230420_sequencinggroupmigration.py b/scripts/20230420_sequencinggroupmigration.py index a3d2a705a..3feeecdd5 100644 --- a/scripts/20230420_sequencinggroupmigration.py +++ b/scripts/20230420_sequencinggroupmigration.py @@ -27,7 +27,7 @@ import json from collections import defaultdict from textwrap import dedent -from typing import Any, List, Dict, Tuple +from typing import Any, Dict, List, Tuple import click from databases import Database @@ -338,7 +338,7 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ) analysis_samples = await connection.fetch_all(analyses_query) - sequence_group_ids_of_duplicate_samples_query = dedent( + sequencing_group_ids_of_duplicate_samples_query = dedent( """ SELECT sg.sample_id, sg.id, sg.type FROM sequencing_group sg @@ -352,13 +352,13 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ORDER BY sg.sample_id DESC; """ ) - sequence_group_ids_of_duplicate_samples = await connection.fetch_all( - sequence_group_ids_of_duplicate_samples_query + sequencing_group_ids_of_duplicate_samples = await connection.fetch_all( + sequencing_group_ids_of_duplicate_samples_query ) duplicate_sg_id_map: Dict[ SampleId, Dict[SequenceType, SequenceGroupId] ] = defaultdict(dict) - for row in sequence_group_ids_of_duplicate_samples: + for row in sequencing_group_ids_of_duplicate_samples: duplicate_sg_id_map[row['sample_id']][row['type']] = row['id'] values_to_insert: List[Tuple[int, SequenceGroupId]] = [] diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index fd1e74381..c14f13d79 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 -# pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments +# type: ignore +# pylint: skip-file + + +# # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments """ Example Invocation @@ -11,7 +15,7 @@ This example will populate acute-care-test with the metamist data for 4 families. 
""" -from typing import Optional +import csv import logging import os import random @@ -19,25 +23,19 @@ import traceback import typing from collections import Counter -import csv +from typing import Optional import click from google.cloud import storage from metamist import exceptions -from metamist.apis import ( - AnalysisApi, - AssayApi, - SampleApi, - FamilyApi, - ParticipantApi, -) +from metamist.apis import AnalysisApi, AssayApi, FamilyApi, ParticipantApi, SampleApi from metamist.models import ( - BodyGetAssaysByCriteria, - AssayUpsert, - SampleUpsert, Analysis, AnalysisStatus, + AssayUpsert, + BodyGetAssaysByCriteria, + SampleUpsert, ) logger = logging.getLogger(__file__) @@ -313,7 +311,7 @@ def main( ) logger.info(f'Creating {a_type} analysis entry in test') aapi.create_analysis(project=target_project, analysis=am) - logger.info(f'-') + logger.info('-') def transfer_families( diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py index 82998215d..a157e0ff6 100644 --- a/scripts/parse_ont_sheet.py +++ b/scripts/parse_ont_sheet.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument -from typing import List import logging +from typing import List import click -from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup from metamist.parser.generic_metadata_parser import ( - run_as_sync, GenericMetadataParser, + run_as_sync, ) +from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup logger = logging.getLogger(__file__) logger.addHandler(logging.StreamHandler()) @@ -97,14 +97,14 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: return [fastqs] async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: - sequence_groups = await super().group_assays(sample) + sequencing_groups = await super().group_assays(sample) - for sequence_group in sequence_groups: + for sequencing_group in sequencing_groups: failed_fastqs: list[str] = [] - for r in sequence_group.rows: + for r in sequencing_group.rows: parsed_failed_fastqs = await self.parse_files( - sequence_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] + sequencing_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] ) if 'reads' not in parsed_failed_fastqs: raise ValueError( @@ -120,9 +120,9 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup ) failed_fastqs.extend(parsed_failed_fastq_reads['fastq']) - sequence_group.meta['failed_reads'] = failed_fastqs + sequencing_group.meta['failed_reads'] = failed_fastqs - return sequence_groups + return sequencing_groups @click.command() diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index 894d0c241..831ddd402 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -1,12 +1,9 @@ """ A really simple script to import a pedigree file """ import click +from cloudpathlib import AnyPath -from cloudpathlib import CloudPath - -from metamist.apis import ( - FamilyApi, -) +from metamist.apis import FamilyApi @click.command() @@ -17,7 +14,8 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() - with CloudPath(ped_file_path).open() as ped_file: + # pylint: disable=no-member + with AnyPath(ped_file_path).open() as ped_file: fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/scripts/sync_seqr.py b/scripts/sync_seqr.py index 67967e437..12be0fd93 100644 --- a/scripts/sync_seqr.py +++ b/scripts/sync_seqr.py @@ -1,27 +1,28 @@ # pylint: 
disable=missing-timeout,unnecessary-lambda-assignment,import-outside-toplevel,too-many-locals import asyncio -import os -import re -import json import datetime +import json import logging +import os +import re import traceback from collections import defaultdict -from typing import Any from io import StringIO +from typing import Any import aiohttp import yaml from cloudpathlib import AnyPath -from metamist.graphql import query_async -from metamist.model.analysis_status import AnalysisStatus -from metamist.model.export_type import ExportType -from metamist.model.analysis_query_model import AnalysisQueryModel + from metamist.apis import ( - SeqrApi, - ProjectApi, AnalysisApi, + ProjectApi, + SeqrApi, ) +from metamist.graphql import query_async +from metamist.model.analysis_query_model import AnalysisQueryModel +from metamist.model.analysis_status import AnalysisStatus +from metamist.model.export_type import ExportType from metamist.parser.generic_parser import chunk loggers_to_silence = [ @@ -410,7 +411,7 @@ async def update_es_index( fn_path = os.path.join(MAP_LOCATION, filename) # pylint: disable=no-member - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) if check_metamist: # len(es_index_analyses) > 0: @@ -678,7 +679,9 @@ def sync_all_datasets(sequencing_type: str, ignore: set[str] = None): continue try: el.run_until_complete( - sync_dataset_async(project_name, seqr_guid, sequencing_type=sequencing_type) + sync_dataset_async( + project_name, seqr_guid, sequencing_type=sequencing_type + ) ) except Exception as e: # pylint: disable=broad-exception-caught print( diff --git a/test/test_analysis.py b/test/test_analysis.py index ec03f0702..ef7e1acac 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -1,21 +1,20 @@ # pylint: disable=invalid-overridden-method -from datetime import timedelta, datetime - +from datetime import datetime, timedelta from test.testbase import DbIsolatedTest, run_as_sync -from db.python.tables.analysis import AnalysisFilter -from db.python.utils import GenericFilter -from db.python.layers.assay import AssayLayer from db.python.layers.analysis import AnalysisLayer +from db.python.layers.assay import AssayLayer from db.python.layers.sample import SampleLayer from db.python.layers.sequencing_group import SequencingGroupLayer +from db.python.tables.analysis import AnalysisFilter +from db.python.utils import GenericFilter +from models.enums import AnalysisStatus from models.models import ( AnalysisInternal, AssayUpsertInternal, - SequencingGroupUpsertInternal, SampleUpsertInternal, + SequencingGroupUpsertInternal, ) -from models.enums import AnalysisStatus class TestAnalysis(DbIsolatedTest): @@ -132,7 +131,7 @@ async def test_get_analysis(self): AnalysisInternal( type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], meta={}, ) ) @@ -148,7 +147,7 @@ async def test_get_analysis(self): id=a_id, type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], output=None, timestamp_completed=None, project=1, diff --git a/test/test_assay.py b/test/test_assay.py index af641a278..cde5f242a 100644 --- a/test/test_assay.py +++ b/test/test_assay.py @@ -1,10 +1,11 @@ from test.testbase import DbIsolatedTest, run_as_sync + from pymysql.err import IntegrityError from db.python.connect import NotFoundError -from db.python.layers.sample import SampleLayer -from db.python.layers.assay import AssayLayer 
from db.python.enum_tables import AssayTypeTable +from db.python.layers.assay import AssayLayer +from db.python.layers.sample import SampleLayer from db.python.tables.assay import AssayFilter from db.python.utils import GenericFilter from models.models.assay import AssayUpsertInternal @@ -200,11 +201,20 @@ async def test_getting_assay_by_external_id(self): ) ) - fquery_1 = AssayFilter(external_id='SEQ01', project=self.project_id) + fquery_1 = AssayFilter( + external_id=GenericFilter(eq='SEQ01'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_1))[0].id) - fquery_2 = AssayFilter(external_id='EXT_SEQ1', project=self.project_id) + fquery_2 = AssayFilter( + external_id=GenericFilter(eq='EXT_SEQ1'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_2))[0].id) - fquery_3 = AssayFilter(external_id='SEQ02', project=self.project_id) + fquery_3 = AssayFilter( + external_id=GenericFilter(eq='SEQ02'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq2.id, (await self.assaylayer.query(fquery_3))[0].id) @run_as_sync @@ -285,20 +295,22 @@ async def search_result_to_ids(filter_: AssayFilter): ) self.assertSetEqual( {seq1_id, seq2_id}, - await search_result_to_ids(AssayFilter(meta={'common': 'common'})), + await search_result_to_ids( + AssayFilter(meta={'common': GenericFilter(eq='common')}) + ), ) # sample meta self.assertSetEqual( {seq1_id, seq2_id}, await search_result_to_ids( - AssayFilter(sample_meta={'collection-year': '2022'}) + AssayFilter(sample_meta={'collection-year': GenericFilter(eq='2022')}) ), ) self.assertSetEqual( set(), await search_result_to_ids( - AssayFilter(sample_meta={'unknown_key': '2022'}) + AssayFilter(sample_meta={'unknown_key': GenericFilter(eq='2022')}) ), ) @@ -315,7 +327,7 @@ async def search_result_to_ids(filter_: AssayFilter): {seq2_id}, await search_result_to_ids( AssayFilter( - sample_meta={'collection-year': '2022'}, + sample_meta={'collection-year': GenericFilter(eq='2022')}, external_id=GenericFilter(in_=['SEQ02']), ) ), diff --git a/test/test_generic_auditor.py b/test/test_generic_auditor.py index 4fb526dc9..21ef3f6b5 100644 --- a/test/test_generic_auditor.py +++ b/test/test_generic_auditor.py @@ -1,16 +1,16 @@ -from collections import namedtuple import unittest -from unittest.mock import MagicMock, patch +import unittest.mock +from collections import namedtuple + from metamist.audit.generic_auditor import GenericAuditor -# pylint: disable=dangerous-default-value # noqa: B006 class TestGenericAuditor(unittest.TestCase): """Test the audit helper functions""" - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_get_participant_data_for_dataset(self, mock_query): """Only participants with a non-empty samples field should be returned""" auditor = GenericAuditor( @@ -360,7 +360,7 @@ def test_get_sequence_mapping_warning_logging(self): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_analyses_crams(self, mock_query): """Test that only the genome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -412,7 +412,7 @@ def test_query_genome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') 
def test_query_genome_and_exome_analyses_crams(self, mock_query): """Test that both the genome and exome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -472,7 +472,7 @@ def test_query_genome_and_exome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_broken_analyses_crams(self, mock_query): """ All analysis crams must have 'sequencing_type' meta field, @@ -506,7 +506,7 @@ def test_query_broken_analyses_crams(self, mock_query): assay_sg_id_map={1: 'CPG123'} ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_analyses_crams_warning(self, mock_query): """Warn if the sample_ids field is absent and the sample meta field is used instead""" auditor = GenericAuditor( @@ -541,7 +541,7 @@ def test_query_analyses_crams_warning(self, mock_query): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_analyses_for_sgs_without_crams(self, mock_query): """Log any analyses found for samples without completed CRAMs""" auditor = GenericAuditor( @@ -569,7 +569,9 @@ def test_analyses_for_sgs_without_crams(self, mock_query): } with self.assertLogs(level='WARNING') as log: - _ = auditor.analyses_for_sgs_without_crams(sgs_without_crams) + # catch the warning logs from here and check below + auditor.analyses_for_sgs_without_crams(sgs_without_crams) + self.assertEqual(len(log.output), 8) # 8 analysis types checked self.assertEqual(len(log.records), 8) self.assertIn( @@ -577,7 +579,21 @@ def test_analyses_for_sgs_without_crams(self, mock_query): log.output[0], ) - def test_get_complete_and_incomplete_sgs(self): + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.get_gcs_bucket_subdirs_to_search' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_files_in_gcs_buckets_subdirs' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.analyses_for_sgs_without_crams' + ) + def test_get_complete_and_incomplete_sgs( + self, + mock_analyses_for_sgs_without_crams, + mock_find_files_in_gcs_buckets_subdirs, + mock_get_gcs_bucket_subdirs, + ): """Report on samples that have completed CRAMs and those that dont""" assay_sg_id_map = { # noqa: B006 1: 'CPG123', @@ -591,17 +607,15 @@ def test_get_complete_and_incomplete_sgs(self): auditor = GenericAuditor( dataset='dev', sequencing_type=['genome', 'exome'], file_types=('fastq',) ) - auditor.get_gcs_bucket_subdirs_to_search = MagicMock() - auditor.find_files_in_gcs_buckets_subdirs = MagicMock() - auditor.analyses_for_sgs_without_crams = MagicMock() - auditor.get_gcs_bucket_subdirs_to_search.return_value = { + mock_get_gcs_bucket_subdirs.return_value = { 'cpg-dataset-main': ['cram', 'exome/cram'] } - auditor.find_files_in_gcs_buckets_subdirs.return_value = [ + mock_find_files_in_gcs_buckets_subdirs.return_value = [ 'gs://cpg-dataset-main/cram/CPG123.cram', 'gs://cpg-dataset-main/exome/cram/CPG456.cram', ] + mock_analyses_for_sgs_without_crams.return_value = None result = auditor.get_complete_and_incomplete_sgs( assay_sg_id_map=assay_sg_id_map, @@ -615,8 +629,16 @@ def test_get_complete_and_incomplete_sgs(self): self.assertDictEqual(result, expected_result) - async def test_check_for_uningested_or_moved_assays(self): - """Test 2 ingested reads, one ingested 
and moved read, and one uningested read""" + @unittest.mock.patch('metamist.audit.generic_auditor.GenericAuditor.file_size') + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_sequence_files_in_gcs_bucket' + ) + async def test_check_for_uningested_or_moved_assays( + self, mock_find_sequence_files_in_gcs_bucket, mock_file_size + ): + """ + Test 2 ingested reads, one ingested and moved read, and one uningested read + """ auditor = GenericAuditor( dataset='dev', sequencing_type=['genome'], file_types=('fastq',) ) @@ -627,16 +649,14 @@ async def test_check_for_uningested_or_moved_assays(self): sg_sample_id_map = {'CPG123': 'EXT123'} assay_sg_id_map = {1: 'CPG123'} sample_internal_external_id_map = {'CPG123': 'EXT123'} - auditor.find_sequence_files_in_gcs_bucket = MagicMock() - auditor.find_sequence_files_in_gcs_bucket.return_value = [ + mock_find_sequence_files_in_gcs_bucket.return_value = [ 'read1.fq', 'read2.fq', 'dir2/read3.fq', 'read4.fq', ] - auditor.file_size = MagicMock() - auditor.file_size.return_value = 12 + mock_file_size.return_value = 12 ( uningested_sequence_paths, diff --git a/test/test_generic_filters.py b/test/test_generic_filters.py index 343047c96..cc01e7f80 100644 --- a/test/test_generic_filters.py +++ b/test/test_generic_filters.py @@ -15,12 +15,6 @@ class GenericFilterTest(GenericFilterModel): class TestGenericFilters(unittest.TestCase): """Test generic filters SQL generation""" - def test_post_init_correction(self): - """Test that the post init correction works""" - filter_ = GenericFilterTest(test_string='test') - self.assertIsInstance(filter_.test_string, GenericFilter) - self.assertEqual(filter_.test_string.eq, 'test') - def test_basic_no_override(self): """Test that the basic filter converts to SQL as expected""" filter_ = GenericFilterTest(test_string=GenericFilter(eq='test')) diff --git a/test/test_graphql.py b/test/test_graphql.py index 95b817931..a52a5f947 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -1,20 +1,19 @@ from test.testbase import DbIsolatedTest, run_as_sync + from graphql.error import GraphQLError, GraphQLSyntaxError import api.graphql.schema -from db.python.layers import ParticipantLayer, AnalysisLayer +from db.python.layers import AnalysisLayer, ParticipantLayer +from metamist.graphql import configure_sync_client, gql, validate +from models.enums import AnalysisStatus from models.models import ( - SampleUpsertInternal, + AnalysisInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, - AnalysisInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format -from models.enums import AnalysisStatus - -from metamist.graphql import gql, validate, configure_sync_client - default_assay_meta = { 'sequencing_type': 'genome', @@ -24,7 +23,6 @@ def _get_single_participant_upsert(): - return ParticipantUpsertInternal( external_id='Demeter', meta={}, @@ -43,20 +41,20 @@ def _get_single_participant_upsert(): type='sequencing', meta={ 'reads': [ - { - 'basename': 'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': 
'/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -114,7 +112,7 @@ def test_validate_provided_schema(self): (strawberry has an as_str() method) """ client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(TEST_QUERY, client=client) @@ -231,3 +229,30 @@ async def test_sg_analyses_query(self): self.assertIn('id', analyses[0]) self.assertIn('meta', analyses[0]) self.assertIn('output', analyses[0]) + + @run_as_sync + async def test_participant_phenotypes(self): + """ + Test getting participant phentypes in graphql + """ + # insert participant + p = await self.player.upsert_participant( + ParticipantUpsertInternal(external_id='Demeter', meta={}, samples=[]) + ) + + phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} + # insert participant_phenotypes + await self.player.insert_participant_phenotypes({p.id: phenotypes}) + + q = """ +query MyQuery($pid: Int!) { + participant(id: $pid) { + phenotypes + } +}""" + + resp = await self.run_graphql_query_async(q, {'pid': p.id}) + + self.assertIn('participant', resp) + self.assertIn('phenotypes', resp['participant']) + self.assertDictEqual(phenotypes, resp['participant']['phenotypes']) diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index ae51f7a20..adb65622e 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -1,5 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync +from databases.interfaces import Record + from db.python.layers.participant import ParticipantLayer from models.models.participant import ParticipantUpsertInternal @@ -21,20 +23,22 @@ async def test_import_many_hpo_terms(self): 'HPO Term 3', 'HPO Term 20', ] - rows = [['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004']] + rows_to_insert = [ + ['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004'] + ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) - rows = list( + db_rows: list[Record] = list( await self.connection.connection.fetch_all( 'SELECT participant_id, description, value FROM participant_phenotypes' ) ) - self.assertEqual(1, len(rows)) - self.assertEqual('HPO Terms (present)', rows[0]['description']) + self.assertEqual(1, len(db_rows)) + self.assertEqual('HPO Terms (present)', db_rows[0]['description']) self.assertEqual( - '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', rows[0]['value'] + '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', db_rows[0]['value'] ) @run_as_sync @@ -50,12 +54,12 @@ async def test_import_basic_metadata(self): ) headers = ['Individual ID', 'HPO Term 20', 'Age of Onset'] - rows = [ + rows_to_insert = [ ['TP01', 'HP:0000020', 'Congenital'], ['TP02', 'HP:00000021; HP:023', 'Infantile'], ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) rows = list( await self.connection.connection.fetch_all( diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index ed95a4fbc..ebcc5ec75 100644 --- a/test/test_parse_generic_metadata.py +++ 
b/test/test_parse_generic_metadata.py @@ -1,33 +1,31 @@ import unittest from datetime import datetime from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - import api.graphql.schema from db.python.layers import ParticipantLayer +from metamist.graphql import configure_sync_client, validate +from metamist.parser.generic_metadata_parser import GenericMetadataParser +from metamist.parser.generic_parser import ( + QUERY_MATCH_ASSAYS, + QUERY_MATCH_PARTICIPANTS, + QUERY_MATCH_SAMPLES, + QUERY_MATCH_SEQUENCING_GROUPS, + ParsedParticipant, + ParsedSample, + ParsedSequencingGroup, +) from models.models import ( + AssayUpsertInternal, ParticipantUpsertInternal, SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, ) from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import sequencing_group_id_format -from metamist.graphql import validate, configure_sync_client -from metamist.parser.generic_parser import ( - ParsedParticipant, - ParsedSample, - QUERY_MATCH_PARTICIPANTS, - QUERY_MATCH_SAMPLES, - QUERY_MATCH_SEQUENCING_GROUPS, - QUERY_MATCH_ASSAYS, - ParsedSequencingGroup, -) -from metamist.parser.generic_metadata_parser import GenericMetadataParser - def _get_basic_participant_to_upsert(): default_assay_meta = { @@ -96,7 +94,7 @@ def test_queries(self): # only need to apply schema to the first client to create, then it gets cached client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(QUERY_MATCH_PARTICIPANTS) validate(QUERY_MATCH_SAMPLES, client=client) @@ -332,11 +330,11 @@ async def test_rows_with_participants(self, mock_graphql_query): # Call generic parser file_contents = '\n'.join(rows) - summary, participants = await parser.parse_manifest( + summary, prows = await parser.parse_manifest( StringIO(file_contents), delimiter='\t', dry_run=True ) - participants: list[ParsedParticipant] = participants + participants: list[ParsedParticipant] = prows self.assertEqual(3, summary['participants']['insert']) self.assertEqual(0, summary['participants']['update']) @@ -749,7 +747,9 @@ async def test_matching_sequencing_groups_and_assays( mock_datetime_added.return_value = datetime.fromisoformat('2022-02-02T22:22:22') player = ParticipantLayer(self.connection) - participant = await player.upsert_participant(_get_basic_participant_to_upsert()) + participant = await player.upsert_participant( + _get_basic_participant_to_upsert() + ) filenames = [ 'sample_id001.filename-R1.fastq.gz', diff --git a/test/test_parse_ont_processor.py b/test/test_parse_ont_processor.py index c14d95e73..a3d301754 100644 --- a/test/test_parse_ont_processor.py +++ b/test/test_parse_ont_processor.py @@ -1,8 +1,7 @@ import unittest from io import StringIO -from unittest.mock import patch - from test.testbase import run_as_sync +from unittest.mock import patch from scripts.process_ont_products import OntProductParser @@ -36,7 +35,7 @@ async def test_single_row_all_files_exist( dry_run=True, ) - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True fs = [ 'Sample01.bam', 'Sample01.sv.vcf.gz', @@ -44,7 +43,7 @@ async def test_single_row_all_files_exist( 'Sample01.indels.vcf.gz', ] parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs} - parser.skip_checking_gcs_objects = True + # 
parser.skip_checking_gcs_objects = True file_contents = '\n'.join(rows) analyses = await parser.parse_manifest( diff --git a/test/test_parse_ont_sheet.py b/test/test_parse_ont_sheet.py index 72b1acdc0..534bb50f1 100644 --- a/test/test_parse_ont_sheet.py +++ b/test/test_parse_ont_sheet.py @@ -1,11 +1,10 @@ from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - from db.python.layers import ParticipantLayer -from models.models import ParticipantUpsertInternal, SampleUpsertInternal from metamist.parser.generic_parser import ParsedParticipant +from models.models import ParticipantUpsertInternal, SampleUpsertInternal from scripts.parse_ont_sheet import OntParser @@ -125,6 +124,6 @@ async def test_simple_sheet(self, mock_graphql_query): ], } self.maxDiff = None - sequence_group = participants[0].samples[0].sequencing_groups[0] - self.assertDictEqual(seqgroup_meta, sequence_group.meta) - self.assertDictEqual(meta_dict, sequence_group.assays[0].meta) + sequencing_group = participants[0].samples[0].sequencing_groups[0] + self.assertDictEqual(seqgroup_meta, sequencing_group.meta) + self.assertDictEqual(meta_dict, sequencing_group.assays[0].meta) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 77d395863..8966c8cc3 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -1,9 +1,8 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.participant import ParticipantUpsertInternal - from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer +from models.models.participant import ParticipantUpsertInternal class TestPedigree(DbIsolatedTest): @@ -14,10 +13,10 @@ async def test_import_get_pedigree(self): """Test import + get pedigree""" fl = FamilyLayer(self.connection) - rows = [ - ['FAM01', 'EX01_father', '', '', 1, 1], - ['FAM01', 'EX01_mother', '', '', 2, 1], - ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', 1, 2], + rows: list[list[str]] = [ + ['FAM01', 'EX01_father', '', '', '1', '1'], + ['FAM01', 'EX01_mother', '', '', '2', '1'], + ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', '1', '2'], ] await fl.import_pedigree( diff --git a/test/test_sample.py b/test/test_sample.py index b256a2ae0..e5b8639b7 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.sample import SampleUpsertInternal from db.python.layers.sample import SampleLayer +from models.models.sample import SampleUpsertInternal class TestSample(DbIsolatedTest): @@ -17,7 +17,7 @@ async def setUp(self) -> None: @run_as_sync async def test_add_sample(self): """Test inserting a sample""" - s = await self.slayer.upsert_sample( + sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='Test01', type='blood', @@ -30,8 +30,7 @@ async def test_add_sample(self): 'SELECT id, type, meta, project FROM sample' ) self.assertEqual(1, len(samples)) - s = samples[0] - self.assertEqual(1, s['id']) + self.assertEqual(sample.id, samples[0]['id']) @run_as_sync async def test_get_sample(self): diff --git a/test/test_search.py b/test/test_search.py index fa92cf80f..ca6718ec5 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -1,18 +1,25 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from 
db.python.layers.sample import SampleLayer from db.python.layers.search import SearchLayer -from db.python.layers.family import FamilyLayer from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.family_participant import FamilyParticipantTable - from models.enums import SearchResponseType -from models.models.family import PedRowInternal -from models.models.sample import sample_id_format, SampleUpsertInternal -from models.models.participant import ParticipantUpsertInternal -from models.models.sequencing_group import SequencingGroupUpsertInternal, sequencing_group_id_format -from models.models.assay import AssayUpsertInternal +from models.models import ( + AssayUpsertInternal, + FamilySearchResponseData, + ParticipantSearchResponseData, + ParticipantUpsertInternal, + PedRowInternal, + SampleSearchResponseData, + SampleUpsertInternal, + SequencingGroupSearchResponseData, + SequencingGroupUpsertInternal, +) +from models.models.sample import sample_id_format +from models.models.sequencing_group import sequencing_group_id_format class TestSample(DbIsolatedTest): @@ -68,7 +75,11 @@ async def test_search_isolated_sample_by_id(self): self.assertEqual(1, len(results)) self.assertEqual(cpg_id, results[0].title) self.assertEqual(cpg_id, results[0].data.id) - self.assertListEqual(['EX001'], results[0].data.sample_external_ids) + + result_data = results[0].data + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertListEqual(['EX001'], result_data.sample_external_ids) @run_as_sync async def test_search_isolated_sequencing_group_by_id(self): @@ -97,19 +108,24 @@ async def test_search_isolated_sequencing_group_by_id(self): 'sequencing_type': 'transcriptome', 'sequencing_technology': 'long-read', 'sequencing_platform': 'illumina', - } + }, ) - ] + ], ) ] ) cpg_sg_id = sequencing_group_id_format(sg[0].id) - results = await self.schlay.search(query=cpg_sg_id, project_ids=[self.project_id]) + results = await self.schlay.search( + query=cpg_sg_id, project_ids=[self.project_id] + ) self.assertEqual(1, len(results)) self.assertEqual(cpg_sg_id, results[0].title) - self.assertEqual(cpg_sg_id, results[0].data.id) - self.assertEqual(cpg_sg_id, results[0].data.sg_external_id) + result_data = results[0].data + assert isinstance(result_data, SequencingGroupSearchResponseData) + self.assertIsInstance(result_data, SequencingGroupSearchResponseData) + self.assertEqual(cpg_sg_id, result_data.id) + self.assertEqual(cpg_sg_id, result_data.sg_external_id) @run_as_sync async def test_search_isolated_sample_by_external_id(self): @@ -125,12 +141,16 @@ async def test_search_isolated_sample_by_external_id(self): cpg_id = sample_id_format(sample.id) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(cpg_id, result.title) - self.assertEqual(cpg_id, result.data.id) - self.assertListEqual(['EX001'], result.data.sample_external_ids) - self.assertListEqual([], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) + + self.assertEqual(cpg_id, results[0].title) + result_data = results[0].data + + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertEqual(cpg_id, result_data.id) + self.assertListEqual(['EX001'], result_data.sample_external_ids) + self.assertListEqual([], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync 
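# Reviewer note, illustrative sketch only: search results are now narrowed with an
# isinstance check before touching type-specific fields, so type checkers can tell
# which response-data model is in play. `results` stands in for the value returned
# by SearchLayer.search in the tests above.
from models.models import SampleSearchResponseData

def first_sample_external_ids(results) -> list[str]:
    result_data = results[0].data
    assert isinstance(result_data, SampleSearchResponseData)
    return result_data.sample_external_ids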
async def test_search_participant_isolated(self): @@ -145,12 +165,13 @@ async def test_search_participant_isolated(self): query='PART01', project_ids=[self.project_id] ) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(p.id, result.data.id) - self.assertEqual('PART01', result.title) - self.assertListEqual(['PART01'], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + + self.assertEqual('PART01', results[0].title) + result_data = results[0].data + assert isinstance(result_data, ParticipantSearchResponseData) + self.assertEqual(p.id, result_data.id) + self.assertListEqual(['PART01'], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def test_search_family(self): @@ -164,11 +185,11 @@ async def test_search_family(self): ) self.assertEqual(1, len(results)) result = results[0] - self.assertEqual(f_id, result.data.id) self.assertEqual('FAMXX01', result.title) - self.assertListEqual(['FAMXX01'], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.participant_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + result_data = result.data + assert isinstance(result_data, FamilySearchResponseData) + self.assertEqual(f_id, result_data.id) + self.assertListEqual(['FAMXX01'], result_data.family_external_ids) @run_as_sync async def test_search_mixed(self): @@ -195,7 +216,7 @@ async def test_search_mixed(self): sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='X:SAM001', - sample_type='blood', + type='blood', participant_id=p.id, ) ) @@ -214,19 +235,26 @@ async def test_search_mixed(self): sample_result = next( r for r in all_results if r.type == SearchResponseType.SAMPLE ) + family_result_data = family_result.data + participant_result_data = participant_result.data + sample_result_data = sample_result.data + + assert isinstance(family_result_data, FamilySearchResponseData) + assert isinstance(participant_result_data, ParticipantSearchResponseData) + assert isinstance(sample_result_data, SampleSearchResponseData) # linked family matches self.assertEqual('X:FAM01', family_result.title) # linked participant matches self.assertEqual('X:PART01', participant_result.title) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) # linked sample matches cpg_id = sample_id_format(sample.id) - self.assertEqual(cpg_id, sample_result.data.id) - self.assertListEqual(['X:SAM001'], sample_result.data.sample_external_ids) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertEqual(cpg_id, sample_result_data.id) + self.assertListEqual(['X:SAM001'], sample_result_data.sample_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) self.assertListEqual( - ['X:PART01'], participant_result.data.participant_external_ids + ['X:PART01'], participant_result_data.participant_external_ids ) diff --git a/test/test_web.py b/test/test_web.py index 4e6cb53f6..cc5a0bb63 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -1,27 +1,27 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers import ( + AssayLayer, + ParticipantLayer, + SampleLayer, + SequencingGroupLayer, + WebLayer, +) from models.enums import 
MetaSearchEntityPrefix from models.models import ( + Assay, + AssayInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + ProjectSummaryInternal, SampleUpsertInternal, + SearchItem, SequencingGroupUpsertInternal, - AssayUpsertInternal, - ProjectSummaryInternal, - AssayInternal, - Assay, + WebProject, ) -from models.models import WebProject, SearchItem from models.utils.sample_id_format import sample_id_transform_to_raw from models.utils.sequencing_group_id_format import sequencing_group_id_transform_to_raw -from db.python.layers import ( - AssayLayer, - SequencingGroupLayer, - SampleLayer, - ParticipantLayer, - WebLayer, -) - default_assay_meta = { 'sequencing_type': 'genome', 'sequencing_technology': 'short-read', @@ -71,20 +71,20 @@ def get_test_participant(): type='sequencing', meta={ 'reads': [ - { - 'basename': 'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -204,9 +204,7 @@ async def test_project_summary_empty(self): # Expect an empty project expected = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -308,9 +306,7 @@ async def project_summary_with_filter_no_results(self): ], ) empty_result = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -455,12 +451,10 @@ async def test_field_with_space(self): token=0, grid_filter=[ SearchItem( - **{ - 'model_type': MetaSearchEntityPrefix.ASSAY, - 'query': 'field wi', - 'field': 'field with spaces', - 'is_meta': True, - } + model_type=MetaSearchEntityPrefix.ASSAY, + query='field wi', + field='field with spaces', + is_meta=True, ) ], ) diff --git a/web/src/pages/project/ProjectGrid.tsx b/web/src/pages/project/ProjectGrid.tsx index 52dbeaa64..83a9fda74 100644 --- a/web/src/pages/project/ProjectGrid.tsx +++ b/web/src/pages/project/ProjectGrid.tsx @@ -372,7 +372,7 @@ const ProjectGrid: React.FunctionComponent = ({ : '1px solid var(--color-border-default)', backgroundColor, }} - key={`${s.id}sequence_group.${k}`} + key={`${s.id}sequencing_group.${k}`} rowSpan={(seq.assays ?? []).length} > {k === 'id' ? (
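For reviewers porting scripts off the old dict-unpacking style, here is a minimal sketch assuming only what the updated test_web.py shows: WebProject and SearchItem are now built with keyword arguments, with the values copied from the expected ProjectSummaryInternal and grid filter above.

from models.enums import MetaSearchEntityPrefix
from models.models import SearchItem, WebProject

# keyword construction replaces WebProject(**{...}) and SearchItem(**{...})
project = WebProject(id=1, name='test', meta={}, dataset='test')
grid_filter = [
    SearchItem(
        model_type=MetaSearchEntityPrefix.ASSAY,
        query='field wi',
        field='field with spaces',
        is_meta=True,
    )
]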