From 57ca4d3c5d490cc0a3140b476b288543967f9255 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:01:39 +1000 Subject: [PATCH 1/3] Address mypy errors (#546) * Address mypy errors * More linting updates * Two tests to fix checks * Rejig lint to build package to mypy scripts * Add strawberry[debug-server] requirement * Revert to the OpenApiGenNoneType for tests * Add extra import --------- Co-authored-by: Michael Franklin --- .github/workflows/lint.yaml | 38 +++- .pre-commit-config.yaml | 174 ++++++++++--------- api/routes/analysis.py | 86 +++++---- db/python/connect.py | 4 +- db/python/enum_tables/enums.py | 10 +- db/python/layers/seqr.py | 14 +- db/python/layers/sequencing_group.py | 13 +- db/python/layers/web.py | 15 +- db/python/tables/analysis.py | 12 +- db/python/tables/project.py | 15 +- db/python/tables/sequencing_group.py | 22 +-- etl/endpoint/main.py | 6 +- metamist/parser/generic_parser.py | 42 ++--- models/base.py | 2 +- models/models/__init__.py | 41 ++--- models/models/analysis.py | 2 +- models/models/assay.py | 19 +- models/models/participant.py | 22 +-- models/models/sample.py | 26 ++- models/models/sequencing_group.py | 24 +-- mypy.ini | 7 + regenerate_api.py | 4 +- requirements-dev.txt | 2 + requirements.txt | 2 +- scripts/20230420_sequencinggroupmigration.py | 10 +- scripts/create_test_subset.py | 26 ++- scripts/parse_ont_sheet.py | 18 +- scripts/parse_ped.py | 10 +- scripts/sync_seqr.py | 27 +-- test/test_analysis.py | 17 +- test/test_assay.py | 30 +++- test/test_generic_auditor.py | 64 ++++--- test/test_generic_filters.py | 6 - test/test_graphql.py | 46 +++-- test/test_import_individual_metadata.py | 20 ++- test/test_parse_generic_metadata.py | 38 ++-- test/test_parse_ont_processor.py | 7 +- test/test_parse_ont_sheet.py | 11 +- test/test_pedigree.py | 11 +- test/test_sample.py | 7 +- test/test_search.py | 98 +++++++---- test/test_web.py | 72 ++++---- web/src/pages/project/ProjectGrid.tsx | 2 +- 43 files changed, 605 insertions(+), 517 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index d4863658a..68f65dc86 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -4,22 +4,52 @@ on: push jobs: lint: runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: 1 + BUILDKIT_PROGRESS: plain + CLOUDSDK_CORE_DISABLE_PROMPTS: 1 + # used for generating API + SM_DOCKER: samplemetadata:dev defaults: run: shell: bash -eo pipefail -l {0} - steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@main - uses: actions/setup-python@v2 with: python-version: "3.10" - cache: "pip" - - name: Install packages + - uses: actions/setup-java@v2 + with: + distribution: "temurin" # See 'Supported distributions' for available options + java-version: "17" + + - name: Setup build env + run: | + set -euxo pipefail + + pip install -r requirements-dev.txt + pip install -r requirements.txt + + # openapi-generator + wget https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/5.3.0/openapi-generator-cli-5.3.0.jar -O openapi-generator-cli.jar + + - name: "build image" + run: | + docker build \ + --build-arg SM_ENVIRONMENT=local \ + --tag $SM_DOCKER \ + -f deploy/api/Dockerfile \ + . + + - name: Build + install packages run: | + export OPENAPI_COMMAND="java -jar openapi-generator-cli.jar" + python regenerate_api.py pip install -r requirements-dev.txt pip install . 
+ mkdir .mypy_cache - name: pre-commit run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 155d7cda1..c4d4ec406 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,92 +1,98 @@ repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - exclude: '\.*conda/.*' - - id: end-of-file-fixer - - id: trailing-whitespace - exclude: '\.txt$|\.tsv$' - - id: check-case-conflict - - id: check-merge-conflict - - id: detect-private-key - - id: debug-statements - - id: check-added-large-files + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + exclude: '\.*conda/.*' + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: '\.txt$|\.tsv$' + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: debug-statements + - id: check-added-large-files - - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.33.0 - hooks: - - id: markdownlint - args: ["--config", ".markdownlint.json"] + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.33.0 + hooks: + - id: markdownlint + args: ["--config", ".markdownlint.json"] - - repo: https://github.com/ambv/black - rev: 23.3.0 - hooks: - - id: black - args: [.] - pass_filenames: false - always_run: true - exclude: ^metamist/ + - repo: https://github.com/ambv/black + rev: 23.3.0 + hooks: + - id: black + args: [.] + pass_filenames: false + always_run: true + exclude: ^metamist/ - - repo: https://github.com/PyCQA/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear, flake8-quotes] + - repo: https://github.com/PyCQA/flake8 + rev: "6.0.0" + hooks: + - id: flake8 + additional_dependencies: [flake8-bugbear, flake8-quotes] - # Using system installation of pylint to support checking python module imports - - repo: local - hooks: - - id: pylint - name: pylint - entry: pylint - language: system - types: [python] + # Using system installation of pylint to support checking python module imports + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] - # mypy - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 - hooks: - - id: mypy - args: - [ - --pretty, - --show-error-codes, - --no-strict-optional, - --ignore-missing-imports, - --install-types, - --non-interactive, - ] - additional_dependencies: - - strawberry-graphql[fastapi]==0.138.1 + # mypy + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.5.1 + hooks: + - id: mypy + args: + [ + --pretty, + --show-error-codes, + --no-strict-optional, + --ignore-missing-imports, + --install-types, + --non-interactive, + --show-error-context, + --check-untyped-defs, + --explicit-package-bases, + --disable-error-code, + operator, + ] + additional_dependencies: + - strawberry-graphql[fastapi]==0.206.0 + - types-PyMySQL==1.1.0.1 - - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v3.0.0-alpha.4" - hooks: - - id: prettier - # I'm not exactly sure why it changes behaviour, but - # calling `cd web`, then calling `ls src/**/*.tsx` - # returns different results to `cd web && ls src/**/*.tsx` - # so just include both patterns here - entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.0-alpha.4" + hooks: + - id: prettier + # I'm not exactly sure why it changes behaviour, but + 
# calling `cd web`, then calling `ls src/**/*.tsx` + # returns different results to `cd web && ls src/**/*.tsx` + # so just include both patterns here + entry: bash -c 'cd web && prettier --write --ignore-unknown --check src/*.{ts,tsx,css} src/**/*.{ts,tsx,css}' - - repo: https://github.com/pre-commit/mirrors-eslint - rev: "v8.33.0" - hooks: - - id: eslint - entry: bash -c 'cd web && eslint' - files: \.[jt]sx?$ - types: [file] - additional_dependencies: - - eslint@^7.32.0 - - eslint-config-airbnb@^19.0.4 - - eslint-config-airbnb-base@^15.0.0 - - eslint-config-airbnb-typescript@^17.0.0 - - eslint-config-prettier@^8.6.0 - - eslint-plugin-import@^2.26.0 - - eslint-plugin-jsx-a11y@^6.6.1 - - eslint-plugin-prettier@^4.2.1 - - eslint-plugin-react@^7.31.11 - - eslint-plugin-react-hooks@^4.6.0 - - "@typescript-eslint/eslint-plugin@^5.48.0" - - "@typescript-eslint/parser@^5.48.0" + - repo: https://github.com/pre-commit/mirrors-eslint + rev: "v8.33.0" + hooks: + - id: eslint + entry: bash -c 'cd web && eslint' + files: \.[jt]sx?$ + types: [file] + additional_dependencies: + - eslint@^7.32.0 + - eslint-config-airbnb@^19.0.4 + - eslint-config-airbnb-base@^15.0.0 + - eslint-config-airbnb-typescript@^17.0.0 + - eslint-config-prettier@^8.6.0 + - eslint-plugin-import@^2.26.0 + - eslint-plugin-jsx-a11y@^6.6.1 + - eslint-plugin-prettier@^4.2.1 + - eslint-plugin-react@^7.31.11 + - eslint-plugin-react-hooks@^4.6.0 + - "@typescript-eslint/eslint-plugin@^5.48.0" + - "@typescript-eslint/parser@^5.48.0" diff --git a/api/routes/analysis.py b/api/routes/analysis.py index 0af9ade7b..1a0057dcc 100644 --- a/api/routes/analysis.py +++ b/api/routes/analysis.py @@ -8,12 +8,11 @@ from pydantic import BaseModel from starlette.responses import StreamingResponse -from api.utils.dates import parse_date_only_string from api.utils.db import ( - get_projectless_db_connection, + Connection, get_project_readonly_connection, get_project_write_connection, - Connection, + get_projectless_db_connection, ) from api.utils.export import ExportType from db.python.layers.analysis import AnalysisLayer @@ -22,20 +21,17 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models.analysis import ( + Analysis, AnalysisInternal, ProjectSizeModel, - SequencingGroupSizeModel, - DateSizeModel, - Analysis, ) from models.utils.sample_id_format import ( sample_id_transform_to_raw_list, - sample_id_format, ) from models.utils.sequencing_group_id_format import ( + sequencing_group_id_format, sequencing_group_id_format_list, sequencing_group_id_transform_to_raw_list, - sequencing_group_id_format, ) router = APIRouter(prefix='/analysis', tags=['analysis']) @@ -326,40 +322,42 @@ async def get_sequencing_group_file_sizes( """ Get the per sample file size by type over the given projects and date range """ - atable = AnalysisLayer(connection) - - # Check access to projects - project_ids = None - pt = ProjectPermissionsTable(connection=connection.connection) - project_ids = await pt.get_project_ids_from_names_and_user( - connection.author, project_names, readonly=True - ) - - # Map from internal pids to project name - prj_name_map = dict(zip(project_ids, project_names)) - - # Convert dates - start = parse_date_only_string(start_date) - end = parse_date_only_string(end_date) - - # Get results with internal ids as keys - results = await atable.get_sequencing_group_file_sizes( - project_ids=project_ids, start_date=start, end_date=end - ) - - # Convert to the correct output type, converting internal ids to external - 
fixed_pids: list[Any] = [ - ProjectSizeModel( - project=prj_name_map[project_data['project']], - samples=[ - SequencingGroupSizeModel( - sample=sample_id_format(s['sample']), - dates=[DateSizeModel(**d) for d in s['dates']], - ) - for s in project_data['samples'] - ], - ) - for project_data in results - ] - return fixed_pids + raise NotImplementedError('This route is broken, and not properly implemented yet') + # atable = AnalysisLayer(connection) + + # # Check access to projects + # project_ids = None + # pt = ProjectPermissionsTable(connection=connection.connection) + # project_ids = await pt.get_project_ids_from_names_and_user( + # connection.author, project_names, readonly=True + # ) + + # # Map from internal pids to project name + # prj_name_map = dict(zip(project_ids, project_names)) + + # # Convert dates + # start = parse_date_only_string(start_date) + # end = parse_date_only_string(end_date) + + # # Get results with internal ids as keys + # results = await atable.get_sequencing_group_file_sizes( + # project_ids=project_ids, start_date=start, end_date=end + # ) + + # # Convert to the correct output type, converting internal ids to external + # fixed_pids: list[Any] = [ + # ProjectSizeModel( + # project=prj_name_map[project_data['project']], + # samples=[ + # SequencingGroupSizeModel( + # sample=sample_id_format(s['sample']), + # dates=[DateSizeModel(**d) for d in s['dates']], + # ) + # for s in project_data['samples'] + # ], + # ) + # for project_data in results + # ] + + # return fixed_pids diff --git a/db/python/connect.py b/db/python/connect.py index 0b9acf57a..8a5a7811e 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -121,7 +121,9 @@ def get_connection_string(self): if self.port: _host += f':{self.port}' - options = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} + options: dict[ + str, str | int + ] = {} # {'min_size': self.min_pool_size, 'max_size': self.max_pool_size} _options = '&'.join(f'{k}={v}' for k, v in options.items()) url = f'mysql://{u_p}@{_host}/{self.dbname}?{_options}' diff --git a/db/python/enum_tables/enums.py b/db/python/enum_tables/enums.py index e6419b98b..113daf3d6 100644 --- a/db/python/enum_tables/enums.py +++ b/db/python/enum_tables/enums.py @@ -1,6 +1,7 @@ -import re import abc +import re from functools import lru_cache + from async_lru import alru_cache from db.python.connect import DbBase @@ -36,7 +37,8 @@ def _get_table_name(cls): matcher = table_name_matcher.match(tn) if not matcher: raise ValueError( - f'The tablename {tn} is not valid (must match {table_name_matcher.pattern})' + f'The tablename {tn} is not valid (must match ' + f'{table_name_matcher.pattern})' ) return tn @@ -47,9 +49,9 @@ async def get(self) -> list[str]: """ _query = f'SELECT DISTINCT name FROM {self._get_table_name()}' rows = await self.connection.fetch_all(_query) - rows = [r['name'] for r in rows] + nrows = [r['name'] for r in rows] - return rows + return nrows async def insert(self, value: str): """ diff --git a/db/python/layers/seqr.py b/db/python/layers/seqr.py index 8c203979a..32ef5f5e2 100644 --- a/db/python/layers/seqr.py +++ b/db/python/layers/seqr.py @@ -1,8 +1,8 @@ # pylint: disable=unnecessary-lambda-assignment,too-many-locals,broad-exception-caught +import asyncio import os import re -import asyncio import traceback from collections import defaultdict from datetime import datetime @@ -15,13 +15,14 @@ from cpg_utils.cloud import get_google_identity_token from api.settings import ( - SEQR_URL, SEQR_AUDIENCE, SEQR_MAP_LOCATION, 
SEQR_SLACK_NOTIFICATION_CHANNEL, + SEQR_URL, get_slack_token, ) from db.python.connect import Connection +from db.python.enum_tables import SequencingTypeTable from db.python.layers.analysis import AnalysisLayer from db.python.layers.base import BaseLayer from db.python.layers.family import FamilyLayer @@ -29,15 +30,14 @@ from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.analysis import AnalysisFilter from db.python.tables.project import ProjectPermissionsTable -from db.python.enum_tables import SequencingTypeTable -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.enums import AnalysisStatus # literally the most temporary thing ever, but for complete # automation need to have sample inclusion / exclusion from models.utils.sequencing_group_id_format import ( - sequencing_group_id_format_list, sequencing_group_id_format, + sequencing_group_id_format_list, ) SEQUENCING_GROUPS_TO_IGNORE = {22735, 22739} @@ -421,9 +421,9 @@ async def update_es_index( ) if len(es_index_analyses) == 0: - return [f'No ES index to synchronise'] + return ['No ES index to synchronise'] - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) es_index = es_index_analyses[-1].output diff --git a/db/python/layers/sequencing_group.py b/db/python/layers/sequencing_group.py index 6d6a02a55..5ff5b133b 100644 --- a/db/python/layers/sequencing_group.py +++ b/db/python/layers/sequencing_group.py @@ -6,13 +6,13 @@ from db.python.tables.assay import AssayTable, NoOpAenter from db.python.tables.sample import SampleTable from db.python.tables.sequencing_group import ( - SequencingGroupTable, SequencingGroupFilter, + SequencingGroupTable, ) from db.python.utils import ProjectId from models.models.sequencing_group import ( - SequencingGroupUpsertInternal, SequencingGroupInternal, + SequencingGroupUpsertInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format @@ -133,7 +133,7 @@ async def get_participant_ids_sequencing_group_ids_for_sequencing_type( ( projects, pids, - ) = await self.seqgt.get_participant_ids_and_sequence_group_ids_for_sequencing_type( + ) = await self.seqgt.get_participant_ids_and_sequencing_group_ids_for_sequencing_type( sequencing_type ) if not pids: @@ -209,7 +209,7 @@ async def create_sequencing_group_from_assays( type_=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, + assay_ids=assay_ids, meta=meta, ) return SequencingGroupInternal( @@ -217,7 +217,6 @@ async def create_sequencing_group_from_assays( type=next(iter(sequencing_types)), technology=next(iter(sequencing_technologies)), platform=next(iter(sequencing_platforms)), - sequence_ids=assay_ids, sample_id=next(iter(sample_ids)), meta=meta, assays=assays, @@ -249,7 +248,7 @@ async def recreate_sequencing_group_with_new_assays( technology=seqgroup.technology, platform=seqgroup.platform, meta={**seqgroup.meta, **meta}, - sequence_ids=assays, + assay_ids=assays, author=self.author, open_transaction=False, ) @@ -324,7 +323,7 @@ async def upsert_sequencing_groups( technology=sg.technology, platform=sg.platform, meta=sg.meta, - sequence_ids=assay_ids, + assay_ids=assay_ids, open_transaction=False, ) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 6fffe2e20..95979457e 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ 
-16,12 +16,12 @@ from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable from models.models import ( + AssayInternal, + FamilySimpleInternal, NestedParticipantInternal, NestedSampleInternal, NestedSequencingGroupInternal, - AssayInternal, SearchItem, - FamilySimpleInternal, ) from models.models.web import ProjectSummaryInternal, WebProject @@ -82,7 +82,7 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # the query to determine the total count, then take the selection of samples # for the current page. This is more efficient than doing 2 queries separately. sample_query = f""" - SELECT s.id, s.external_id, s.type, s.meta, s.participant_id + SELECT s.id, s.external_id, s.type, s.meta, s.participant_id, s.active FROM sample s LEFT JOIN assay a ON s.id = a.sample_id LEFT JOIN participant p ON p.id = s.participant_id @@ -189,6 +189,7 @@ def _project_summary_process_sample_rows( created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), + active=bool(ord(s['active'])), ) for s in sample_rows ] @@ -402,8 +403,8 @@ async def get_project_summary( sg_models_by_sample_id=seq_group_models_by_sample_id, sample_id_start_times=sample_id_start_times, ) - # the pydantic model is casting to the id to a str, as that makes sense on the front end - # but cast back here to do the lookup + # the pydantic model is casting to the id to a str, as that makes sense on + # the front end but cast back here to do the lookup sid_to_pid = {s['id']: s['participant_id'] for s in sample_rows} smodels_by_pid = group_by(smodels, lambda s: sid_to_pid[int(s.id)]) @@ -429,7 +430,7 @@ async def get_project_summary( reported_sex=None, reported_gender=None, karyotype=None, - project=self.project, + # project=self.project, ) ) elif pid not in pid_seen: @@ -445,7 +446,7 @@ async def get_project_summary( reported_sex=p['reported_sex'], reported_gender=p['reported_gender'], karyotype=p['karyotype'], - project=self.project, + # project=self.project, ) ) diff --git a/db/python/tables/analysis.py b/db/python/tables/analysis.py index d6d2fe41d..a1d4a4c82 100644 --- a/db/python/tables/analysis.py +++ b/db/python/tables/analysis.py @@ -2,15 +2,15 @@ import dataclasses from collections import defaultdict from datetime import datetime -from typing import List, Optional, Set, Tuple, Dict, Any +from typing import Any, Dict, List, Optional, Set, Tuple from db.python.connect import DbBase, NotFoundError from db.python.tables.project import ProjectId from db.python.utils import ( - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + to_db_json, ) from models.enums import AnalysisStatus from models.models.analysis import AnalysisInternal @@ -285,7 +285,7 @@ async def get_incomplete_analyses( """ Gets details of analysis with status queued or in-progress """ - _query = f""" + _query = """ SELECT a.id as id, a.type as type, a.status as status, a.output as output, a_sg.sequencing_group_id as sequencing_group_id, a.project as project, a.meta as meta @@ -339,7 +339,7 @@ async def get_latest_complete_analysis_for_sequencing_group_ids_by_type( if row['sequencing_group_id'] in seen_sequencing_group_ids: continue seen_sequencing_group_ids.add(row['sequencing_group_id']) - analyses.append(AnalysisInternal.from_db(**row)) + analyses.append(AnalysisInternal.from_db(**dict(row))) # reverse after 
timestamp_completed return analyses[::-1] @@ -439,7 +439,7 @@ async def get_sample_cram_path_map_for_seqr( seq_check = 'IN :seq_types' values['seq_types'] = sequencing_types - filters.append(f'JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) + filters.append('JSON_VALUE(a.meta, "$.sequencing_type") ' + seq_check) if participant_ids: filters.append('p.id IN :pids') diff --git a/db/python/tables/project.py b/db/python/tables/project.py index d7c002c28..ac01e18bb 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -1,22 +1,21 @@ # pylint: disable=global-statement import asyncio -from typing import Dict, List, Set, Iterable, Optional, Tuple, Any - import json from datetime import datetime, timedelta +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple +from cpg_utils.cloud import get_cached_group_members from databases import Database from google.cloud import secretmanager -from cpg_utils.cloud import get_cached_group_members from api.settings import MEMBERS_CACHE_LOCATION, is_all_access from db.python.utils import ( - ProjectId, Forbidden, + InternalError, NoProjectAccess, + ProjectId, get_logger, to_db_json, - InternalError, ) from models.models.project import Project @@ -440,9 +439,9 @@ async def get_seqr_projects(self) -> list[dict[str, Any]]: projects = [] for r in await self.connection.fetch_all(_query): - r = dict(r) - r['meta'] = json.loads(r['meta'] or '{}') - projects.append(r) + row = dict(r) + row['meta'] = json.loads(row['meta'] or '{}') + projects.append(row) return projects diff --git a/db/python/tables/sequencing_group.py b/db/python/tables/sequencing_group.py index a3a83e864..fefa36116 100644 --- a/db/python/tables/sequencing_group.py +++ b/db/python/tables/sequencing_group.py @@ -6,11 +6,11 @@ from db.python.connect import DbBase, NoOpAenter, NotFoundError from db.python.utils import ( - ProjectId, - to_db_json, - GenericFilterModel, GenericFilter, + GenericFilterModel, GenericMetaFilter, + ProjectId, + to_db_json, ) from models.models.sequencing_group import SequencingGroupInternal @@ -125,10 +125,10 @@ async def get_sequencing_groups_by_ids( f'Couldn\'t find sequencing groups with internal id {ids})' ) - rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] - projects = set(r.project for r in rows) + sg_rows = [SequencingGroupInternal.from_db(**dict(r)) for r in rows] + projects = set(r.project for r in sg_rows) - return projects, rows + return projects, sg_rows async def get_assay_ids_by_sequencing_group_ids( self, ids: list[int] @@ -172,7 +172,7 @@ async def get_all_sequencing_group_ids_by_sample_ids_by_type( return sequencing_group_ids_by_sample_ids_by_type - async def get_participant_ids_and_sequence_group_ids_for_sequencing_type( + async def get_participant_ids_and_sequencing_group_ids_for_sequencing_type( self, sequencing_type: str ) -> tuple[set[ProjectId], dict[int, list[int]]]: """ @@ -252,7 +252,7 @@ async def create_sequencing_group( type_: str, technology: str, platform: str, - sequence_ids: list[int], + assay_ids: list[int], meta: dict = None, author: str = None, open_transaction=True, @@ -319,16 +319,16 @@ async def create_sequencing_group( _query, {**values, 'author': author or self.author}, ) - sequence_insert_values = [ + assay_id_insert_values = [ { 'seqgroup': id_of_seq_group, 'assayid': s, 'author': author or self.author, } - for s in sequence_ids + for s in assay_ids ] await self.connection.execute_many( - _seqg_linker_query, sequence_insert_values + _seqg_linker_query, assay_id_insert_values ) 
return id_of_seq_group diff --git a/etl/endpoint/main.py b/etl/endpoint/main.py index e56679bd0..7a53fca68 100644 --- a/etl/endpoint/main.py +++ b/etl/endpoint/main.py @@ -3,12 +3,12 @@ import logging import os import uuid -import functions_framework + import flask +import functions_framework import google.cloud.bigquery as bq -from google.cloud import pubsub_v1 - from cpg_utils.cloud import email_from_id_token +from google.cloud import pubsub_v1 # type: ignore BIGQUERY_TABLE = os.getenv('BIGQUERY_TABLE') PUBSUB_TOPIC = os.getenv('PUBSUB_TOPIC') diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index cfdf6926f..a31fea483 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -1,47 +1,45 @@ # pylint: disable=too-many-lines,too-many-instance-attributes,too-many-locals,unused-argument,assignment-from-none,invalid-name,ungrouped-imports -import json -import sys import asyncio import csv +import json import logging import os import re +import sys from abc import abstractmethod from collections import defaultdict +from functools import wraps from io import StringIO from typing import ( - List, + Any, + Coroutine, Dict, - Union, - Optional, - Tuple, + Hashable, + Iterable, + Iterator, + List, Match, - Any, + Optional, Sequence, - TypeVar, - Iterator, - Coroutine, Set, - Iterable, - Hashable, + Tuple, + TypeVar, + Union, ) -from functools import wraps from cloudpathlib import AnyPath -from metamist.graphql import query_async, gql -from metamist.parser.cloudhelper import CloudHelper, group_by - -from metamist.apis import SampleApi, AssayApi, AnalysisApi, ParticipantApi +from metamist.apis import AnalysisApi, AssayApi, ParticipantApi, SampleApi +from metamist.graphql import gql, query_async from metamist.models import ( Analysis, AnalysisStatus, + AssayUpsert, ParticipantUpsert, SampleUpsert, SequencingGroupUpsert, - AssayUpsert, ) - +from metamist.parser.cloudhelper import CloudHelper, group_by # https://mypy.readthedocs.io/en/stable/runtime_troubles.html#using-new-additions-to-the-typing-module if sys.version_info >= (3, 8): @@ -322,8 +320,8 @@ def __init__( def to_sm(self) -> AssayUpsert: """Convert to SM upsert model""" return AssayUpsert( - type=self.assay_type, id=self.internal_id, + type=self.assay_type, external_ids=self.external_ids, # sample_id=self.s, meta=self.meta, @@ -1080,7 +1078,9 @@ async def add_analyses(self, analyses_to_add, external_to_internal_id_map): for external_id, analysis in chunked_analysis: # TODO: resolve this external_to_internal_id_map # this one is going to be slightly harder : - analysis.sequence_group_ids = [external_to_internal_id_map[external_id]] + analysis.sequencing_group_ids = [ + external_to_internal_id_map[external_id] + ] promises.append( analysisapi.create_analysis_async( project=proj, analysis_model=analysis diff --git a/models/base.py b/models/base.py index 133671761..389c38a56 100644 --- a/models/base.py +++ b/models/base.py @@ -2,7 +2,7 @@ # annotate any external objects that must be instantiated with this # type to force openapi generator to allow for Nones (it will actually allow Any) -OpenApiGenNoneType = bytes +OpenApiGenNoneType = bytes | None class SMBase(BaseModel): diff --git a/models/models/__init__.py b/models/models/__init__.py index 24069f708..37f1068a9 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -1,60 +1,61 @@ from models.models.analysis import ( - AnalysisInternal, Analysis, + AnalysisInternal, DateSizeModel, - SequencingGroupSizeModel, 
ProjectSizeModel, + SequencingGroupSizeModel, ) from models.models.assay import ( - AssayInternal, - AssayUpsertInternal, Assay, + AssayInternal, AssayUpsert, + AssayUpsertInternal, ) from models.models.family import ( - FamilySimpleInternal, + Family, FamilyInternal, FamilySimple, - Family, + FamilySimpleInternal, PedRowInternal, ) from models.models.participant import ( - ParticipantInternal, + NestedParticipant, NestedParticipantInternal, - ParticipantUpsertInternal, Participant, - NestedParticipant, + ParticipantInternal, ParticipantUpsert, + ParticipantUpsertInternal, ) from models.models.project import Project from models.models.sample import ( - SampleInternal, + NestedSample, NestedSampleInternal, - SampleUpsertInternal, Sample, - NestedSample, + SampleInternal, SampleUpsert, + SampleUpsertInternal, ) from models.models.search import ( - SearchResponseData, + ErrorResponse, FamilySearchResponseData, ParticipantSearchResponseData, SampleSearchResponseData, - ErrorResponse, - SearchResponse, SearchItem, + SearchResponse, + SearchResponseData, + SequencingGroupSearchResponseData, ) from models.models.sequencing_group import ( - SequencingGroupInternal, + NestedSequencingGroup, NestedSequencingGroupInternal, - SequencingGroupUpsertInternal, SequencingGroup, - NestedSequencingGroup, + SequencingGroupInternal, SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) from models.models.web import ( + PagingLinks, + ProjectSummary, ProjectSummaryInternal, WebProject, - ProjectSummary, - PagingLinks, ) diff --git a/models/models/analysis.py b/models/models/analysis.py index a88d6dd99..f35e95a48 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -15,7 +15,7 @@ class AnalysisInternal(SMBase): """Model for Analysis""" - id: int | None + id: int | None = None type: str status: AnalysisStatus output: str = None diff --git a/models/models/assay.py b/models/models/assay.py index 6e17a6ef7..04658ad44 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,11 +1,8 @@ import json from typing import Any -from models.base import SMBase, OpenApiGenNoneType -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.base import OpenApiGenNoneType, SMBase +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class AssayInternal(SMBase): @@ -104,12 +101,12 @@ def to_internal(self): _sample_id = None if self.sample_id: # but may be provided directly when inserting directly - _sample_id = sample_id_transform_to_raw(self.sample_id) + _sample_id = sample_id_transform_to_raw(self.sample_id) # type: ignore return AssayUpsertInternal( - id=self.id, - type=self.type, - external_ids=self.external_ids, - sample_id=_sample_id, - meta=self.meta, + id=self.id, # type: ignore + type=self.type, # type: ignore + external_ids=self.external_ids, # type: ignore + sample_id=_sample_id, # type: ignore + meta=self.meta, # type: ignore ) diff --git a/models/models/participant.py b/models/models/participant.py index c72451274..ab843e343 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,14 +1,14 @@ import json from db.python.utils import ProjectId -from models.base import SMBase, OpenApiGenNoneType +from models.base import OpenApiGenNoneType, SMBase +from models.models.family import FamilySimple, FamilySimpleInternal from models.models.sample import ( - SampleUpsertInternal, - SampleUpsert, - NestedSampleInternal, NestedSample, + NestedSampleInternal, + SampleUpsert, + 
SampleUpsertInternal, ) -from models.models.family import FamilySimple, FamilySimpleInternal class ParticipantInternal(SMBase): @@ -135,12 +135,12 @@ class ParticipantUpsert(SMBase): def to_internal(self): """Convert to internal model, doesn't really do much""" p = ParticipantUpsertInternal( - id=self.id, - external_id=self.external_id, - reported_sex=self.reported_sex, - reported_gender=self.reported_gender, - karyotype=self.karyotype, - meta=self.meta, + id=self.id, # type: ignore + external_id=self.external_id, # type: ignore + reported_sex=self.reported_sex, # type: ignore + reported_gender=self.reported_gender, # type: ignore + karyotype=self.karyotype, # type: ignore + meta=self.meta, # type: ignore ) if self.samples: diff --git a/models/models/sample.py b/models/models/sample.py index 94fd3a901..a41c06463 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,17 +1,14 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsertInternal, AssayUpsert, AssayInternal, Assay +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.models.sequencing_group import ( - SequencingGroupUpsert, NestedSequencingGroup, - SequencingGroupUpsertInternal, NestedSequencingGroupInternal, + SequencingGroupUpsert, + SequencingGroupUpsertInternal, ) -from models.utils.sample_id_format import ( - sample_id_format, - sample_id_transform_to_raw, -) +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw class SampleInternal(SMBase): @@ -143,6 +140,7 @@ def to_internal(self): type=self.type, participant_id=self.participant_id, active=self.active, + author='', ) @@ -180,12 +178,12 @@ def to_internal(self) -> SampleUpsertInternal: sample_upsert = SampleUpsertInternal( id=_id, - external_id=self.external_id, - meta=self.meta, - project=self.project, - type=self.type, - participant_id=self.participant_id, - active=self.active, + external_id=self.external_id, # type: ignore + meta=self.meta, # type: ignore + project=self.project, # type: ignore + type=self.type, # type: ignore + participant_id=self.participant_id, # type: ignore + active=self.active, # type: ignore ) if self.sequencing_groups: diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index 36e250c73..1b0bbd3f4 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,11 +1,11 @@ import json -from models.base import SMBase, OpenApiGenNoneType -from models.models.assay import AssayUpsert, AssayUpsertInternal, Assay, AssayInternal -from models.utils.sample_id_format import sample_id_transform_to_raw, sample_id_format +from models.base import OpenApiGenNoneType, SMBase +from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal +from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) @@ -60,6 +60,7 @@ def to_external(self): type=self.type, technology=self.technology, platform=self.platform, + external_ids=self.external_ids, meta=self.meta, sample_id=sample_id_format(self.sample_id), assays=[a.to_external() for a in self.assays or []], @@ -141,6 +142,7 @@ class SequencingGroup(SMBase): sample_id: str external_ids: dict[str, str] archived: bool + assays: list[Assay] class 
NestedSequencingGroup(SMBase): @@ -169,7 +171,7 @@ class SequencingGroupUpsert(SMBase): sample_id: str | OpenApiGenNoneType = None external_ids: dict[str, str] | OpenApiGenNoneType = None - assays: list[AssayUpsert] | None = None + assays: list[AssayUpsert] | OpenApiGenNoneType = None def to_internal(self) -> SequencingGroupUpsertInternal: """ @@ -185,15 +187,15 @@ def to_internal(self) -> SequencingGroupUpsertInternal: sg_internal = SequencingGroupUpsertInternal( id=_id, - type=self.type, - technology=self.technology, - platform=self.platform.lower() if self.platform else None, - meta=self.meta, + type=self.type, # type: ignore + technology=self.technology, # type: ignore + platform=self.platform.lower() if self.platform else None, # type: ignore + meta=self.meta, # type: ignore sample_id=_sample_id, - external_ids=self.external_ids or {}, + external_ids=self.external_ids or {}, # type: ignore ) if self.assays is not None: - sg_internal.assays = [a.to_internal() for a in self.assays] + sg_internal.assays = [a.to_internal() for a in self.assays] # type: ignore return sg_internal diff --git a/mypy.ini b/mypy.ini index 3403e0acb..418e757ed 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,10 +4,15 @@ python_version = 3.10 ; warn_return_any = True ; warn_unused_configs = True + exclude = (build|update_sample_status) + # Per-module options: plugins = strawberry.ext.mypy_plugin +[mypy.db] +disable_error_code = operator + [mypy-sample_metadata.*] ignore_errors = true @@ -44,3 +49,5 @@ ignore_missing_imports=True ignore_missing_imports = True [mypy-graphql] ignore_missing_imports = True +[mypy-strawberry] +ignore_errors = True diff --git a/regenerate_api.py b/regenerate_api.py index 07112163e..8e6a9fece 100755 --- a/regenerate_api.py +++ b/regenerate_api.py @@ -184,7 +184,7 @@ def generate_schema_file(): Generate schema file and place in the metamist/graphql/ directory """ command = ['strawberry', 'export-schema', 'api.graphql.schema:schema'] - schema = subprocess.check_output(command, stderr=subprocess.STDOUT).decode() + schema = subprocess.check_output(command).decode() with open(os.path.join(MODULE_DIR, 'graphql/schema.graphql'), 'w+') as f: f.write(schema) @@ -317,7 +317,7 @@ def main(): while (not check_if_server_is_accessible()) and startup_tries > 0: startup_tries -= 1 logger.info( - f'Dockerised API server is not ready yet. ' + 'Dockerised API server is not ready yet. ' + f'Retrying in {wait_time_in_seconds} seconds. 
' + f'Remaining tries: {startup_tries}' ) diff --git a/requirements-dev.txt b/requirements-dev.txt index 1113f5753..0202733d1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,5 @@ pre-commit pylint testcontainers[mariadb] types-PyMySQL +# some strawberry dependency +strawberry-graphql[debug-server]==0.206.0 diff --git a/requirements.txt b/requirements.txt index b5cc04e02..2affefbb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ google-cloud-logging==2.7.0 google-cloud-storage==1.43.0 uvicorn==0.18.3 fastapi[all]==0.85.1 -strawberry-graphql[fastapi]==0.177.1 +strawberry-graphql[fastapi]==0.206.0 python-multipart==0.0.5 databases[mysql]==0.6.1 SQLAlchemy==1.4.41 diff --git a/scripts/20230420_sequencinggroupmigration.py b/scripts/20230420_sequencinggroupmigration.py index a3d2a705a..3feeecdd5 100644 --- a/scripts/20230420_sequencinggroupmigration.py +++ b/scripts/20230420_sequencinggroupmigration.py @@ -27,7 +27,7 @@ import json from collections import defaultdict from textwrap import dedent -from typing import Any, List, Dict, Tuple +from typing import Any, Dict, List, Tuple import click from databases import Database @@ -338,7 +338,7 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ) analysis_samples = await connection.fetch_all(analyses_query) - sequence_group_ids_of_duplicate_samples_query = dedent( + sequencing_group_ids_of_duplicate_samples_query = dedent( """ SELECT sg.sample_id, sg.id, sg.type FROM sequencing_group sg @@ -352,13 +352,13 @@ async def migrate_analyses(connection: Database, dry_run: bool = True): ORDER BY sg.sample_id DESC; """ ) - sequence_group_ids_of_duplicate_samples = await connection.fetch_all( - sequence_group_ids_of_duplicate_samples_query + sequencing_group_ids_of_duplicate_samples = await connection.fetch_all( + sequencing_group_ids_of_duplicate_samples_query ) duplicate_sg_id_map: Dict[ SampleId, Dict[SequenceType, SequenceGroupId] ] = defaultdict(dict) - for row in sequence_group_ids_of_duplicate_samples: + for row in sequencing_group_ids_of_duplicate_samples: duplicate_sg_id_map[row['sample_id']][row['type']] = row['id'] values_to_insert: List[Tuple[int, SequenceGroupId]] = [] diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index fd1e74381..c14f13d79 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 -# pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments +# type: ignore +# pylint: skip-file + + +# # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument,too-many-arguments """ Example Invocation @@ -11,7 +15,7 @@ This example will populate acute-care-test with the metamist data for 4 families. 
""" -from typing import Optional +import csv import logging import os import random @@ -19,25 +23,19 @@ import traceback import typing from collections import Counter -import csv +from typing import Optional import click from google.cloud import storage from metamist import exceptions -from metamist.apis import ( - AnalysisApi, - AssayApi, - SampleApi, - FamilyApi, - ParticipantApi, -) +from metamist.apis import AnalysisApi, AssayApi, FamilyApi, ParticipantApi, SampleApi from metamist.models import ( - BodyGetAssaysByCriteria, - AssayUpsert, - SampleUpsert, Analysis, AnalysisStatus, + AssayUpsert, + BodyGetAssaysByCriteria, + SampleUpsert, ) logger = logging.getLogger(__file__) @@ -313,7 +311,7 @@ def main( ) logger.info(f'Creating {a_type} analysis entry in test') aapi.create_analysis(project=target_project, analysis=am) - logger.info(f'-') + logger.info('-') def transfer_families( diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py index 82998215d..a157e0ff6 100644 --- a/scripts/parse_ont_sheet.py +++ b/scripts/parse_ont_sheet.py @@ -1,15 +1,15 @@ #!/usr/bin/env python3 # pylint: disable=too-many-instance-attributes,too-many-locals,unused-argument,wrong-import-order,unused-argument -from typing import List import logging +from typing import List import click -from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup from metamist.parser.generic_metadata_parser import ( - run_as_sync, GenericMetadataParser, + run_as_sync, ) +from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup logger = logging.getLogger(__file__) logger.addHandler(logging.StreamHandler()) @@ -97,14 +97,14 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: return [fastqs] async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: - sequence_groups = await super().group_assays(sample) + sequencing_groups = await super().group_assays(sample) - for sequence_group in sequence_groups: + for sequencing_group in sequencing_groups: failed_fastqs: list[str] = [] - for r in sequence_group.rows: + for r in sequencing_group.rows: parsed_failed_fastqs = await self.parse_files( - sequence_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] + sequencing_group.sample.external_sid, r[Columns.FAIL_FASTQ_FILENAME] ) if 'reads' not in parsed_failed_fastqs: raise ValueError( @@ -120,9 +120,9 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup ) failed_fastqs.extend(parsed_failed_fastq_reads['fastq']) - sequence_group.meta['failed_reads'] = failed_fastqs + sequencing_group.meta['failed_reads'] = failed_fastqs - return sequence_groups + return sequencing_groups @click.command() diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index 894d0c241..f587e483b 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -1,12 +1,9 @@ """ A really simple script to import a pedigree file """ import click +from cloudpathlib import AnyPath -from cloudpathlib import CloudPath - -from metamist.apis import ( - FamilyApi, -) +from metamist.apis import FamilyApi @click.command() @@ -17,7 +14,8 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() - with CloudPath(ped_file_path).open() as ped_file: + # pylint: disable=no-member + with AnyPath(ped_file_path).open() as ped_file: # type: ignore fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/scripts/sync_seqr.py b/scripts/sync_seqr.py index 67967e437..12be0fd93 100644 --- a/scripts/sync_seqr.py +++ b/scripts/sync_seqr.py @@ -1,27 +1,28 @@ # 
pylint: disable=missing-timeout,unnecessary-lambda-assignment,import-outside-toplevel,too-many-locals import asyncio -import os -import re -import json import datetime +import json import logging +import os +import re import traceback from collections import defaultdict -from typing import Any from io import StringIO +from typing import Any import aiohttp import yaml from cloudpathlib import AnyPath -from metamist.graphql import query_async -from metamist.model.analysis_status import AnalysisStatus -from metamist.model.export_type import ExportType -from metamist.model.analysis_query_model import AnalysisQueryModel + from metamist.apis import ( - SeqrApi, - ProjectApi, AnalysisApi, + ProjectApi, + SeqrApi, ) +from metamist.graphql import query_async +from metamist.model.analysis_query_model import AnalysisQueryModel +from metamist.model.analysis_status import AnalysisStatus +from metamist.model.export_type import ExportType from metamist.parser.generic_parser import chunk loggers_to_silence = [ @@ -410,7 +411,7 @@ async def update_es_index( fn_path = os.path.join(MAP_LOCATION, filename) # pylint: disable=no-member - with AnyPath(fn_path).open('w+') as f: + with AnyPath(fn_path).open('w+') as f: # type: ignore f.write('\n'.join(rows_to_write)) if check_metamist: # len(es_index_analyses) > 0: @@ -678,7 +679,9 @@ def sync_all_datasets(sequencing_type: str, ignore: set[str] = None): continue try: el.run_until_complete( - sync_dataset_async(project_name, seqr_guid, sequencing_type=sequencing_type) + sync_dataset_async( + project_name, seqr_guid, sequencing_type=sequencing_type + ) ) except Exception as e: # pylint: disable=broad-exception-caught print( diff --git a/test/test_analysis.py b/test/test_analysis.py index ec03f0702..ef7e1acac 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -1,21 +1,20 @@ # pylint: disable=invalid-overridden-method -from datetime import timedelta, datetime - +from datetime import datetime, timedelta from test.testbase import DbIsolatedTest, run_as_sync -from db.python.tables.analysis import AnalysisFilter -from db.python.utils import GenericFilter -from db.python.layers.assay import AssayLayer from db.python.layers.analysis import AnalysisLayer +from db.python.layers.assay import AssayLayer from db.python.layers.sample import SampleLayer from db.python.layers.sequencing_group import SequencingGroupLayer +from db.python.tables.analysis import AnalysisFilter +from db.python.utils import GenericFilter +from models.enums import AnalysisStatus from models.models import ( AnalysisInternal, AssayUpsertInternal, - SequencingGroupUpsertInternal, SampleUpsertInternal, + SequencingGroupUpsertInternal, ) -from models.enums import AnalysisStatus class TestAnalysis(DbIsolatedTest): @@ -132,7 +131,7 @@ async def test_get_analysis(self): AnalysisInternal( type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], meta={}, ) ) @@ -148,7 +147,7 @@ async def test_get_analysis(self): id=a_id, type='analysis-runner', status=AnalysisStatus.UNKNOWN, - sequence_group_ids=[], + sequencing_group_ids=[], output=None, timestamp_completed=None, project=1, diff --git a/test/test_assay.py b/test/test_assay.py index af641a278..cde5f242a 100644 --- a/test/test_assay.py +++ b/test/test_assay.py @@ -1,10 +1,11 @@ from test.testbase import DbIsolatedTest, run_as_sync + from pymysql.err import IntegrityError from db.python.connect import NotFoundError -from db.python.layers.sample import SampleLayer -from db.python.layers.assay import 
AssayLayer from db.python.enum_tables import AssayTypeTable +from db.python.layers.assay import AssayLayer +from db.python.layers.sample import SampleLayer from db.python.tables.assay import AssayFilter from db.python.utils import GenericFilter from models.models.assay import AssayUpsertInternal @@ -200,11 +201,20 @@ async def test_getting_assay_by_external_id(self): ) ) - fquery_1 = AssayFilter(external_id='SEQ01', project=self.project_id) + fquery_1 = AssayFilter( + external_id=GenericFilter(eq='SEQ01'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_1))[0].id) - fquery_2 = AssayFilter(external_id='EXT_SEQ1', project=self.project_id) + fquery_2 = AssayFilter( + external_id=GenericFilter(eq='EXT_SEQ1'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq1.id, (await self.assaylayer.query(fquery_2))[0].id) - fquery_3 = AssayFilter(external_id='SEQ02', project=self.project_id) + fquery_3 = AssayFilter( + external_id=GenericFilter(eq='SEQ02'), + project=GenericFilter(eq=self.project_id), + ) self.assertEqual(seq2.id, (await self.assaylayer.query(fquery_3))[0].id) @run_as_sync @@ -285,20 +295,22 @@ async def search_result_to_ids(filter_: AssayFilter): ) self.assertSetEqual( {seq1_id, seq2_id}, - await search_result_to_ids(AssayFilter(meta={'common': 'common'})), + await search_result_to_ids( + AssayFilter(meta={'common': GenericFilter(eq='common')}) + ), ) # sample meta self.assertSetEqual( {seq1_id, seq2_id}, await search_result_to_ids( - AssayFilter(sample_meta={'collection-year': '2022'}) + AssayFilter(sample_meta={'collection-year': GenericFilter(eq='2022')}) ), ) self.assertSetEqual( set(), await search_result_to_ids( - AssayFilter(sample_meta={'unknown_key': '2022'}) + AssayFilter(sample_meta={'unknown_key': GenericFilter(eq='2022')}) ), ) @@ -315,7 +327,7 @@ async def search_result_to_ids(filter_: AssayFilter): {seq2_id}, await search_result_to_ids( AssayFilter( - sample_meta={'collection-year': '2022'}, + sample_meta={'collection-year': GenericFilter(eq='2022')}, external_id=GenericFilter(in_=['SEQ02']), ) ), diff --git a/test/test_generic_auditor.py b/test/test_generic_auditor.py index 4fb526dc9..21ef3f6b5 100644 --- a/test/test_generic_auditor.py +++ b/test/test_generic_auditor.py @@ -1,16 +1,16 @@ -from collections import namedtuple import unittest -from unittest.mock import MagicMock, patch +import unittest.mock +from collections import namedtuple + from metamist.audit.generic_auditor import GenericAuditor -# pylint: disable=dangerous-default-value # noqa: B006 class TestGenericAuditor(unittest.TestCase): """Test the audit helper functions""" - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_get_participant_data_for_dataset(self, mock_query): """Only participants with a non-empty samples field should be returned""" auditor = GenericAuditor( @@ -360,7 +360,7 @@ def test_get_sequence_mapping_warning_logging(self): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_analyses_crams(self, mock_query): """Test that only the genome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -412,7 +412,7 @@ def test_query_genome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + 
@unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_genome_and_exome_analyses_crams(self, mock_query): """Test that both the genome and exome analysis crams for a sample map dictionary are returned""" auditor = GenericAuditor( @@ -472,7 +472,7 @@ def test_query_genome_and_exome_analyses_crams(self, mock_query): self.assertDictEqual(test_result, expected_result) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_broken_analyses_crams(self, mock_query): """ All analysis crams must have 'sequencing_type' meta field, @@ -506,7 +506,7 @@ def test_query_broken_analyses_crams(self, mock_query): assay_sg_id_map={1: 'CPG123'} ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_query_analyses_crams_warning(self, mock_query): """Warn if the sample_ids field is absent and the sample meta field is used instead""" auditor = GenericAuditor( @@ -541,7 +541,7 @@ def test_query_analyses_crams_warning(self, mock_query): log.output[0], ) - @patch('metamist.audit.generic_auditor.query') + @unittest.mock.patch('metamist.audit.generic_auditor.query') def test_analyses_for_sgs_without_crams(self, mock_query): """Log any analyses found for samples without completed CRAMs""" auditor = GenericAuditor( @@ -569,7 +569,9 @@ def test_analyses_for_sgs_without_crams(self, mock_query): } with self.assertLogs(level='WARNING') as log: - _ = auditor.analyses_for_sgs_without_crams(sgs_without_crams) + # catch the warning logs from here and check below + auditor.analyses_for_sgs_without_crams(sgs_without_crams) + self.assertEqual(len(log.output), 8) # 8 analysis types checked self.assertEqual(len(log.records), 8) self.assertIn( @@ -577,7 +579,21 @@ def test_analyses_for_sgs_without_crams(self, mock_query): log.output[0], ) - def test_get_complete_and_incomplete_sgs(self): + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.get_gcs_bucket_subdirs_to_search' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_files_in_gcs_buckets_subdirs' + ) + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.analyses_for_sgs_without_crams' + ) + def test_get_complete_and_incomplete_sgs( + self, + mock_analyses_for_sgs_without_crams, + mock_find_files_in_gcs_buckets_subdirs, + mock_get_gcs_bucket_subdirs, + ): """Report on samples that have completed CRAMs and those that dont""" assay_sg_id_map = { # noqa: B006 1: 'CPG123', @@ -591,17 +607,15 @@ def test_get_complete_and_incomplete_sgs(self): auditor = GenericAuditor( dataset='dev', sequencing_type=['genome', 'exome'], file_types=('fastq',) ) - auditor.get_gcs_bucket_subdirs_to_search = MagicMock() - auditor.find_files_in_gcs_buckets_subdirs = MagicMock() - auditor.analyses_for_sgs_without_crams = MagicMock() - auditor.get_gcs_bucket_subdirs_to_search.return_value = { + mock_get_gcs_bucket_subdirs.return_value = { 'cpg-dataset-main': ['cram', 'exome/cram'] } - auditor.find_files_in_gcs_buckets_subdirs.return_value = [ + mock_find_files_in_gcs_buckets_subdirs.return_value = [ 'gs://cpg-dataset-main/cram/CPG123.cram', 'gs://cpg-dataset-main/exome/cram/CPG456.cram', ] + mock_analyses_for_sgs_without_crams.return_value = None result = auditor.get_complete_and_incomplete_sgs( assay_sg_id_map=assay_sg_id_map, @@ -615,8 +629,16 @@ def test_get_complete_and_incomplete_sgs(self): self.assertDictEqual(result, expected_result) - async def 
test_check_for_uningested_or_moved_assays(self): - """Test 2 ingested reads, one ingested and moved read, and one uningested read""" + @unittest.mock.patch('metamist.audit.generic_auditor.GenericAuditor.file_size') + @unittest.mock.patch( + 'metamist.audit.generic_auditor.GenericAuditor.find_sequence_files_in_gcs_bucket' + ) + async def test_check_for_uningested_or_moved_assays( + self, mock_find_sequence_files_in_gcs_bucket, mock_file_size + ): + """ + Test 2 ingested reads, one ingested and moved read, and one uningested read + """ auditor = GenericAuditor( dataset='dev', sequencing_type=['genome'], file_types=('fastq',) ) @@ -627,16 +649,14 @@ async def test_check_for_uningested_or_moved_assays(self): sg_sample_id_map = {'CPG123': 'EXT123'} assay_sg_id_map = {1: 'CPG123'} sample_internal_external_id_map = {'CPG123': 'EXT123'} - auditor.find_sequence_files_in_gcs_bucket = MagicMock() - auditor.find_sequence_files_in_gcs_bucket.return_value = [ + mock_find_sequence_files_in_gcs_bucket.return_value = [ 'read1.fq', 'read2.fq', 'dir2/read3.fq', 'read4.fq', ] - auditor.file_size = MagicMock() - auditor.file_size.return_value = 12 + mock_file_size.return_value = 12 ( uningested_sequence_paths, diff --git a/test/test_generic_filters.py b/test/test_generic_filters.py index 343047c96..cc01e7f80 100644 --- a/test/test_generic_filters.py +++ b/test/test_generic_filters.py @@ -15,12 +15,6 @@ class GenericFilterTest(GenericFilterModel): class TestGenericFilters(unittest.TestCase): """Test generic filters SQL generation""" - def test_post_init_correction(self): - """Test that the post init correction works""" - filter_ = GenericFilterTest(test_string='test') - self.assertIsInstance(filter_.test_string, GenericFilter) - self.assertEqual(filter_.test_string.eq, 'test') - def test_basic_no_override(self): """Test that the basic filter converts to SQL as expected""" filter_ = GenericFilterTest(test_string=GenericFilter(eq='test')) diff --git a/test/test_graphql.py b/test/test_graphql.py index 95b817931..c61eed238 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -1,20 +1,19 @@ from test.testbase import DbIsolatedTest, run_as_sync + from graphql.error import GraphQLError, GraphQLSyntaxError import api.graphql.schema -from db.python.layers import ParticipantLayer, AnalysisLayer +from db.python.layers import AnalysisLayer, ParticipantLayer +from metamist.graphql import configure_sync_client, gql, validate +from models.enums import AnalysisStatus from models.models import ( - SampleUpsertInternal, + AnalysisInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, - AnalysisInternal, ) from models.utils.sequencing_group_id_format import sequencing_group_id_format -from models.enums import AnalysisStatus - -from metamist.graphql import gql, validate, configure_sync_client - default_assay_meta = { 'sequencing_type': 'genome', @@ -24,7 +23,6 @@ def _get_single_participant_upsert(): - return ParticipantUpsertInternal( external_id='Demeter', meta={}, @@ -43,20 +41,20 @@ def _get_single_participant_upsert(): type='sequencing', meta={ 'reads': [ - { - 'basename': 'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 
'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -114,7 +112,7 @@ def test_validate_provided_schema(self): (strawberry has an as_str() method) """ client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(TEST_QUERY, client=client) diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index ae51f7a20..adb65622e 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -1,5 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync +from databases.interfaces import Record + from db.python.layers.participant import ParticipantLayer from models.models.participant import ParticipantUpsertInternal @@ -21,20 +23,22 @@ async def test_import_many_hpo_terms(self): 'HPO Term 3', 'HPO Term 20', ] - rows = [['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004']] + rows_to_insert = [ + ['TP01', 'HP:0000001', 'HP:0000002', 'HP:0000003', 'HP:0000004'] + ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) - rows = list( + db_rows: list[Record] = list( await self.connection.connection.fetch_all( 'SELECT participant_id, description, value FROM participant_phenotypes' ) ) - self.assertEqual(1, len(rows)) - self.assertEqual('HPO Terms (present)', rows[0]['description']) + self.assertEqual(1, len(db_rows)) + self.assertEqual('HPO Terms (present)', db_rows[0]['description']) self.assertEqual( - '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', rows[0]['value'] + '"HP:0000001,HP:0000002,HP:0000003,HP:0000004"', db_rows[0]['value'] ) @run_as_sync @@ -50,12 +54,12 @@ async def test_import_basic_metadata(self): ) headers = ['Individual ID', 'HPO Term 20', 'Age of Onset'] - rows = [ + rows_to_insert = [ ['TP01', 'HP:0000020', 'Congenital'], ['TP02', 'HP:00000021; HP:023', 'Infantile'], ] - await pl.generic_individual_metadata_importer(headers, rows) + await pl.generic_individual_metadata_importer(headers, rows_to_insert) rows = list( await self.connection.connection.fetch_all( diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index ed95a4fbc..ebcc5ec75 100644 --- a/test/test_parse_generic_metadata.py +++ b/test/test_parse_generic_metadata.py @@ -1,33 +1,31 @@ import unittest from datetime import datetime from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - import api.graphql.schema from db.python.layers import ParticipantLayer +from metamist.graphql import configure_sync_client, validate +from metamist.parser.generic_metadata_parser import GenericMetadataParser +from metamist.parser.generic_parser import ( + QUERY_MATCH_ASSAYS, + QUERY_MATCH_PARTICIPANTS, + QUERY_MATCH_SAMPLES, + QUERY_MATCH_SEQUENCING_GROUPS, + ParsedParticipant, + ParsedSample, + ParsedSequencingGroup, +) from models.models import ( + AssayUpsertInternal, ParticipantUpsertInternal, SampleUpsertInternal, SequencingGroupUpsertInternal, - AssayUpsertInternal, ) from 
models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import sequencing_group_id_format -from metamist.graphql import validate, configure_sync_client -from metamist.parser.generic_parser import ( - ParsedParticipant, - ParsedSample, - QUERY_MATCH_PARTICIPANTS, - QUERY_MATCH_SAMPLES, - QUERY_MATCH_SEQUENCING_GROUPS, - QUERY_MATCH_ASSAYS, - ParsedSequencingGroup, -) -from metamist.parser.generic_metadata_parser import GenericMetadataParser - def _get_basic_participant_to_upsert(): default_assay_meta = { @@ -96,7 +94,7 @@ def test_queries(self): # only need to apply schema to the first client to create, then it gets cached client = configure_sync_client( - schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' + schema=api.graphql.schema.schema.as_str(), auth_token='FAKE' # type: ignore ) validate(QUERY_MATCH_PARTICIPANTS) validate(QUERY_MATCH_SAMPLES, client=client) @@ -332,11 +330,11 @@ async def test_rows_with_participants(self, mock_graphql_query): # Call generic parser file_contents = '\n'.join(rows) - summary, participants = await parser.parse_manifest( + summary, prows = await parser.parse_manifest( StringIO(file_contents), delimiter='\t', dry_run=True ) - participants: list[ParsedParticipant] = participants + participants: list[ParsedParticipant] = prows self.assertEqual(3, summary['participants']['insert']) self.assertEqual(0, summary['participants']['update']) @@ -749,7 +747,9 @@ async def test_matching_sequencing_groups_and_assays( mock_datetime_added.return_value = datetime.fromisoformat('2022-02-02T22:22:22') player = ParticipantLayer(self.connection) - participant = await player.upsert_participant(_get_basic_participant_to_upsert()) + participant = await player.upsert_participant( + _get_basic_participant_to_upsert() + ) filenames = [ 'sample_id001.filename-R1.fastq.gz', diff --git a/test/test_parse_ont_processor.py b/test/test_parse_ont_processor.py index c14d95e73..a3d301754 100644 --- a/test/test_parse_ont_processor.py +++ b/test/test_parse_ont_processor.py @@ -1,8 +1,7 @@ import unittest from io import StringIO -from unittest.mock import patch - from test.testbase import run_as_sync +from unittest.mock import patch from scripts.process_ont_products import OntProductParser @@ -36,7 +35,7 @@ async def test_single_row_all_files_exist( dry_run=True, ) - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True fs = [ 'Sample01.bam', 'Sample01.sv.vcf.gz', @@ -44,7 +43,7 @@ async def test_single_row_all_files_exist( 'Sample01.indels.vcf.gz', ] parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs} - parser.skip_checking_gcs_objects = True + # parser.skip_checking_gcs_objects = True file_contents = '\n'.join(rows) analyses = await parser.parse_manifest( diff --git a/test/test_parse_ont_sheet.py b/test/test_parse_ont_sheet.py index 72b1acdc0..534bb50f1 100644 --- a/test/test_parse_ont_sheet.py +++ b/test/test_parse_ont_sheet.py @@ -1,11 +1,10 @@ from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - from db.python.layers import ParticipantLayer -from models.models import ParticipantUpsertInternal, SampleUpsertInternal from metamist.parser.generic_parser import ParsedParticipant +from models.models import ParticipantUpsertInternal, SampleUpsertInternal from scripts.parse_ont_sheet import OntParser @@ -125,6 +124,6 @@ async def test_simple_sheet(self, mock_graphql_query): ], } self.maxDiff 
= None - sequence_group = participants[0].samples[0].sequencing_groups[0] - self.assertDictEqual(seqgroup_meta, sequence_group.meta) - self.assertDictEqual(meta_dict, sequence_group.assays[0].meta) + sequencing_group = participants[0].samples[0].sequencing_groups[0] + self.assertDictEqual(seqgroup_meta, sequencing_group.meta) + self.assertDictEqual(meta_dict, sequencing_group.assays[0].meta) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 77d395863..8966c8cc3 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -1,9 +1,8 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.participant import ParticipantUpsertInternal - from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer +from models.models.participant import ParticipantUpsertInternal class TestPedigree(DbIsolatedTest): @@ -14,10 +13,10 @@ async def test_import_get_pedigree(self): """Test import + get pedigree""" fl = FamilyLayer(self.connection) - rows = [ - ['FAM01', 'EX01_father', '', '', 1, 1], - ['FAM01', 'EX01_mother', '', '', 2, 1], - ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', 1, 2], + rows: list[list[str]] = [ + ['FAM01', 'EX01_father', '', '', '1', '1'], + ['FAM01', 'EX01_mother', '', '', '2', '1'], + ['FAM01', 'EX01_subject', 'EX01_father', 'EX01_mother', '1', '2'], ] await fl.import_pedigree( diff --git a/test/test_sample.py b/test/test_sample.py index b256a2ae0..e5b8639b7 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync -from models.models.sample import SampleUpsertInternal from db.python.layers.sample import SampleLayer +from models.models.sample import SampleUpsertInternal class TestSample(DbIsolatedTest): @@ -17,7 +17,7 @@ async def setUp(self) -> None: @run_as_sync async def test_add_sample(self): """Test inserting a sample""" - s = await self.slayer.upsert_sample( + sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='Test01', type='blood', @@ -30,8 +30,7 @@ async def test_add_sample(self): 'SELECT id, type, meta, project FROM sample' ) self.assertEqual(1, len(samples)) - s = samples[0] - self.assertEqual(1, s['id']) + self.assertEqual(sample.id, samples[0]['id']) @run_as_sync async def test_get_sample(self): diff --git a/test/test_search.py b/test/test_search.py index fa92cf80f..ca6718ec5 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -1,18 +1,25 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer from db.python.layers.search import SearchLayer -from db.python.layers.family import FamilyLayer from db.python.layers.sequencing_group import SequencingGroupLayer from db.python.tables.family_participant import FamilyParticipantTable - from models.enums import SearchResponseType -from models.models.family import PedRowInternal -from models.models.sample import sample_id_format, SampleUpsertInternal -from models.models.participant import ParticipantUpsertInternal -from models.models.sequencing_group import SequencingGroupUpsertInternal, sequencing_group_id_format -from models.models.assay import AssayUpsertInternal +from models.models import ( + AssayUpsertInternal, + FamilySearchResponseData, + ParticipantSearchResponseData, + ParticipantUpsertInternal, + PedRowInternal, + SampleSearchResponseData, + SampleUpsertInternal, + 
SequencingGroupSearchResponseData, + SequencingGroupUpsertInternal, +) +from models.models.sample import sample_id_format +from models.models.sequencing_group import sequencing_group_id_format class TestSample(DbIsolatedTest): @@ -68,7 +75,11 @@ async def test_search_isolated_sample_by_id(self): self.assertEqual(1, len(results)) self.assertEqual(cpg_id, results[0].title) self.assertEqual(cpg_id, results[0].data.id) - self.assertListEqual(['EX001'], results[0].data.sample_external_ids) + + result_data = results[0].data + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertListEqual(['EX001'], result_data.sample_external_ids) @run_as_sync async def test_search_isolated_sequencing_group_by_id(self): @@ -97,19 +108,24 @@ async def test_search_isolated_sequencing_group_by_id(self): 'sequencing_type': 'transcriptome', 'sequencing_technology': 'long-read', 'sequencing_platform': 'illumina', - } + }, ) - ] + ], ) ] ) cpg_sg_id = sequencing_group_id_format(sg[0].id) - results = await self.schlay.search(query=cpg_sg_id, project_ids=[self.project_id]) + results = await self.schlay.search( + query=cpg_sg_id, project_ids=[self.project_id] + ) self.assertEqual(1, len(results)) self.assertEqual(cpg_sg_id, results[0].title) - self.assertEqual(cpg_sg_id, results[0].data.id) - self.assertEqual(cpg_sg_id, results[0].data.sg_external_id) + result_data = results[0].data + assert isinstance(result_data, SequencingGroupSearchResponseData) + self.assertIsInstance(result_data, SequencingGroupSearchResponseData) + self.assertEqual(cpg_sg_id, result_data.id) + self.assertEqual(cpg_sg_id, result_data.sg_external_id) @run_as_sync async def test_search_isolated_sample_by_external_id(self): @@ -125,12 +141,16 @@ async def test_search_isolated_sample_by_external_id(self): cpg_id = sample_id_format(sample.id) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(cpg_id, result.title) - self.assertEqual(cpg_id, result.data.id) - self.assertListEqual(['EX001'], result.data.sample_external_ids) - self.assertListEqual([], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) + + self.assertEqual(cpg_id, results[0].title) + result_data = results[0].data + + self.assertIsInstance(result_data, SampleSearchResponseData) + assert isinstance(result_data, SampleSearchResponseData) + self.assertEqual(cpg_id, result_data.id) + self.assertListEqual(['EX001'], result_data.sample_external_ids) + self.assertListEqual([], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def test_search_participant_isolated(self): @@ -145,12 +165,13 @@ async def test_search_participant_isolated(self): query='PART01', project_ids=[self.project_id] ) self.assertEqual(1, len(results)) - result = results[0] - self.assertEqual(p.id, result.data.id) - self.assertEqual('PART01', result.title) - self.assertListEqual(['PART01'], result.data.participant_external_ids) - self.assertListEqual([], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + + self.assertEqual('PART01', results[0].title) + result_data = results[0].data + assert isinstance(result_data, ParticipantSearchResponseData) + self.assertEqual(p.id, result_data.id) + self.assertListEqual(['PART01'], result_data.participant_external_ids) + self.assertListEqual([], result_data.family_external_ids) @run_as_sync async def 
test_search_family(self): @@ -164,11 +185,11 @@ async def test_search_family(self): ) self.assertEqual(1, len(results)) result = results[0] - self.assertEqual(f_id, result.data.id) self.assertEqual('FAMXX01', result.title) - self.assertListEqual(['FAMXX01'], result.data.family_external_ids) - self.assertRaises(AttributeError, lambda: result.data.participant_external_ids) - self.assertRaises(AttributeError, lambda: result.data.sample_external_ids) + result_data = result.data + assert isinstance(result_data, FamilySearchResponseData) + self.assertEqual(f_id, result_data.id) + self.assertListEqual(['FAMXX01'], result_data.family_external_ids) @run_as_sync async def test_search_mixed(self): @@ -195,7 +216,7 @@ async def test_search_mixed(self): sample = await self.slayer.upsert_sample( SampleUpsertInternal( external_id='X:SAM001', - sample_type='blood', + type='blood', participant_id=p.id, ) ) @@ -214,19 +235,26 @@ async def test_search_mixed(self): sample_result = next( r for r in all_results if r.type == SearchResponseType.SAMPLE ) + family_result_data = family_result.data + participant_result_data = participant_result.data + sample_result_data = sample_result.data + + assert isinstance(family_result_data, FamilySearchResponseData) + assert isinstance(participant_result_data, ParticipantSearchResponseData) + assert isinstance(sample_result_data, SampleSearchResponseData) # linked family matches self.assertEqual('X:FAM01', family_result.title) # linked participant matches self.assertEqual('X:PART01', participant_result.title) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) # linked sample matches cpg_id = sample_id_format(sample.id) - self.assertEqual(cpg_id, sample_result.data.id) - self.assertListEqual(['X:SAM001'], sample_result.data.sample_external_ids) - self.assertListEqual(['X:FAM01'], participant_result.data.family_external_ids) + self.assertEqual(cpg_id, sample_result_data.id) + self.assertListEqual(['X:SAM001'], sample_result_data.sample_external_ids) + self.assertListEqual(['X:FAM01'], participant_result_data.family_external_ids) self.assertListEqual( - ['X:PART01'], participant_result.data.participant_external_ids + ['X:PART01'], participant_result_data.participant_external_ids ) diff --git a/test/test_web.py b/test/test_web.py index 4e6cb53f6..cc5a0bb63 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -1,27 +1,27 @@ from test.testbase import DbIsolatedTest, run_as_sync +from db.python.layers import ( + AssayLayer, + ParticipantLayer, + SampleLayer, + SequencingGroupLayer, + WebLayer, +) from models.enums import MetaSearchEntityPrefix from models.models import ( + Assay, + AssayInternal, + AssayUpsertInternal, ParticipantUpsertInternal, + ProjectSummaryInternal, SampleUpsertInternal, + SearchItem, SequencingGroupUpsertInternal, - AssayUpsertInternal, - ProjectSummaryInternal, - AssayInternal, - Assay, + WebProject, ) -from models.models import WebProject, SearchItem from models.utils.sample_id_format import sample_id_transform_to_raw from models.utils.sequencing_group_id_format import sequencing_group_id_transform_to_raw -from db.python.layers import ( - AssayLayer, - SequencingGroupLayer, - SampleLayer, - ParticipantLayer, - WebLayer, -) - default_assay_meta = { 'sequencing_type': 'genome', 'sequencing_technology': 'short-read', @@ -71,20 +71,20 @@ def get_test_participant(): type='sequencing', meta={ 'reads': [ - { - 'basename': 
'sample_id001.filename-R1.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R1.fastq.gz', - 'size': 111, - }, - { - 'basename': 'sample_id001.filename-R2.fastq.gz', - 'checksum': None, - 'class': 'File', - 'location': '/path/to/sample_id001.filename-R2.fastq.gz', - 'size': 111, - }, + { + 'basename': 'sample_id001.filename-R1.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R1.fastq.gz', + 'size': 111, + }, + { + 'basename': 'sample_id001.filename-R2.fastq.gz', + 'checksum': None, + 'class': 'File', + 'location': '/path/to/sample_id001.filename-R2.fastq.gz', + 'size': 111, + }, ], 'reads_type': 'fastq', 'batch': 'M001', @@ -204,9 +204,7 @@ async def test_project_summary_empty(self): # Expect an empty project expected = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -308,9 +306,7 @@ async def project_summary_with_filter_no_results(self): ], ) empty_result = ProjectSummaryInternal( - project=WebProject( - **{'id': 1, 'name': 'test', 'meta': {}, 'dataset': 'test'} - ), + project=WebProject(id=1, name='test', meta={}, dataset='test'), total_samples=0, total_samples_in_query=0, total_participants=0, @@ -455,12 +451,10 @@ async def test_field_with_space(self): token=0, grid_filter=[ SearchItem( - **{ - 'model_type': MetaSearchEntityPrefix.ASSAY, - 'query': 'field wi', - 'field': 'field with spaces', - 'is_meta': True, - } + model_type=MetaSearchEntityPrefix.ASSAY, + query='field wi', + field='field with spaces', + is_meta=True, ) ], ) diff --git a/web/src/pages/project/ProjectGrid.tsx b/web/src/pages/project/ProjectGrid.tsx index 52dbeaa64..83a9fda74 100644 --- a/web/src/pages/project/ProjectGrid.tsx +++ b/web/src/pages/project/ProjectGrid.tsx @@ -372,7 +372,7 @@ const ProjectGrid: React.FunctionComponent = ({ : '1px solid var(--color-border-default)', backgroundColor, }} - key={`${s.id}sequence_group.${k}`} + key={`${s.id}sequencing_group.${k}`} rowSpan={(seq.assays ?? []).length} > {k === 'id' ? 
( From 78cafc7970ef1adeafd7e4a5005a7f7b7d31e500 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Mon, 18 Sep 2023 12:27:09 +1000 Subject: [PATCH 2/3] Add participant phenotypes to graphql (#545) * Add participant phenotypes to graphql * Add test for graphql phenotypes * Fix unrelated linting issues * Slight linting updates * PR cleanup --------- Co-authored-by: Michael Franklin --- api/graphql/loaders.py | 38 +++++++++++++++++++++++---------- api/graphql/schema.py | 33 ++++++++++++++-------------- db/python/layers/participant.py | 30 ++++++++++++++++++++++++-- scripts/parse_ped.py | 2 +- test/test_graphql.py | 27 +++++++++++++++++++++++ 5 files changed, 99 insertions(+), 31 deletions(-) diff --git a/api/graphql/loaders.py b/api/graphql/loaders.py index 2a54fc514..905297009 100644 --- a/api/graphql/loaders.py +++ b/api/graphql/loaders.py @@ -13,26 +13,26 @@ from db.python.connect import NotFoundError from db.python.layers import ( AnalysisLayer, - SampleLayer, AssayLayer, + FamilyLayer, ParticipantLayer, + SampleLayer, SequencingGroupLayer, - FamilyLayer, ) from db.python.tables.analysis import AnalysisFilter from db.python.tables.assay import AssayFilter from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sample import SampleFilter from db.python.tables.sequencing_group import SequencingGroupFilter -from db.python.utils import ProjectId, GenericFilter +from db.python.utils import GenericFilter, ProjectId from models.models import ( - AssayInternal, - SampleInternal, - SequencingGroupInternal, AnalysisInternal, - ParticipantInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, Project, + SampleInternal, + SequencingGroupInternal, ) @@ -53,6 +53,8 @@ class LoaderKeys(enum.Enum): SAMPLES_FOR_PARTICIPANTS = 'samples_for_participants' SAMPLES_FOR_PROJECTS = 'samples_for_projects' + PHENOTYPES_FOR_PARTICIPANTS = 'phenotypes_for_participants' + PARTICIPANTS_FOR_IDS = 'participants_for_ids' PARTICIPANTS_FOR_FAMILIES = 'participants_for_families' PARTICIPANTS_FOR_PROJECTS = 'participants_for_projects' @@ -291,9 +293,7 @@ async def load_participants_for_ids( p_by_id = {p.id: p for p in persons} missing_pids = set(participant_ids) - set(p_by_id.keys()) if missing_pids: - raise NotFoundError( - f'Could not find participants with ids {missing_pids}' - ) + raise NotFoundError(f'Could not find participants with ids {missing_pids}') return [p_by_id.get(p) for p in participant_ids] @@ -400,7 +400,23 @@ async def load_analyses_for_sequencing_groups( return by_sg_id -async def get_context(request: Request, connection=get_projectless_db_connection): # pylint: disable=unused-argument +@connected_data_loader(LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS) +async def load_phenotypes_for_participants( + participant_ids: list[int], connection +) -> list[dict]: + """ + Data loader for phenotypes for participants + """ + player = ParticipantLayer(connection) + participant_phenotypes = await player.get_phenotypes_for_participants( + participant_ids=participant_ids + ) + return [participant_phenotypes.get(pid, {}) for pid in participant_ids] + + +async def get_context( + request: Request, connection=get_projectless_db_connection +): # pylint: disable=unused-argument """Get loaders / cache context for strawberyy GraphQL""" mapped_loaders = {k: fn(connection) for k, fn in loaders.items()} return { diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 8befa8867..7255821ed 100644 --- a/api/graphql/schema.py +++ 
b/api/graphql/schema.py @@ -14,16 +14,10 @@ from strawberry.fastapi import GraphQLRouter from strawberry.types import Info -from api.graphql.filters import ( - GraphQLFilter, - GraphQLMetaFilter, -) -from api.graphql.loaders import ( - get_context, - LoaderKeys, -) +from api.graphql.filters import GraphQLFilter, GraphQLMetaFilter +from api.graphql.loaders import LoaderKeys, get_context from db.python import enum_tables -from db.python.layers import AnalysisLayer, SequencingGroupLayer, SampleLayer +from db.python.layers import AnalysisLayer, SampleLayer, SequencingGroupLayer from db.python.layers.assay import AssayLayer from db.python.layers.family import FamilyLayer from db.python.tables.analysis import AnalysisFilter @@ -34,21 +28,19 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( - SampleInternal, - ParticipantInternal, - Project, AnalysisInternal, + AssayInternal, FamilyInternal, + ParticipantInternal, + Project, + SampleInternal, SequencingGroupInternal, - AssayInternal, ) from models.models.sample import sample_id_transform_to_raw -from models.utils.sample_id_format import ( - sample_id_format, -) +from models.utils.sample_id_format import sample_id_format from models.utils.sequencing_group_id_format import ( - sequencing_group_id_transform_to_raw, sequencing_group_id_format, + sequencing_group_id_transform_to_raw, ) enum_methods = {} @@ -336,6 +328,13 @@ async def samples( samples = await info.context[LoaderKeys.SAMPLES_FOR_PARTICIPANTS].load(q) return [GraphQLSample.from_internal(s) for s in samples] + @strawberry.field + async def phenotypes( + self, info: Info, root: 'GraphQLParticipant' + ) -> strawberry.scalars.JSON: + loader = info.context[LoaderKeys.PHENOTYPES_FOR_PARTICIPANTS] + return await loader.load(root.id) + @strawberry.field async def families( self, info: Info, root: 'GraphQLParticipant' diff --git a/db/python/layers/participant.py b/db/python/layers/participant.py index 216427e31..44d6d4db2 100644 --- a/db/python/layers/participant.py +++ b/db/python/layers/participant.py @@ -2,9 +2,9 @@ import re from collections import defaultdict from enum import Enum -from typing import Dict, List, Tuple, Optional, Any +from typing import Any, Dict, List, Optional, Tuple -from db.python.connect import NotFoundError, NoOpAenter +from db.python.connect import NoOpAenter, NotFoundError from db.python.layers.base import BaseLayer from db.python.layers.sample import SampleLayer from db.python.tables.family import FamilyTable @@ -335,6 +335,21 @@ async def fill_in_missing_participants(self): return f'Updated {len(sample_ids_to_update)} records' + async def insert_participant_phenotypes( + self, participant_phenotypes: dict[int, dict] + ): + """ + Insert participant phenotypes, with format: {pid: {key: value}} + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.add_key_value_rows( + [ + (pid, pk, pv) + for pid, phenotypes in participant_phenotypes.items() + for pk, pv in phenotypes.items() + ] + ) + async def generic_individual_metadata_importer( self, headers: List[str], @@ -653,6 +668,17 @@ async def update_many_participant_external_ids( # region PHENOTYPES / SEQR + async def get_phenotypes_for_participants( + self, participant_ids: list[int] + ) -> dict[int, dict[str, Any]]: + """ + Get phenotypes for participants keyed by by pid + """ + ppttable = ParticipantPhenotypeTable(self.connection) + return await ppttable.get_key_value_rows_for_participant_ids( + participant_ids=participant_ids 
+ ) + async def get_seqr_individual_template( self, project: int, diff --git a/scripts/parse_ped.py b/scripts/parse_ped.py index f587e483b..831ddd402 100644 --- a/scripts/parse_ped.py +++ b/scripts/parse_ped.py @@ -15,7 +15,7 @@ def main(ped_file_path: str, project: str): fapi = FamilyApi() # pylint: disable=no-member - with AnyPath(ped_file_path).open() as ped_file: # type: ignore + with AnyPath(ped_file_path).open() as ped_file: fapi.import_pedigree( file=ped_file, has_header=True, diff --git a/test/test_graphql.py b/test/test_graphql.py index c61eed238..a52a5f947 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -229,3 +229,30 @@ async def test_sg_analyses_query(self): self.assertIn('id', analyses[0]) self.assertIn('meta', analyses[0]) self.assertIn('output', analyses[0]) + + @run_as_sync + async def test_participant_phenotypes(self): + """ + Test getting participant phenotypes in graphql + """ + # insert participant + p = await self.player.upsert_participant( + ParticipantUpsertInternal(external_id='Demeter', meta={}, samples=[]) + ) + + phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} + # insert participant_phenotypes + await self.player.insert_participant_phenotypes({p.id: phenotypes}) + + q = """ +query MyQuery($pid: Int!) { + participant(id: $pid) { + phenotypes + } +}""" + + resp = await self.run_graphql_query_async(q, {'pid': p.id}) + + self.assertIn('participant', resp) + self.assertIn('phenotypes', resp['participant']) + self.assertDictEqual(phenotypes, resp['participant']['phenotypes']) From f128b70ca71d33537c9d0922f196901dc3f23d2f Mon Sep 17 00:00:00 2001 From: John Marshall Date: Mon, 18 Sep 2023 15:33:57 +1200 Subject: [PATCH 3/3] Use current GitHub Actions versions (and avoid ::set-output) (#520) Prevent "The following action(s) uses node12 which is deprecated" warnings by updating to the current releases of the Actions used. (Node.js 12 is scheduled for removal from Actions runners next month.) Use the current releases instead of `@main` too, as we don't want to be affected by bleeding-edge bugs. Rewrite ::set-output as a write to $GITHUB_OUTPUT instead; the ::save-state and ::set-output commands are also deprecated.
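For example, the test workflow step that previously published the unit-test exit code with

    echo "::set-output name=rc::$rc"

now appends it to the step's output file instead:

    echo "rc=$rc" >> $GITHUB_OUTPUT

Only the producing side changes; later steps still read the value as ${{ steps.runtests.outputs.rc }}.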
--- .github/workflows/deploy.yaml | 8 ++++---- .github/workflows/lint.yaml | 4 ++-- .github/workflows/test.yaml | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index ca5c077f1..997b1129d 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -21,10 +21,10 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - name: "gcloud setup" - uses: google-github-actions/setup-gcloud@v0 + uses: google-github-actions/setup-gcloud@v1 with: project_id: sample-metadata service_account_key: ${{ secrets.GCP_SERVER_DEPLOY_KEY }} @@ -33,11 +33,11 @@ jobs: run: | gcloud auth configure-docker australia-southeast1-docker.pkg.dev - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 68f65dc86..71c330c5a 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -14,9 +14,9 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index aaffc014b..e15e52f8e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,13 +16,13 @@ jobs: run: shell: bash -eo pipefail -l {0} steps: - - uses: actions/checkout@main + - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: "3.10" - - uses: actions/setup-java@v2 + - uses: actions/setup-java@v3 with: distribution: "temurin" # See 'Supported distributions' for available options java-version: "17" @@ -70,10 +70,10 @@ jobs: rc=$? coverage xml - echo "::set-output name=rc::$rc" + echo "rc=$rc" >> $GITHUB_OUTPUT - name: "Upload coverage report" - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: files: ./coverage.xml @@ -89,7 +89,7 @@ jobs: - name: Fail if tests are not passing if: ${{ steps.runtests.outputs.rc != 0 }} - uses: actions/github-script@v3 + uses: actions/github-script@v6 with: script: | core.setFailed('Unit tests failed with rc = ${{ steps.runtests.outputs.rc }}')