From f177019d9c3927419b016b6a6831e2ff2f3c9afd Mon Sep 17 00:00:00 2001 From: Vivian Bakiris <79084890+vivbak@users.noreply.github.com> Date: Thu, 13 Jun 2024 09:14:01 +1000 Subject: [PATCH 1/3] Fix mismatch between epid and esid (#815) --- scripts/create_test_subset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index 250abf284..e4fb476fe 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -278,7 +278,7 @@ def transfer_samples_sgs_assays( sample_to_sg_attribute_map: dict[Tuple[str, str], SequencingGroupAttributes] = {} old_sid_to_new_sid: dict[str, str] = {} for s in samples: - exid_old_sid = (s['participant']['externalId'], s['id']) + exid_old_sid = (s['externalId'], s['id']) if exid_old_sid not in sample_to_sg_attribute_map: sample_to_sg_attribute_map[exid_old_sid] = {} for sg in s['sequencingGroups']: @@ -384,7 +384,7 @@ def get_new_sg_id( sid (str): The sample id to search for. new_sg_attributes (tuple[str, str, str]): The attributes of the sequencing group to search for. old_sid_to_new_sid (dict[str, str]): A map from old sample ids to new sample ids. - sample_to_sg_attribute_map (dict[tuple, dict[tuple, str]]): A map from (peid, sid) keys to a map of sequencing group attribute keys to old sequencing group ids. + sample_to_sg_attribute_map (dict[tuple, dict[tuple, str]]): A map from (seid, sid) keys to a map of sequencing group attribute keys to old sequencing group ids. new_sg_data (dict[dict, Any]): The data containing the new samples and their sequencing groups. @@ -415,14 +415,14 @@ def get_new_sg_id( new_sample['id'] == old_sid_to_new_sid[old_sid] ): # If new sample maps to old sample for new_sg in new_sample['sequencingGroups']: - peid_sid_key = (new_sample['externalId'], old_sid) + seid_sid_key = (new_sample['externalId'], old_sid) sg_attribute_key = ( new_sg['type'], new_sg['platform'], new_sg['technology'], ) if ( - sample_to_sg_attribute_map[peid_sid_key].get(new_sg_attributes) + sample_to_sg_attribute_map[seid_sid_key].get(new_sg_attributes) and new_sg_attributes == sg_attribute_key ): new_sg_id = new_sg['id'] From c74851381a6de03ec8e9b9a9764bedca7899f4e7 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Thu, 13 Jun 2024 18:43:34 +1200 Subject: [PATCH 2/3] Implement multiple external IDs for participants and samples (#659) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Create participant_external_id and sample_external_id database tables * Pluralise external_ids in participant and sample models * Query *_external_id tables and implement upserting into them Change these simple sample/participant table SELECTs to also join the *_external_id table into the query. Update comments of SampleLayer etc methods that call through to these to indicate that they can look up samples via this table too. In WebDb._project_summary_sample_query(), we use a hack to divert external_id queries to the appropriate peid/seid *_external_id table. Fix documentation typo in SampleTable.get_sample_by_id(). Add from_db_json() utility, the opposite of to_db_json(). Add PRIMARY_EXTERNAL_ORG constant to clarify primary org name usage. Decode JSON-encoded external_ids fields in SampleInternal.from_db() and ParticipantInternal.from_db() routines. * Use the primary external_id for GraphQLParticipant/GraphQLSample [TODO] In future we will want to represent the extra external_ids in GraphQLParticipant/GraphQLSample too. But this stopgap lets us defer that until after the tests/ directory is adjusted. * Use the primary external_id for ParsedParticipant/ParsedSample et al [TODO] In future we will want to represent the extra external_ids in ParsedParticipant/ParsedSample too. But this stopgap lets us defer that until after the tests/ directory is adjusted. [TODO] These would prefer metamist.models to export PRIMARY_EXTERNAL_ORG (or from somewhere similar) so they didn't need to define it themselves. * Adjust tests to account for s/external_id/external_ids/g test_sample.py: Add a test exercising get_sample_id_map_by_external_ids(). test_upsert.py: Update direct SQL queries to use the new tables and to ensure that their rows are returned in the expected order. * Add tests exercising multiple external_id functionality * Remove obsolete participant/sample table external_id constraints But first we need to create indexes for the correspondingly named foreign keys, which previously re-used these constraints' composite indexes. This avoids "Cannot drop index needed in a foreign key constraint" errors. * Bodge SampleLayer.get_history_of_sample() Attempting to update the history query interfered with the intended behaviour of audit_log_ids. Revert the changes to propagate audit_log_id within untouched sample_external_id rows, and revert to the previous sample-table-only get_history_of_sample() with limitations: for now, it doesn't return external_ids or audit_log_ids. * Update db/project.xml per review Make external_ids unique per project regardless of external org (name) and adjust tests accordingly. Make the new tables' audit_log_id field non-NULL, and cons up a new audit_log entry for the external_id migrations to refer to. Drop the old external_id fields' not-NULL constraint before clearing their values -- previously omitted as this was tested only on empty tables! This is in a separate changeSet as system_versioning_alter_history=1 is needed, and we'd prefer not to set that during the main changeSet. * For now, create_test_subset.py copies only the primary external_id [TODO] In future we will represent the extra external_ids in GraphQLSample too and be able to copy all of them. * Bump version: 7.0.3 → 7.1.0 --- .bumpversion.cfg | 2 +- api/graphql/schema.py | 5 +- api/server.py | 2 +- db/project.xml | 139 ++++++++++ db/python/connect.py | 2 + db/python/layers/family.py | 4 +- db/python/layers/participant.py | 16 +- db/python/layers/sample.py | 8 +- db/python/layers/search.py | 2 +- db/python/layers/web.py | 21 +- db/python/tables/participant.py | 186 +++++++++---- db/python/tables/project.py | 2 + db/python/tables/sample.py | 174 +++++++++--- db/python/utils.py | 5 + deploy/python/version.txt | 2 +- metamist/parser/generic_parser.py | 6 +- models/base.py | 5 + models/models/__init__.py | 2 +- models/models/participant.py | 27 +- models/models/sample.py | 25 +- scripts/create_test_subset.py | 4 +- setup.py | 2 +- test/data/generate_data.py | 4 +- test/data/generate_ourdna_data.py | 14 +- test/data/generate_seqr_project_data.py | 4 +- test/test_analysis.py | 3 +- test/test_assay.py | 11 +- test/test_audit_log.py | 4 +- test/test_cohort.py | 8 +- test/test_external_id.py | 349 ++++++++++++++++++++++++ test/test_get_participants.py | 15 +- test/test_graphql.py | 11 +- test/test_import_individual_metadata.py | 8 +- test/test_models.py | 17 +- test/test_ourdna_dashboard.py | 13 +- test/test_parse_existing_cohort.py | 10 +- test/test_parse_generic_metadata.py | 5 +- test/test_parse_ont_sheet.py | 10 +- test/test_pedigree.py | 6 +- test/test_sample.py | 17 +- test/test_search.py | 15 +- test/test_sequencing_groups.py | 3 +- test/test_update_participant_family.py | 8 +- test/test_upsert.py | 44 ++- test/test_web.py | 9 +- web/package.json | 2 +- 46 files changed, 990 insertions(+), 241 deletions(-) create mode 100644 test/test_external_id.py diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6732d7c89..73c00e4b3 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 7.0.4 +current_version = 7.1.0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P[A-z0-9-]+) diff --git a/api/graphql/schema.py b/api/graphql/schema.py index 16c3f4744..0fcbe90fb 100644 --- a/api/graphql/schema.py +++ b/api/graphql/schema.py @@ -41,6 +41,7 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( + PRIMARY_EXTERNAL_ORG, AnalysisInternal, AssayInternal, AuditLogInternal, @@ -636,7 +637,7 @@ class GraphQLParticipant: def from_internal(internal: ParticipantInternal) -> 'GraphQLParticipant': return GraphQLParticipant( id=internal.id, - external_id=internal.external_id, + external_id=internal.external_ids[PRIMARY_EXTERNAL_ORG], meta=internal.meta, reported_sex=internal.reported_sex, reported_gender=internal.reported_gender, @@ -725,7 +726,7 @@ class GraphQLSample: def from_internal(sample: SampleInternal) -> 'GraphQLSample': return GraphQLSample( id=sample_id_format(sample.id), - external_id=sample.external_id, + external_id=sample.external_ids[PRIMARY_EXTERNAL_ORG], active=sample.active, meta=sample.meta, type=sample.type, diff --git a/api/server.py b/api/server.py index b5f1834c3..82b8d32fd 100644 --- a/api/server.py +++ b/api/server.py @@ -20,7 +20,7 @@ from db.python.utils import get_logger # This tag is automatically updated by bump2version -_VERSION = '7.0.4' +_VERSION = '7.1.0' logger = get_logger() diff --git a/db/project.xml b/db/project.xml index f9550b803..44000c615 100644 --- a/db/project.xml +++ b/db/project.xml @@ -1281,4 +1281,143 @@ ALTER TABLE cohort_sequencing_group ADD SYSTEM VERSIONING; ALTER TABLE analysis_cohort ADD SYSTEM VERSIONING; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ALTER TABLE participant_external_id ADD SYSTEM VERSIONING; + ALTER TABLE sample_external_id ADD SYSTEM VERSIONING; + + + INSERT INTO audit_log (author, on_behalf_of, ar_guid, comment, auth_project, meta) + VALUES ('liquibase', NULL, NULL, 'participant/sample external_id migration', NULL, NULL) + RETURNING @audit_log_id := id; + + INSERT INTO participant_external_id (project, participant_id, name, external_id, audit_log_id) + SELECT project, id, '', external_id, @audit_log_id + FROM participant; + + INSERT INTO sample_external_id (project, sample_id, name, external_id, audit_log_id) + SELECT project, id, '', external_id, @audit_log_id + FROM sample; + + + + SET @@system_versioning_alter_history = 1; + + + UPDATE participant SET external_id = NULL + + + + UPDATE sample SET external_id = NULL + + diff --git a/db/python/connect.py b/db/python/connect.py index 7bedf571e..5456c511f 100644 --- a/db/python/connect.py +++ b/db/python/connect.py @@ -29,6 +29,8 @@ 'analysis_sequencing_group', 'analysis_sample', 'assay_external_id', + 'participant_external_id', + 'sample_external_id', 'sequencing_group_external_id', 'family', 'family_participant', diff --git a/db/python/layers/family.py b/db/python/layers/family.py index d861f9d32..183921b27 100644 --- a/db/python/layers/family.py +++ b/db/python/layers/family.py @@ -12,9 +12,9 @@ from db.python.tables.participant import ParticipantTable from db.python.tables.sample import SampleTable from db.python.utils import GenericFilter, NotFoundError +from models.models import PRIMARY_EXTERNAL_ORG, ProjectId from models.models.family import FamilyInternal, PedRow, PedRowInternal from models.models.participant import ParticipantUpsertInternal -from models.models.project import ProjectId class FamilyLayer(BaseLayer): @@ -299,7 +299,7 @@ async def import_pedigree( continue upserted_participant = await participant_table.upsert_participant( ParticipantUpsertInternal( - external_id=row.individual_id, + external_ids={PRIMARY_EXTERNAL_ORG: row.individual_id}, reported_sex=row.sex, ) ) diff --git a/db/python/layers/participant.py b/db/python/layers/participant.py index 6b1bffd71..ff82ee0c4 100644 --- a/db/python/layers/participant.py +++ b/db/python/layers/participant.py @@ -20,9 +20,9 @@ NotFoundError, split_generic_terms, ) +from models.models import PRIMARY_EXTERNAL_ORG, ProjectId from models.models.family import PedRowInternal from models.models.participant import ParticipantInternal, ParticipantUpsertInternal -from models.models.project import ProjectId HPO_REGEX_MATCHER = re.compile(r'HP\:\d+$') @@ -250,6 +250,7 @@ async def get_participants_by_ids( ) -> list[ParticipantInternal]: """ Get participants by IDs + (note that the list returned need not be ordered as per the pids argument) """ projects, participants = await self.pttable.get_participants_by_ids(pids) @@ -301,7 +302,10 @@ async def fill_in_missing_participants(self): ) ) external_sample_map_with_no_pid = { - sample.external_id: sample.id for sample in samples_with_no_pid + sample.external_ids[PRIMARY_EXTERNAL_ORG]: sample.id for sample in samples_with_no_pid + } + external_ids_by_primary = { + sample.external_ids[PRIMARY_EXTERNAL_ORG]: sample.external_ids for sample in samples_with_no_pid } ext_sample_id_to_pid = {} @@ -328,7 +332,7 @@ async def fill_in_missing_participants(self): for external_id in external_participant_ids_to_add: sample_id = external_sample_map_with_no_pid[external_id] participant_id = await self.pttable.create_participant( - external_id=external_id, + external_ids=external_ids_by_primary[external_id], reported_sex=None, reported_gender=None, karyotype=None, @@ -410,7 +414,7 @@ async def generic_individual_metadata_importer( ) for ex_pid in missing_participant_eids: external_pid_map[ex_pid] = await self.pttable.create_participant( - external_id=ex_pid, + external_ids={PRIMARY_EXTERNAL_ORG: ex_pid}, reported_sex=None, reported_gender=None, karyotype=None, @@ -605,7 +609,7 @@ async def upsert_participant( ) await self.pttable.update_participant( participant_id=participant.id, - external_id=participant.external_id, + external_ids=participant.external_ids, reported_sex=participant.reported_sex, reported_gender=participant.reported_gender, meta=participant.meta, @@ -614,7 +618,7 @@ async def upsert_participant( else: participant.id = await self.pttable.create_participant( - external_id=participant.external_id, + external_ids=participant.external_ids, reported_sex=participant.reported_sex, reported_gender=participant.reported_gender, karyotype=participant.karyotype, diff --git a/db/python/layers/sample.py b/db/python/layers/sample.py index 45858db1f..c3bf342d6 100644 --- a/db/python/layers/sample.py +++ b/db/python/layers/sample.py @@ -91,7 +91,7 @@ async def get_sample_by_id( async def get_single_by_external_id( self, external_id, project: ProjectId, check_active=True ) -> SampleInternal: - """Get a Sample by its external_id""" + """Get a Sample by (any of) its external_id(s)""" return await self.st.get_single_by_external_id( external_id, project, check_active=check_active ) @@ -102,7 +102,7 @@ async def get_sample_id_map_by_external_ids( project: ProjectId = None, allow_missing=False, ) -> dict[str, int]: - """Get map of samples {external_id: internal_id}""" + """Get map of samples {(any) external_id: internal_id}""" external_ids_set = set(external_ids) _project = project or self.connection.project assert _project @@ -231,7 +231,7 @@ async def upsert_sample( async with with_function(): if not sample.id: sample.id = await self.st.insert_sample( - external_id=sample.external_id, + external_ids=sample.external_ids, sample_type=sample.type, active=True, meta=sample.meta, @@ -242,7 +242,7 @@ async def upsert_sample( # Otherwise update await self.st.update_sample( id_=sample.id, # type: ignore - external_id=sample.external_id, + external_ids=sample.external_ids, meta=sample.meta, participant_id=sample.participant_id, type_=sample.type, diff --git a/db/python/layers/search.py b/db/python/layers/search.py index 98187250d..36f5f1a5c 100644 --- a/db/python/layers/search.py +++ b/db/python/layers/search.py @@ -66,7 +66,7 @@ async def _get_search_result_for_sample( except NotFoundError: return None - sample_eids = [sample.external_id] + sample_eids = list(sample.external_ids.values()) participant_id = int(sample.participant_id) if sample.participant_id else None participant_eids: list[str] = [] family_eids: list[str] = [] diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 05be7d240..4366e52ed 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -71,6 +71,8 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # protect against SQL injection attacks raise ValueError('Invalid characters in field') if not query.is_meta: + if field == 'external_id': + prefix += 'eid' # this field is in its own table q = f'{prefix}.{field} LIKE :{key}' else: # double double quote field to allow white space @@ -84,8 +86,10 @@ def _project_summary_sample_query(self, grid_filter: list[SearchItem]): # the query to determine the total count, then take the selection of samples # for the current page. This is more efficient than doing 2 queries separately. sample_query = f""" - SELECT s.id, s.external_id, s.type, s.meta, s.participant_id, s.active + SELECT s.id, JSON_OBJECTAGG(seid.name, seid.external_id) AS external_ids, + s.type, s.meta, s.participant_id, s.active FROM sample s + LEFT JOIN sample_external_id seid ON s.id = seid.sample_id LEFT JOIN assay a ON s.id = a.sample_id LEFT JOIN participant p ON p.id = s.participant_id LEFT JOIN family_participant fp on s.participant_id = fp.participant_id @@ -185,7 +189,7 @@ def _project_summary_process_sample_rows( smodels = [ NestedSampleInternal( id=s['id'], - external_id=s['external_id'], + external_ids=json.loads(s['external_ids']), type=s['type'], meta=json.loads(s['meta']) or {}, created_date=str(sample_id_start_times.get(s['id'], '')), @@ -337,9 +341,12 @@ async def get_project_summary( # participant p_query = """ - SELECT id, external_id, meta, reported_sex, reported_gender, karyotype - FROM participant - WHERE id in :pids + SELECT p.id, JSON_OBJECTAGG(peid.name, peid.external_id) AS external_ids, + p.meta, p.reported_sex, p.reported_gender, p.karyotype + FROM participant p + INNER JOIN participant_external_id peid ON p.id = peid.participant_id + WHERE p.id in :pids + GROUP BY p.id """ participant_promise = self.connection.fetch_all(p_query, {'pids': pids}) @@ -427,7 +434,7 @@ async def get_project_summary( pmodels.append( NestedParticipantInternal( id=None, - external_id=None, + external_ids=None, meta=None, families=[], samples=[s], @@ -443,7 +450,7 @@ async def get_project_summary( pmodels.append( NestedParticipantInternal( id=p['id'], - external_id=p['external_id'], + external_ids=json.loads(p['external_ids']), meta=json.loads(p['meta']), families=pid_to_families.get(p['id'], []), samples=list(smodels_by_pid.get(p['id'])), diff --git a/db/python/tables/participant.py b/db/python/tables/participant.py index 60d6c7111..cde5ed0e7 100644 --- a/db/python/tables/participant.py +++ b/db/python/tables/participant.py @@ -2,9 +2,8 @@ from typing import Any from db.python.tables.base import DbBase -from db.python.utils import NotFoundError, escape_like_term, to_db_json -from models.models.participant import ParticipantInternal -from models.models.project import ProjectId +from db.python.utils import NotFoundError, escape_like_term, from_db_json, to_db_json +from models.models import PRIMARY_EXTERNAL_ORG, ParticipantInternal, ProjectId class ParticipantTable(DbBase): @@ -14,14 +13,14 @@ class ParticipantTable(DbBase): keys_str = ', '.join( [ - 'id', - 'external_id', - 'reported_sex', - 'reported_gender', - 'karyotype', - 'meta', - 'project', - 'audit_log_id', + 'p.id', + 'JSON_OBJECTAGG(peid.name, peid.external_id) AS external_ids', + 'p.reported_sex', + 'p.reported_gender', + 'p.karyotype', + 'p.meta', + 'p.project', + 'p.audit_log_id', ] ) @@ -45,8 +44,11 @@ async def get_participants_by_ids( """Get participants by IDs""" _query = f""" SELECT {self.keys_str} - FROM participant - WHERE id in :ids""" + FROM participant p + INNER JOIN participant_external_id peid ON p.id = peid.participant_id + WHERE p.id IN :ids + GROUP BY p.id + """ rows = await self.connection.fetch_all(_query, {'ids': ids}) ds = [dict(r) for r in rows] projects = set(d.get('project') for d in ds) @@ -60,19 +62,23 @@ async def get_participants( """ values: dict[str, Any] = {'project': project} if internal_participant_ids: - _query = f""" - SELECT {self.keys_str} - FROM participant - WHERE project = :project AND id in :ids""" + wheres = 'p.project = :project AND p.id IN :ids' values['ids'] = internal_participant_ids else: - _query = f'SELECT {self.keys_str} FROM participant WHERE project = :project' + wheres = 'p.project = :project' + _query = f""" + SELECT {self.keys_str} + FROM participant p + INNER JOIN participant_external_id peid ON p.id = peid.participant_id + WHERE {wheres} + GROUP BY p.id + """ rows = await self.connection.fetch_all(_query, values) return [ParticipantInternal.from_db(dict(r)) for r in rows] async def create_participant( self, - external_id: str, + external_ids: dict[str, str], reported_sex: int | None, reported_gender: str | None, karyotype: str | None, @@ -85,27 +91,49 @@ async def create_participant( if not (project or self.project): raise ValueError('Must provide project to create participant') + if not external_ids or external_ids.get(PRIMARY_EXTERNAL_ORG, None) is None: + raise ValueError('Participant must have primary external_id') + + audit_log_id = await self.audit_log_id() + _query = """ INSERT INTO participant - (external_id, reported_sex, reported_gender, karyotype, meta, audit_log_id, project) + (reported_sex, reported_gender, karyotype, meta, audit_log_id, project) VALUES - (:external_id, :reported_sex, :reported_gender, :karyotype, :meta, :audit_log_id, :project) + (:reported_sex, :reported_gender, :karyotype, :meta, :audit_log_id, :project) RETURNING id """ - return await self.connection.fetch_val( + new_id = await self.connection.fetch_val( _query, { - 'external_id': external_id, 'reported_sex': reported_sex, 'reported_gender': reported_gender, 'karyotype': karyotype, 'meta': to_db_json(meta or {}), - 'audit_log_id': await self.audit_log_id(), + 'audit_log_id': audit_log_id, 'project': project or self.project, }, ) + _eid_query = """ + INSERT INTO participant_external_id (project, participant_id, name, external_id, audit_log_id) + VALUES (:project, :pid, :name, :external_id, :audit_log_id) + """ + _eid_values = [ + { + 'project': project or self.project, + 'pid': new_id, + 'name': name.lower(), + 'external_id': eid, + 'audit_log_id': audit_log_id, + } + for name, eid in external_ids.items() if eid is not None + ] + await self.connection.execute_many(_eid_query, _eid_values) + + return new_id + async def update_participants( self, participant_ids: list[int], @@ -151,7 +179,7 @@ async def update_participants( async def update_participant( self, participant_id: int, - external_id: str | None, + external_ids: dict[str, str | None] | None, reported_sex: int | None, reported_gender: str | None, karyotype: str | None, @@ -160,12 +188,56 @@ async def update_participant( """ Update participant """ + audit_log_id = await self.audit_log_id() updaters = ['audit_log_id = :audit_log_id'] - fields = {'pid': participant_id, 'audit_log_id': await self.audit_log_id()} + fields = {'pid': participant_id, 'audit_log_id': audit_log_id} + + if external_ids: + to_delete = [k.lower() for k, v in external_ids.items() if v is None] + to_update = {k.lower(): v for k, v in external_ids.items() if v is not None} + + if PRIMARY_EXTERNAL_ORG in to_delete: + raise ValueError("Can't remove participant's primary external_id") + + if to_delete: + # Set audit_log_id to this transaction before deleting the rows + _audit_update_query = """ + UPDATE participant_external_id + SET audit_log_id = :audit_log_id + WHERE participant_id = :pid AND name IN :names + """ + await self.connection.execute( + _audit_update_query, + {'pid': participant_id, 'names': to_delete, 'audit_log_id': audit_log_id}, + ) - if external_id: - updaters.append('external_id = :external_id') - fields['external_id'] = external_id + _delete_query = """ + DELETE FROM participant_external_id + WHERE participant_id = :pid AND name IN :names + """ + await self.connection.execute(_delete_query, {'pid': participant_id, 'names': to_delete}) + + if to_update: + _query = 'SELECT project FROM participant WHERE id = :pid' + row = await self.connection.fetch_one(_query, {'pid': participant_id}) + project = row['project'] + + _update_query = """ + INSERT INTO participant_external_id (project, participant_id, name, external_id, audit_log_id) + VALUES (:project, :pid, :name, :external_id, :audit_log_id) + ON DUPLICATE KEY UPDATE external_id = :external_id, audit_log_id = :audit_log_id + """ + _eid_values = [ + { + 'project': project, + 'pid': participant_id, + 'name': name, + 'external_id': eid, + 'audit_log_id': audit_log_id, + } + for name, eid in to_update.items() + ] + await self.connection.execute_many(_update_query, _eid_values) if reported_sex: updaters.append('reported_sex = :reported_sex') @@ -204,9 +276,10 @@ async def get_id_map_by_external_ids( return {} _query = """ - SELECT external_id, id - FROM participant - WHERE external_id in :external_ids AND project = :project""" + SELECT external_id, participant_id AS id + FROM participant_external_id + WHERE external_id IN :external_ids AND project = :project + """ results = await self.connection.fetch_all( _query, { @@ -221,13 +294,17 @@ async def get_id_map_by_external_ids( async def get_id_map_by_internal_ids( self, internal_participant_ids: list[int], allow_missing=False ) -> dict[int, str]: - """Get map of {internal_id: external_participant_id}""" + """Get map of {internal_id: primary_external_participant_id}""" if len(internal_participant_ids) == 0: return {} - _query = 'SELECT id, external_id FROM participant WHERE id in :ids' + _query = """ + SELECT participant_id AS id, external_id + FROM participant_external_id + WHERE participant_id IN :ids AND name = :PRIMARY_EXTERNAL_ORG + """ results = await self.connection.fetch_all( - _query, {'ids': internal_participant_ids} + _query, {'ids': internal_participant_ids, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG} ) id_map: dict[int, str] = {r['id']: r['external_id'] for r in results} @@ -249,11 +326,13 @@ async def get_participants_by_families( self, family_ids: list[int] ) -> tuple[set[ProjectId], dict[int, list[ParticipantInternal]]]: """Get list of participants keyed by families, duplicates results""" - _query = """ - SELECT project, fp.family_id, p.id, p.external_id, p.reported_sex, p.reported_gender, p.karyotype, p.meta + _query = f""" + SELECT fp.family_id, {self.keys_str} FROM participant p INNER JOIN family_participant fp ON fp.participant_id = p.id + INNER JOIN participant_external_id peid ON p.id = peid.participant_id WHERE fp.family_id IN :fids + GROUP BY p.id """ rows = await self.connection.fetch_all(_query, {'fids': family_ids}) retmap = defaultdict(list) @@ -269,14 +348,20 @@ async def get_participants_by_families( async def update_many_participant_external_ids( self, internal_to_external_id: dict[int, str] ): - """Update many participant external_ids through the {internal: external} map""" + """Update many participant primary external_ids through the {internal: external} map""" _query = """ - UPDATE participant + UPDATE participant_external_id SET external_id = :external_id, audit_log_id = :audit_log_id - WHERE id = :participant_id""" + WHERE participant_id = :participant_id AND name = :PRIMARY_EXTERNAL_ORG + """ audit_log_id = await self.audit_log_id() mapped_values = [ - {'participant_id': k, 'external_id': v, 'audit_log_id': audit_log_id} + { + 'participant_id': k, + 'external_id': v, + 'audit_log_id': audit_log_id, + 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG, + } for k, v in internal_to_external_id.items() ] await self.connection.execute_many(_query, mapped_values) @@ -286,15 +371,19 @@ async def get_external_ids_by_participant( self, participant_ids: list[int] ) -> dict[int, list[str]]: """ - Get list of external IDs for a participant, - This method signature is partially future-proofed for multiple-external IDs + Get lists of external IDs per participant """ if not participant_ids: return {} - _query = 'SELECT id, external_id FROM participant WHERE id in :pids' + _query = """ + SELECT participant_id AS id, JSON_ARRAYAGG(external_id) AS external_ids + FROM participant_external_id + WHERE participant_id IN :pids + GROUP BY participant_id + """ rows = await self.connection.fetch_all(_query, {'pids': participant_ids}) - return {r['id']: [r['external_id']] for r in rows} + return {r['id']: from_db_json(r['external_ids']) for r in rows} async def get_external_participant_id_to_internal_sequencing_group_id_map( self, project: ProjectId, sequencing_type: str | None = None @@ -313,8 +402,9 @@ async def get_external_participant_id_to_internal_sequencing_group_id_map( values['sequencing_type'] = sequencing_type _query = f""" -SELECT p.external_id, sg.id +SELECT peid.external_id, sg.id FROM participant p +INNER JOIN participant_external_id peid ON p.id = peid.participant_id INNER JOIN sample s ON p.id = s.participant_id INNER JOIN sequencing_group sg ON sg.sample_id = s.id WHERE {' AND '.join(wheres)} @@ -329,8 +419,8 @@ async def search( Search by some term, return [ProjectId, ParticipantId, ExternalId] """ _query = """ - SELECT project, id, external_id - FROM participant + SELECT project, participant_id AS id, external_id + FROM participant_external_id WHERE project in :project_ids AND external_id LIKE :search_pattern LIMIT :limit """ diff --git a/db/python/tables/project.py b/db/python/tables/project.py index 4a8854a3d..b08305044 100644 --- a/db/python/tables/project.py +++ b/db/python/tables/project.py @@ -434,6 +434,8 @@ async def delete_project_data( ); DELETE FROM family WHERE project = :project; DELETE FROM sequencing_group_external_id WHERE project = :project; +DELETE FROM sample_external_id WHERE project = :project; +DELETE FROM participant_external_id WHERE project = :project; DELETE FROM assay_external_id WHERE project = :project; DELETE FROM sequencing_group_assay WHERE sequencing_group_id IN ( SELECT sg.id FROM sequencing_group sg diff --git a/db/python/tables/sample.py b/db/python/tables/sample.py index 10639d914..0ce68f76c 100644 --- a/db/python/tables/sample.py +++ b/db/python/tables/sample.py @@ -12,7 +12,7 @@ escape_like_term, to_db_json, ) -from models.models.project import ProjectId +from models.models import PRIMARY_EXTERNAL_ORG, ProjectId from models.models.sample import SampleInternal, sample_id_format @@ -42,13 +42,13 @@ class SampleTable(DbBase): table_name = 'sample' common_get_keys = [ - 'id', - 'external_id', - 'participant_id', - 'meta', - 'active', - 'type', - 'project', + 's.id', + 'JSON_OBJECTAGG(seid.name, seid.external_id) AS external_ids', + 's.participant_id', + 's.meta', + 's.active', + 's.type', + 's.project', ] # region GETS @@ -66,14 +66,25 @@ async def query( self, filter_: SampleFilter ) -> tuple[set[ProjectId], list[SampleInternal]]: """Query samples""" - wheres, values = filter_.to_sql(field_overrides={}) + _sql_overrides = { + 'id': 's.id', + 'external_id': 'seid.external_id', + 'participant_id': 's.participant_id', + 'meta': 's.meta', + 'active': 's.active', + 'type': 's.type', + 'project': 's.project', + } + wheres, values = filter_.to_sql(_sql_overrides) if not wheres: raise ValueError(f'Invalid filter: {filter_}') common_get_keys_str = ', '.join(self.common_get_keys) _query = f""" SELECT {common_get_keys_str} - FROM sample + FROM sample s + INNER JOIN sample_external_id seid ON s.id = seid.sample_id WHERE {wheres} + GROUP BY s.id """ rows = await self.connection.fetch_all(_query, values) samples = [SampleInternal.from_db(dict(r)) for r in rows] @@ -83,7 +94,7 @@ async def query( async def get_sample_by_id( self, internal_id: int ) -> tuple[ProjectId, SampleInternal]: - """Get a Sample by its external_id""" + """Get a Sample by its internal_id""" projects, samples = await self.query( SampleFilter(id=GenericFilter(eq=internal_id)) ) @@ -98,7 +109,7 @@ async def get_single_by_external_id( self, external_id, project: ProjectId, check_active=True ) -> SampleInternal: """ - Get a single sample by its external_id + Get a single sample by (any of) its external_id(s) """ _, samples = await self.query( SampleFilter( @@ -136,7 +147,7 @@ async def get_sample_id_to_project_map( async def insert_sample( self, - external_id: str, + external_ids: dict[str, str], sample_type: str, active: bool, meta: dict | None, @@ -146,14 +157,16 @@ async def insert_sample( """ Create a new sample, and add it to database """ + if not external_ids or external_ids.get(PRIMARY_EXTERNAL_ORG, None) is None: + raise ValueError('Sample must have primary external_id') + audit_log_id = await self.audit_log_id() kv_pairs = [ - ('external_id', external_id), ('participant_id', participant_id), ('meta', to_db_json(meta or {})), ('type', sample_type), ('active', active), - ('audit_log_id', await self.audit_log_id()), + ('audit_log_id', audit_log_id), ('project', project or self.project), ] @@ -171,6 +184,22 @@ async def insert_sample( dict(kv_pairs), ) + _eid_query = """ + INSERT INTO sample_external_id (project, sample_id, name, external_id, audit_log_id) + VALUES (:project, :id, :name, :external_id, :audit_log_id) + """ + _eid_values = [ + { + 'project': project or self.project, + 'id': id_of_new_sample, + 'name': name.lower(), + 'external_id': eid, + 'audit_log_id': audit_log_id, + } + for name, eid in external_ids.items() if eid is not None + ] + await self.connection.execute_many(_eid_query, _eid_values) + return id_of_new_sample async def update_sample( @@ -178,13 +207,14 @@ async def update_sample( id_: int, meta: dict | None, participant_id: int | None, - external_id: str | None, + external_ids: dict[str, str | None] | None, type_: str | None, active: bool = None, ): """Update a single sample""" - values: dict[str, Any] = {'audit_log_id': await self.audit_log_id()} + audit_log_id = await self.audit_log_id() + values: dict[str, Any] = {'audit_log_id': audit_log_id} fields = [ 'audit_log_id = :audit_log_id', ] @@ -192,9 +222,52 @@ async def update_sample( values['participant_id'] = participant_id fields.append('participant_id = :participant_id') - if external_id: - values['external_id'] = external_id - fields.append('external_id = :external_id') + if external_ids: + to_delete = [k.lower() for k, v in external_ids.items() if v is None] + to_update = {k.lower(): v for k, v in external_ids.items() if v is not None} + + if PRIMARY_EXTERNAL_ORG in to_delete: + raise ValueError("Can't remove sample's primary external_id") + + if to_delete: + # Set audit_log_id to this transaction before deleting the rows + _audit_update_query = """ + UPDATE sample_external_id + SET audit_log_id = :audit_log_id + WHERE sample_id = :id AND name IN :names + """ + await self.connection.execute( + _audit_update_query, + {'id': id_, 'names': to_delete, 'audit_log_id': audit_log_id}, + ) + + _delete_query = """ + DELETE FROM sample_external_id + WHERE sample_id = :id AND name IN :names + """ + await self.connection.execute(_delete_query, {'id': id_, 'names': to_delete}) + + if to_update: + _query = 'SELECT project FROM sample WHERE id = :id' + row = await self.connection.fetch_one(_query, {'id': id_}) + project = row['project'] + + _update_query = """ + INSERT INTO sample_external_id (project, sample_id, name, external_id, audit_log_id) + VALUES (:project, :id, :name, :external_id, :audit_log_id) + ON DUPLICATE KEY UPDATE external_id = :external_id, audit_log_id = :audit_log_id + """ + _eid_values = [ + { + 'project': project, + 'id': id_, + 'name': name, + 'external_id': eid, + 'audit_log_id': audit_log_id, + } + for name, eid in to_update.items() + ] + await self.connection.execute_many(_update_query, _eid_values) if type_: values['type'] = type_ @@ -345,9 +418,10 @@ async def search( """ _query = """ - SELECT project, id, external_id, participant_id - FROM sample - WHERE project in :project_ids AND external_id LIKE :search_pattern + SELECT s.project, s.id, seid.external_id, s.participant_id + FROM sample s + INNER JOIN sample_external_id seid ON s.id = seid.sample_id + WHERE s.project IN :project_ids AND seid.external_id LIKE :search_pattern LIMIT :limit """.strip() rows = await self.connection.fetch_all( @@ -376,9 +450,9 @@ async def get_sample_id_map_by_external_ids( raise ValueError('Must specify project when getting by external ids') _query = """\ - SELECT id, external_id - FROM sample - WHERE external_id in :external_ids AND project = :project + SELECT sample_id AS id, external_id + FROM sample_external_id + WHERE external_id IN :external_ids AND project = :project """ rows = await self.connection.fetch_all( _query, {'external_ids': external_ids, 'project': project or self.project} @@ -390,9 +464,13 @@ async def get_sample_id_map_by_external_ids( async def get_sample_id_map_by_internal_ids( self, raw_internal_ids: list[int] ) -> tuple[Iterable[ProjectId], dict[int, str]]: - """Get map of external sample id to internal id""" - _query = 'SELECT id, external_id, project FROM sample WHERE id in :ids' - values = {'ids': raw_internal_ids} + """Get map of (primary) external sample id by internal id""" + _query = """ + SELECT sample_id AS id, external_id, project + FROM sample_external_id + WHERE sample_id IN :ids AND name = :PRIMARY_EXTERNAL_ORG + """ + values = {'ids': raw_internal_ids, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG} rows = await self.connection.fetch_all(_query, values) sample_id_map = {el['id']: el['external_id'] for el in rows} @@ -404,9 +482,14 @@ async def get_all_sample_id_map_by_internal_ids( self, project: ProjectId ) -> dict[int, str]: """Get sample id map for all samples""" - _query = 'SELECT id, external_id FROM sample WHERE project = :project' + _query = """ + SELECT s.id, seid.external_id + FROM sample s + INNER JOIN sample_external_id seid ON s.id = seid.sample_id + WHERE s.project = :project AND name = :PRIMARY_EXTERNAL_ORG + """ rows = await self.connection.fetch_all( - _query, {'project': project or self.project} + _query, {'project': project or self.project, 'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG} ) return {el[0]: el[1] for el in rows} @@ -428,16 +511,19 @@ async def get_samples_create_date(self, sample_ids: list[int]) -> dict[int, date async def get_history_of_sample(self, id_: int): """Get all versions (history) of a sample""" + # TODO Re-implement this for the separate sample_external_id table. Doing a join and/or aggregating + # the external_ids wreaks havoc with FOR SYSTEM_TIME ALL queries, collapsing the history we want to + # see into one aggregate record. For now, leave the query as is, with external_ids unavailable. keys = [ 'id', - 'external_id', + """'{"(not available)": "(not available)"}' AS external_ids""", 'participant_id', 'meta', - 'active+1 as active', + 'active+1 AS active', 'type', 'project', 'author', - 'audit_log_id', + # 'audit_log_id', # TODO SampleInternal does not allow an audit_log_id field ] keys_str = ', '.join(keys) _query = f'SELECT {keys_str} FROM sample FOR SYSTEM_TIME ALL WHERE id = :id' @@ -454,18 +540,20 @@ async def get_samples_with_missing_participants_by_internal_id( ) -> list[SampleInternal]: """Get samples with missing participants""" keys = [ - 'id', - 'external_id', - 'participant_id', - 'meta', - 'active', - 'type', - 'project', + 's.id', + 'JSON_OBJECTAGG(seid.name, seid.external_id) AS external_ids', + 's.participant_id', + 's.meta', + 's.active', + 's.type', + 's.project', ] _query = f""" SELECT {', '.join(keys)} - FROM sample - WHERE participant_id IS NULL AND project = :project + FROM sample s + INNER JOIN sample_external_id seid ON s.id = seid.sample_id + WHERE s.participant_id IS NULL AND s.project = :project + GROUP BY s.id """ rows = await self.connection.fetch_all( _query, {'project': project or self.project} diff --git a/db/python/utils.py b/db/python/utils.py index 1ea11e3d8..909aa87b5 100644 --- a/db/python/utils.py +++ b/db/python/utils.py @@ -431,6 +431,11 @@ def get_logger(): return _logger +def from_db_json(text): + """Convert DB's JSON text to Python object""" + return json.loads(text) + + def to_db_json(val): """Convert val to json for DB""" # return psycopg2.extras.Json(val) diff --git a/deploy/python/version.txt b/deploy/python/version.txt index 4489f5a6d..a3fcc7121 100644 --- a/deploy/python/version.txt +++ b/deploy/python/version.txt @@ -1 +1 @@ -7.0.4 +7.1.0 diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index c3a9cb64c..2f77b49c1 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -52,6 +52,8 @@ logger = logging.getLogger(__file__) logger.setLevel(logging.INFO) +PRIMARY_EXTERNAL_ORG = '' + FASTQ_EXTENSIONS = ('.fq.gz', '.fastq.gz', '.fq', '.fastq') BAM_EXTENSIONS = ('.bam',) CRAM_EXTENSIONS = ('.cram',) @@ -217,7 +219,7 @@ def to_sm(self) -> ParticipantUpsert: samples = [s.to_sm() for s in self.samples] return ParticipantUpsert( id=self.internal_pid, - external_id=self.external_pid, + external_ids={PRIMARY_EXTERNAL_ORG: self.external_pid}, reported_sex=self.reported_sex, reported_gender=self.reported_gender, karyotype=self.karyotype, @@ -252,7 +254,7 @@ def to_sm(self) -> SampleUpsert: """Convert to SM upsert model""" return SampleUpsert( id=self.internal_sid, - external_id=self.external_sid, + external_ids={PRIMARY_EXTERNAL_ORG: self.external_sid}, type=self.sample_type, meta=self.meta, active=True, diff --git a/models/base.py b/models/base.py index c9f2d9c63..6d2710887 100644 --- a/models/base.py +++ b/models/base.py @@ -6,6 +6,11 @@ OpenApiGenNoneType = bytes | None +# The primary {sample,participant}_external_id entry, previously {sample,participant}.external_id +# TODO This will eventually be removed when all analysis endpoints have been updated to record an external org +PRIMARY_EXTERNAL_ORG = '' + + class SMBase(BaseModel): """Base object for all models""" diff --git a/models/models/__init__.py b/models/models/__init__.py index ac0a9e3f2..3bfa75be8 100644 --- a/models/models/__init__.py +++ b/models/models/__init__.py @@ -1,4 +1,4 @@ -from models.base import parse_sql_bool +from models.base import PRIMARY_EXTERNAL_ORG, parse_sql_bool from models.models.analysis import ( Analysis, AnalysisInternal, diff --git a/models/models/participant.py b/models/models/participant.py index de0dc6ef5..75b3fbfe4 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -16,7 +16,7 @@ class ParticipantInternal(SMBase): id: int project: ProjectId - external_id: str = None + external_ids: dict[str, str] reported_sex: int | None = None reported_gender: str | None = None karyotype: str | None = None @@ -26,9 +26,10 @@ class ParticipantInternal(SMBase): @classmethod def from_db(cls, data: dict): - """Convert from db keys, mainly converting parsing meta""" - if 'meta' in data and isinstance(data['meta'], str): - data['meta'] = json.loads(data['meta']) + """Convert from db keys, mainly converting JSON-encoded fields""" + for key in ['external_ids', 'meta']: + if key in data and isinstance(data[key], str): + data[key] = json.loads(data[key]) return ParticipantInternal(**data) @@ -37,7 +38,7 @@ def to_external(self): return Participant( id=self.id, project=self.project, - external_id=self.external_id, + external_ids=self.external_ids, reported_sex=self.reported_sex, reported_gender=self.reported_gender, karyotype=self.karyotype, @@ -49,7 +50,7 @@ class NestedParticipantInternal(SMBase): """ParticipantInternal with nested samples""" id: int - external_id: str = None + external_ids: dict[str, str] reported_sex: int | None = None reported_gender: str | None = None karyotype: str | None = None @@ -61,7 +62,7 @@ def to_external(self): """Convert to transport model""" return NestedParticipant( id=self.id, - external_id=self.external_id, + external_ids=self.external_ids, reported_sex=self.reported_sex, reported_gender=self.reported_gender, karyotype=self.karyotype, @@ -75,7 +76,7 @@ class ParticipantUpsertInternal(SMBase): """Internal upsert model for participant""" id: int | None = None - external_id: str = None + external_ids: dict[str, str | None] | None = None reported_sex: int | None = None reported_gender: str | None = None karyotype: str | None = None @@ -87,7 +88,7 @@ def to_external(self): """Convert to transport model""" return ParticipantUpsert( id=self.id, - external_id=self.external_id, + external_ids=self.external_ids, # type: ignore reported_sex=self.reported_sex, reported_gender=self.reported_gender, karyotype=self.karyotype, @@ -101,7 +102,7 @@ class Participant(SMBase): id: int project: ProjectId - external_id: str = None + external_ids: dict[str, str] reported_sex: int | None = None reported_gender: str | None = None karyotype: str | None = None @@ -112,7 +113,7 @@ class NestedParticipant(SMBase): """External participant model with nested samples""" id: int - external_id: str = None + external_ids: dict[str, str] reported_sex: int | None = None reported_gender: str | None = None karyotype: str | None = None @@ -126,7 +127,7 @@ class ParticipantUpsert(SMBase): """External upsert model for participant""" id: int | OpenApiGenNoneType = None - external_id: str | OpenApiGenNoneType = None + external_ids: dict[str, str | OpenApiGenNoneType] | OpenApiGenNoneType = None reported_sex: int | OpenApiGenNoneType = None reported_gender: str | OpenApiGenNoneType = None karyotype: str | OpenApiGenNoneType = None @@ -138,7 +139,7 @@ def to_internal(self): """Convert to internal model, doesn't really do much""" p = ParticipantUpsertInternal( id=self.id, # type: ignore - external_id=self.external_id, # type: ignore + external_ids=self.external_ids, # type: ignore reported_sex=self.reported_sex, # type: ignore reported_gender=self.reported_gender, # type: ignore karyotype=self.karyotype, # type: ignore diff --git a/models/models/sample.py b/models/models/sample.py index 9961554b6..1499b6c6b 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -15,7 +15,7 @@ class SampleInternal(SMBase): """Internal model for a Sample""" id: int - external_id: str + external_ids: dict[str, str] meta: dict project: int type: str | None @@ -39,13 +39,16 @@ def from_db(d: dict): if isinstance(meta, str): meta = json.loads(meta) + if 'external_ids' in d and isinstance(d['external_ids'], str): + d['external_ids'] = json.loads(d['external_ids']) + return SampleInternal(id=_id, type=str(type_), meta=meta, active=active, **d) def to_external(self): """Convert to transport model""" return Sample( id=sample_id_format(self.id), - external_id=self.external_id, + external_ids=self.external_ids, meta=self.meta, project=self.project, type=self.type, @@ -58,7 +61,7 @@ class NestedSampleInternal(SMBase): """SampleInternal with nested sequencing groups and assays""" id: int - external_id: str + external_ids: dict[str, str] meta: dict type: str | None active: bool | None @@ -71,7 +74,7 @@ def to_external(self): """Convert to transport model""" return NestedSample( id=sample_id_format(self.id), - external_id=self.external_id, + external_ids=self.external_ids, meta=self.meta, type=self.type, created_date=self.created_date, @@ -86,7 +89,7 @@ class SampleUpsertInternal(SMBase): """Internal upsert model for sample""" id: int | None = None - external_id: str | None = None + external_ids: dict[str, str | None] | None = None meta: dict | None = None project: int | None = None type: str | None = None @@ -104,7 +107,7 @@ def to_external(self): return SampleUpsert( id=_id, - external_id=self.external_id, + external_ids=self.external_ids, # type: ignore meta=self.meta, project=self.project, type=self.type, @@ -121,7 +124,7 @@ class Sample(SMBase): """Model for a Sample""" id: str - external_id: str + external_ids: dict[str, str] meta: dict project: int type: str | None @@ -133,7 +136,7 @@ def to_internal(self): """Convert to internal model""" return SampleInternal( id=sample_id_transform_to_raw(self.id), - external_id=self.external_id, + external_ids=self.external_ids, meta=self.meta, project=self.project, type=self.type, @@ -147,7 +150,7 @@ class NestedSample(SMBase): """External sample model with nested sequencing groups and assays""" id: str - external_id: str + external_ids: dict[str, str] meta: dict type: str | None created_date: str | None @@ -159,7 +162,7 @@ class SampleUpsert(SMBase): """Upsert model for a Sample""" id: str | OpenApiGenNoneType = None - external_id: str | OpenApiGenNoneType = None + external_ids: dict[str, str | OpenApiGenNoneType] | OpenApiGenNoneType = None meta: dict | OpenApiGenNoneType = None project: int | OpenApiGenNoneType = None type: str | OpenApiGenNoneType = None @@ -177,7 +180,7 @@ def to_internal(self) -> SampleUpsertInternal: sample_upsert = SampleUpsertInternal( id=_id, - external_id=self.external_id, # type: ignore + external_ids=self.external_ids, # type: ignore meta=self.meta, # type: ignore project=self.project, # type: ignore type=self.type, # type: ignore diff --git a/scripts/create_test_subset.py b/scripts/create_test_subset.py index e4fb476fe..43af110c0 100755 --- a/scripts/create_test_subset.py +++ b/scripts/create_test_subset.py @@ -45,6 +45,8 @@ DEFAULT_SAMPLES_N = 10 +PRIMARY_EXTERNAL_ORG = '' + QUERY_ALL_DATA = gql( """ query getAllData($project: String!, $sids: [String!]) { @@ -297,7 +299,7 @@ def transfer_samples_sgs_assays( existing_pid = upserted_participant_map[s['participant']['externalId']] sample_upsert = SampleUpsert( - external_id=s['externalId'], + external_ids={PRIMARY_EXTERNAL_ORG: s['externalId']}, type=sample_type or None, meta=(copy_files_in_dict(s['meta'], project) or {}), participant_id=existing_pid, diff --git a/setup.py b/setup.py index b24093e17..b5f0be4f6 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ setup( name=PKG, # This tag is automatically updated by bump2version - version='7.0.4', + version='7.1.0', description='Python API for interacting with the Sample API system', long_description=readme, long_description_content_type='text/markdown', diff --git a/test/data/generate_data.py b/test/data/generate_data.py index 9cd04b690..02b44b53b 100755 --- a/test/data/generate_data.py +++ b/test/data/generate_data.py @@ -22,6 +22,8 @@ from metamist.models import AssayUpsert, SampleUpsert, SequencingGroupUpsert from metamist.parser.generic_parser import chunk +PRIMARY_EXTERNAL_ORG = '' + EMOJIS = [':)', ':(', ':/', ':\'('] default_ped_location = str(Path(__file__).parent / 'greek-myth-forgeneration.ped') @@ -114,7 +116,7 @@ def generate_random_number_within_distribution(): nsamples = generate_random_number_within_distribution() for _ in range(nsamples): sample = SampleUpsert( - external_id=f'GRK{sample_id_index}', + external_ids={PRIMARY_EXTERNAL_ORG: f'GRK{sample_id_index}'}, type=random.choice(sample_types), meta={ 'collection_date': datetime.datetime.now() diff --git a/test/data/generate_ourdna_data.py b/test/data/generate_ourdna_data.py index d012ff641..9772957a4 100644 --- a/test/data/generate_ourdna_data.py +++ b/test/data/generate_ourdna_data.py @@ -13,15 +13,17 @@ from metamist.apis import ParticipantApi from metamist.models import ParticipantUpsert, SampleUpsert +PRIMARY_EXTERNAL_ORG = '' + PARTICIPANTS = [ ParticipantUpsert( - external_id='EX01', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=2, karyotype='XX', meta={'consent': True, 'field': 1}, samples=[ SampleUpsert( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', active=True, meta={ @@ -45,13 +47,13 @@ ], ), ParticipantUpsert( - external_id='EX02', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX02'}, reported_sex=1, karyotype='XY', meta={'field': 2}, samples=[ SampleUpsert( - external_id='Test02', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test02'}, type='blood', active=True, meta={ @@ -75,13 +77,13 @@ ], ), ParticipantUpsert( - external_id='EX03', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX03'}, reported_sex=2, karyotype='XX', meta={'consent': True, 'field': 3}, samples=[ SampleUpsert( - external_id='Test03', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test03'}, type='blood', active=True, meta={ diff --git a/test/data/generate_seqr_project_data.py b/test/data/generate_seqr_project_data.py index 5a3f7f5bf..2daa6c31f 100644 --- a/test/data/generate_seqr_project_data.py +++ b/test/data/generate_seqr_project_data.py @@ -17,6 +17,8 @@ from metamist.models import AssayUpsert, SampleUpsert, SequencingGroupUpsert from metamist.parser.generic_parser import chunk +PRIMARY_EXTERNAL_ORG = '' + NAMES = [ 'SOLAR', 'LUNAR', @@ -275,7 +277,7 @@ async def generate_sample_entries( nsamples = generate_random_number_within_distribution(default_count_probabilities) for i in range(nsamples): sample = SampleUpsert( - external_id=f'{participant_eid}_{i+1}', + external_ids={PRIMARY_EXTERNAL_ORG: f'{participant_eid}_{i+1}'}, type=random.choice(sample_types), meta={ 'collection_date': datetime.datetime.now() diff --git a/test/test_analysis.py b/test/test_analysis.py index e6e31b517..46d4b7bfc 100644 --- a/test/test_analysis.py +++ b/test/test_analysis.py @@ -9,6 +9,7 @@ from db.python.utils import GenericFilter from models.enums import AnalysisStatus from models.models import ( + PRIMARY_EXTERNAL_ORG, AnalysisInternal, AssayUpsertInternal, SampleUpsertInternal, @@ -30,7 +31,7 @@ async def setUp(self) -> None: sample = await self.sl.upsert_sample( SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', meta={'meta': 'meta ;)'}, active=True, diff --git a/test/test_assay.py b/test/test_assay.py index 6c26d26ad..2d9d458f5 100644 --- a/test/test_assay.py +++ b/test/test_assay.py @@ -7,8 +7,11 @@ from db.python.layers.sample import SampleLayer from db.python.tables.assay import AssayFilter from db.python.utils import GenericFilter, NotFoundError -from models.models.assay import AssayUpsertInternal -from models.models.sample import SampleUpsertInternal +from models.models import ( + PRIMARY_EXTERNAL_ORG, + AssayUpsertInternal, + SampleUpsertInternal, +) default_sequencing_meta = { 'sequencing_type': 'genome', @@ -34,7 +37,7 @@ async def setUp(self) -> None: self.sample_id_raw = ( await self.slayer.upsert_sample( SampleUpsertInternal( - external_id=self.external_sample_id, + external_ids={PRIMARY_EXTERNAL_ORG: self.external_sample_id}, type='blood', active=True, meta={'Testing': 'test_assay'}, @@ -221,7 +224,7 @@ async def test_query(self): """Test query_assays in different combinations""" sample = await self.slayer.upsert_sample( SampleUpsertInternal( - external_id='SAM_TEST_QUERY', + external_ids={PRIMARY_EXTERNAL_ORG: 'SAM_TEST_QUERY'}, type='blood', active=True, meta={'collection-year': '2022'}, diff --git a/test/test_audit_log.py b/test/test_audit_log.py index 56b758585..9d468a17d 100644 --- a/test/test_audit_log.py +++ b/test/test_audit_log.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync from db.python.layers.sample import SampleLayer -from models.models.sample import SampleUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, SampleUpsertInternal class TestChangelog(DbIsolatedTest): @@ -16,7 +16,7 @@ async def test_insert_sample(self): slayer = SampleLayer(self.connection) sample = await slayer.upsert_sample( SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', active=True, meta={'meta': 'meta ;)'}, diff --git a/test/test_cohort.py b/test/test_cohort.py index 2c8607f62..185bf9ad0 100644 --- a/test/test_cohort.py +++ b/test/test_cohort.py @@ -6,7 +6,11 @@ from db.python.layers import CohortLayer, SampleLayer from db.python.tables.cohort import CohortFilter from db.python.utils import GenericFilter -from models.models import SampleUpsertInternal, SequencingGroupUpsertInternal +from models.models import ( + PRIMARY_EXTERNAL_ORG, + SampleUpsertInternal, + SequencingGroupUpsertInternal, +) from models.models.cohort import ( CohortCriteria, CohortCriteriaInternal, @@ -198,7 +202,7 @@ def get_sample_model( """Create a minimal sample""" return SampleUpsertInternal( meta={}, - external_id=f'EXID{eid}', + external_ids={PRIMARY_EXTERNAL_ORG: f'EXID{eid}'}, type=s_type, sequencing_groups=[ SequencingGroupUpsertInternal( diff --git a/test/test_external_id.py b/test/test_external_id.py new file mode 100644 index 000000000..63055ebff --- /dev/null +++ b/test/test_external_id.py @@ -0,0 +1,349 @@ +from test.testbase import DbIsolatedTest, run_as_sync + +from pymysql.err import IntegrityError + +from db.python.layers import FamilyLayer, ParticipantLayer, SampleLayer +from db.python.utils import NotFoundError +from models.models import ( + PRIMARY_EXTERNAL_ORG, + ParticipantUpsertInternal, + SampleUpsertInternal, + SequencingGroupUpsertInternal, +) + + +class TestParticipant(DbIsolatedTest): + """Test participant external ids""" + + @run_as_sync + async def setUp(self): + super().setUp() + self.player = ParticipantLayer(self.connection) + + self.p1 = await self.player.upsert_participant( + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'P1', 'CONTROL': '86', 'KAOS': 'shoe'}, + ) + ) + self.p1_external_ids = {k.lower(): v for k, v in self.p1.external_ids.items()} + + self.p2 = await self.player.upsert_participant( + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'P2'}, + ) + ) + + async def insert(self, participant_id, org, external_id): + """Directly insert into participant_external_id table""" + query = """ + INSERT INTO participant_external_id (project, participant_id, name, external_id, audit_log_id) + VALUES (:project, :id, :org, :external_id, :audit_log_id) + """ + values = { + 'project': self.project_id, + 'id': participant_id, + 'org': org, + 'external_id': external_id, + 'audit_log_id': await self.audit_log_id(), + } + await self.connection.connection.execute(query, values) + + @run_as_sync + async def test_constraints(self): + """Verify that database constraints prevent duplicate external_ids""" + # Can't have two primary eids + with self.assertRaises(IntegrityError): + await self.insert(self.p1.id, PRIMARY_EXTERNAL_ORG, 'P86') + + # Can't have two eids in the same external organisation + with self.assertRaises(IntegrityError): + await self.insert(self.p1.id, 'CONTROL', 'Maxwell') + + # Can have eids in lots of organisations, but not if the eid duplicates one in a different org + await self.insert(self.p1.id, 'OTHER1', 'Maxwell') + with self.assertRaises(IntegrityError): + await self.insert(self.p1.id, 'OTHER2', '86') + + # Another participant can't have the same eid + with self.assertRaises(IntegrityError): + await self.insert(self.p2.id, 'CONTROL', '86') + + # But it can have its own eid + await self.insert(self.p2.id, 'CONTROL', '99') + + @run_as_sync + async def test_insert(self): + """Test inserting new participants with various external_id combinations""" + with self.assertRaises(ValueError): + _ = await self.player.upsert_participant( + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: None}, + ) + ) + + with self.assertRaises(ValueError): + _ = await self.player.upsert_participant(ParticipantUpsertInternal(external_ids={'OTHER': 'P1'})) + + result = await self.player.upsert_participant( + ParticipantUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'P10', 'A': 'A1', 'B': None, 'C': 'C1'}, + ) + ) + participants = await self.player.get_participants_by_ids([result.id]) + self.assertDictEqual(participants[0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P10', 'a': 'A1', 'c': 'C1'}) + + @run_as_sync + async def test_update(self): + """Test updating existing participants with various external_id combinations""" + with self.assertRaises(ValueError): + _ = await self.player.upsert_participant( + ParticipantUpsertInternal( + id=self.p1.id, + external_ids={PRIMARY_EXTERNAL_ORG: None}, + ) + ) + + result = await self.player.upsert_participant( + ParticipantUpsertInternal( + id=self.p1.id, + external_ids={PRIMARY_EXTERNAL_ORG: 'P1B', 'CONTROL': '86B', 'KAOS': None, 'B': None, 'C': 'A1'}, + ) + ) + participants = await self.player.get_participants_by_ids([result.id]) + self.assertDictEqual(participants[0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86B', 'c': 'A1'}) + + @run_as_sync + async def test_fill_in_missing(self): + """Exercise fill_in_missing_participants() method""" + slayer = SampleLayer(self.connection) + + s1 = await slayer.upsert_sample( + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'E1'}, participant_id=self.p1.id), + ) + sa = await slayer.upsert_sample(SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EA', 'foo': 'FA'})) + sb = await slayer.upsert_sample(SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EB', 'foo': 'FB'})) + + result = await self.player.fill_in_missing_participants() + self.assertEqual(result, 'Updated 2 records') + + samples = await slayer.get_samples_by(sample_ids=[s1.id, sa.id, sb.id]) + + participants = await self.player.get_participants_by_ids([s.participant_id for s in samples]) + p_map = {p.id: p for p in participants} + + for s in samples: + expected_eids = self.p1_external_ids if s.id == s1.id else s.external_ids + self.assertEqual(p_map[s.participant_id].external_ids, expected_eids) + + @run_as_sync + async def test_get_by_families(self): + """Exercise get_participants_by_families() method""" + flayer = FamilyLayer(self.connection) + fid = await flayer.create_family(external_id='Jones') + + child = await self.player.upsert_participant( + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'}), + ) + + await self.player.add_participant_to_family( + family_id=fid, + participant_id=child.id, + paternal_id=self.p1.id, + maternal_id=self.p2.id, + affected=2, + ) + + result = await self.player.get_participants_by_families([fid]) + self.assertEqual(len(result), 1) + self.assertEqual(len(result[fid]), 1) + self.assertDictEqual(result[fid][0].external_ids, {PRIMARY_EXTERNAL_ORG: 'P20', 'd': 'D20'}) + + @run_as_sync + async def test_update_many(self): + """Exercise update_many_participant_external_ids() method""" + result = await self.player.update_many_participant_external_ids({self.p1.id: 'P1B', self.p2.id: 'P2B'}) + self.assertTrue(result) + + participants = await self.player.get_participants_by_ids([self.p1.id, self.p2.id]) + p_map = {p.id: p for p in participants} + outp1 = p_map[self.p1.id] + outp2 = p_map[self.p2.id] + self.assertDictEqual(outp1.external_ids, {PRIMARY_EXTERNAL_ORG: 'P1B', 'control': '86', 'kaos': 'shoe'}) + self.assertDictEqual(outp2.external_ids, {PRIMARY_EXTERNAL_ORG: 'P2B'}) + + @run_as_sync + async def test_get_etoi_map(self): + """Exercise get_external_participant_id_to_internal_sequencing_group_id_map() method""" + slayer = SampleLayer(self.connection) + + _ = await slayer.upsert_sample( + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'SE1'}, participant_id=self.p1.id), + ) + + s2 = await slayer.upsert_sample( + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'SE2', 'other': 'SO1'}, + participant_id=self.p1.id, + sequencing_groups=[ + SequencingGroupUpsertInternal( + type='genome', + technology='short-read', + platform='illumina', + assays=[], + ), + ], + ), + ) + + result = await self.player.get_external_participant_id_to_internal_sequencing_group_id_map(self.project_id) + self.assertEqual(len(result), 3) + for eid, sgid in result: + self.assertIn(eid, self.p1.external_ids.values()) + self.assertEqual(sgid, s2.sequencing_groups[0].id) + + +class TestSample(DbIsolatedTest): + """Test sample external ids""" + + @run_as_sync + async def setUp(self): + super().setUp() + self.slayer = SampleLayer(self.connection) + + self.s1 = await self.slayer.upsert_sample( + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'S1', 'CONTROL': '86', 'KAOS': 'shoe'}, + ) + ) + + self.s2 = await self.slayer.upsert_sample( + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'S2'}, + ) + ) + + async def insert(self, sample_id, org, external_id): + """Directly insert into sample_external_id table""" + query = """ + INSERT INTO sample_external_id (project, sample_id, name, external_id, audit_log_id) + VALUES (:project, :id, :org, :external_id, :audit_log_id) + """ + values = { + 'project': self.project_id, + 'id': sample_id, + 'org': org, + 'external_id': external_id, + 'audit_log_id': await self.audit_log_id(), + } + await self.connection.connection.execute(query, values) + + @run_as_sync + async def test_constraints(self): + """Verify that database constraints prevent duplicate external_ids""" + # Can't have two primary eids + with self.assertRaises(IntegrityError): + await self.insert(self.s1.id, PRIMARY_EXTERNAL_ORG, 'S86') + + # Can't have two eids in the same external organisation + with self.assertRaises(IntegrityError): + await self.insert(self.s1.id, 'CONTROL', 'Maxwell') + + # Can have eids in lots of organisations, but not if the eid duplicates one in a different org + await self.insert(self.s1.id, 'OTHER1', 'Maxwell') + with self.assertRaises(IntegrityError): + await self.insert(self.s1.id, 'OTHER2', '86') + + # Another sample can't have the same eid + with self.assertRaises(IntegrityError): + await self.insert(self.s2.id, 'CONTROL', '86') + + # But it can have its own eid + await self.insert(self.s2.id, 'CONTROL', '99') + + @run_as_sync + async def test_insert(self): + """Test inserting new samples with various external_id combinations""" + with self.assertRaises(ValueError): + _ = await self.slayer.upsert_sample( + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: None}, + ) + ) + + with self.assertRaises(ValueError): + _ = await self.slayer.upsert_sample(SampleUpsertInternal(external_ids={'OTHER': 'S1'})) + + result = await self.slayer.upsert_sample( + SampleUpsertInternal( + external_ids={PRIMARY_EXTERNAL_ORG: 'S10', 'A': 'A1', 'B': None, 'C': 'C1'}, + ) + ) + sample = await self.slayer.get_sample_by_id(result.id) + self.assertDictEqual(sample.external_ids, {PRIMARY_EXTERNAL_ORG: 'S10', 'a': 'A1', 'c': 'C1'}) + + @run_as_sync + async def test_update(self): + """Test updating existing samples with various external_id combinations""" + with self.assertRaises(ValueError): + _ = await self.slayer.upsert_sample( + SampleUpsertInternal( + id=self.s1.id, + external_ids={PRIMARY_EXTERNAL_ORG: None}, + ) + ) + + result = await self.slayer.upsert_sample( + SampleUpsertInternal( + id=self.s1.id, + external_ids={PRIMARY_EXTERNAL_ORG: 'S1B', 'CONTROL': '86B', 'KAOS': None, 'B': None, 'C': 'A1'}, + ) + ) + sample = await self.slayer.get_sample_by_id(result.id) + self.assertDictEqual(sample.external_ids, {PRIMARY_EXTERNAL_ORG: 'S1B', 'control': '86B', 'c': 'A1'}) + + @run_as_sync + async def test_get_single(self): + """Exercise get_single_by_external_id() method""" + with self.assertRaises(NotFoundError): + _ = await self.slayer.get_single_by_external_id('non-existent', self.project_id) + + result = await self.slayer.get_single_by_external_id('86', self.project_id) + self.assertEqual(result.id, self.s1.id) + + result = await self.slayer.get_single_by_external_id('S2', self.project_id) + self.assertEqual(result.id, self.s2.id) + + @run_as_sync + async def test_get_internal_to_external(self): + """Exercise get_internal_to_external_sample_id_map() method""" + result = await self.slayer.get_internal_to_external_sample_id_map([self.s1.id, self.s2.id]) + self.assertDictEqual(result, {self.s1.id: 'S1', self.s2.id: 'S2'}) + + @run_as_sync + async def test_get_all(self): + """Exercise get_all_sample_id_map_by_internal_ids() method""" + result = await self.slayer.get_all_sample_id_map_by_internal_ids(self.project_id) + self.assertDictEqual(result, {self.s1.id: 'S1', self.s2.id: 'S2'}) + + @run_as_sync + async def test_get_history(self): + """Exercise get_history_of_sample() method""" + # First create some history + await self.slayer.upsert_sample(SampleUpsertInternal(id=self.s1.id, meta={'foo': 'bar'})) + + await self.slayer.upsert_sample( + SampleUpsertInternal( + id=self.s1.id, + external_ids={'fruit': 'apple'}, + meta={'fruit': 'banana'}, + ) + ) + + sample = await self.slayer.get_sample_by_id(self.s1.id) + + result = await self.slayer.get_history_of_sample(self.s1.id) + self.assertEqual(len(result), 3) + self.assertDictEqual(result[0].meta, {}) + self.assertDictEqual(result[1].meta, {'foo': 'bar'}) + self.assertDictEqual(result[2].meta, {'foo': 'bar', 'fruit': 'banana'}) + self.assertDictEqual(result[2].meta, sample.meta) diff --git a/test/test_get_participants.py b/test/test_get_participants.py index 48680611f..5790e0fa1 100644 --- a/test/test_get_participants.py +++ b/test/test_get_participants.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync from db.python.layers.participant import ParticipantLayer -from models.models.participant import ParticipantUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, ParticipantUpsertInternal class TestParticipant(DbIsolatedTest): @@ -11,17 +11,20 @@ class TestParticipant(DbIsolatedTest): async def setUp(self) -> None: super().setUp() + self.ex01 = {PRIMARY_EXTERNAL_ORG: 'EX01', 'other': 'OTHER1'} + self.ex02 = {PRIMARY_EXTERNAL_ORG: 'EX02'} + pl = ParticipantLayer(self.connection) await pl.upsert_participants( [ ParticipantUpsertInternal( - external_id='EX01', + external_ids=self.ex01, reported_sex=2, karyotype='XX', meta={'field': 1}, ), ParticipantUpsertInternal( - external_id='EX02', + external_ids=self.ex02, reported_sex=1, karyotype='XY', meta={'field': 2}, @@ -37,11 +40,11 @@ async def test_get_all_participants(self): self.assertEqual(2, len(ps)) - self.assertEqual('EX01', ps[0].external_id) + self.assertEqual(self.ex01, ps[0].external_ids) self.assertEqual(1, ps[0].meta['field']) self.assertEqual('XX', ps[0].karyotype) - self.assertEqual('EX02', ps[1].external_id) + self.assertEqual(self.ex02, ps[1].external_ids) @run_as_sync async def test_get_participant_by_eid(self): @@ -51,6 +54,6 @@ async def test_get_participant_by_eid(self): self.assertEqual(1, len(ps)) - self.assertEqual('EX02', ps[0].external_id) + self.assertEqual(self.ex02, ps[0].external_ids) self.assertEqual(2, ps[0].meta['field']) self.assertEqual('XY', ps[0].karyotype) diff --git a/test/test_graphql.py b/test/test_graphql.py index 5dff32b88..41ad61e14 100644 --- a/test/test_graphql.py +++ b/test/test_graphql.py @@ -8,6 +8,7 @@ from metamist.graphql import configure_sync_client, gql, validate from models.enums import AnalysisStatus from models.models import ( + PRIMARY_EXTERNAL_ORG, AnalysisInternal, AssayUpsertInternal, ParticipantUpsertInternal, @@ -25,11 +26,11 @@ def _get_single_participant_upsert(): return ParticipantUpsertInternal( - external_id='Demeter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id001', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id001'}, meta={}, type='blood', sequencing_groups=[ @@ -197,10 +198,10 @@ async def test_query_sample_by_meta(self): await self.player.upsert_participant( ParticipantUpsertInternal( meta={}, - external_id='Demeter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, samples=[ SampleUpsertInternal( - external_id='sample_id001', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id001'}, meta={'thisKey': 'value'}, ) ], @@ -278,7 +279,7 @@ async def test_participant_phenotypes(self): """ # insert participant p = await self.player.upsert_participant( - ParticipantUpsertInternal(external_id='Demeter', meta={}, samples=[]) + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[]) ) phenotypes = {'phenotype1': 'value1', 'phenotype2': {'number': 123}} diff --git a/test/test_import_individual_metadata.py b/test/test_import_individual_metadata.py index adb65622e..447be8535 100644 --- a/test/test_import_individual_metadata.py +++ b/test/test_import_individual_metadata.py @@ -3,7 +3,7 @@ from databases.interfaces import Record from db.python.layers.participant import ParticipantLayer -from models.models.participant import ParticipantUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, ParticipantUpsertInternal class TestImportIndividualMetadata(DbIsolatedTest): @@ -14,7 +14,7 @@ async def test_import_many_hpo_terms(self): """Test import hpo terms from many columns""" pl = ParticipantLayer(self.connection) - await pl.upsert_participant(ParticipantUpsertInternal(external_id='TP01')) + await pl.upsert_participant(ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'TP01'})) headers = [ 'Individual ID', @@ -48,8 +48,8 @@ async def test_import_basic_metadata(self): await pl.upsert_participants( [ - ParticipantUpsertInternal(external_id='TP01'), - ParticipantUpsertInternal(external_id='TP02'), + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'TP01'}), + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'TP02'}), ] ) diff --git a/test/test_models.py b/test/test_models.py index 2f395a5c8..e066eeb33 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,6 +1,7 @@ from unittest import TestCase from models.models import ( + PRIMARY_EXTERNAL_ORG, ParticipantUpsert, ParticipantUpsertInternal, SampleUpsert, @@ -14,20 +15,20 @@ class TestParticipantModels(TestCase): def test_participant_to_internal_basic(self): """Test converting a basic participant to internal model""" - external = ParticipantUpsert(external_id='hey-hey') + external = ParticipantUpsert(external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) internal = external.to_internal() self.assertIsInstance(internal, ParticipantUpsertInternal) - self.assertEqual('hey-hey', internal.external_id) + self.assertDictEqual({PRIMARY_EXTERNAL_ORG: 'hey-hey'}, internal.external_ids) def test_participant_to_external_basic(self): """Test converting a basic participant to external model""" - internal = ParticipantUpsertInternal(id=1, external_id='hey-hey') + internal = ParticipantUpsertInternal(id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) external = internal.to_external() self.assertIsInstance(external, ParticipantUpsert) self.assertEqual(1, external.id) - self.assertEqual('hey-hey', external.external_id) + self.assertDictEqual({PRIMARY_EXTERNAL_ORG: 'hey-hey'}, external.external_ids) class TestSampleModels(TestCase): @@ -35,17 +36,17 @@ class TestSampleModels(TestCase): def test_sample_to_internal_basic(self): """Test converting a basic sample to internal model""" - external = SampleUpsert(external_id='hey-hey') + external = SampleUpsert(external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) internal = external.to_internal() self.assertIsInstance(internal, SampleUpsertInternal) - self.assertEqual('hey-hey', internal.external_id) + self.assertDictEqual({PRIMARY_EXTERNAL_ORG: 'hey-hey'}, internal.external_ids) def test_sample_to_external_basic(self): """Test converting a basic sample to external model""" - internal = SampleUpsertInternal(id=1, external_id='hey-hey') + internal = SampleUpsertInternal(id=1, external_ids={PRIMARY_EXTERNAL_ORG: 'hey-hey'}) external = internal.to_external() self.assertIsInstance(external, SampleUpsert) self.assertEqual(sample_id_format(1), external.id) - self.assertEqual('hey-hey', external.external_id) + self.assertDictEqual({PRIMARY_EXTERNAL_ORG: 'hey-hey'}, external.external_ids) diff --git a/test/test_ourdna_dashboard.py b/test/test_ourdna_dashboard.py index a21bbae26..c71ca62cd 100644 --- a/test/test_ourdna_dashboard.py +++ b/test/test_ourdna_dashboard.py @@ -7,6 +7,7 @@ from db.python.layers.participant import ParticipantLayer from db.python.layers.sample import SampleLayer from models.models import ( + PRIMARY_EXTERNAL_ORG, OurDNADashboard, ParticipantUpsertInternal, SampleUpsert, @@ -33,13 +34,13 @@ async def setUp(self) -> None: participants = await self.pl.upsert_participants( [ ParticipantUpsertInternal( - external_id='EX01', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=2, karyotype='XX', meta={'consent': True, 'field': 1}, samples=[ SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, meta={ 'collection-time': '2022-07-03 13:28:00', 'processing-site': 'Garvan', @@ -63,13 +64,13 @@ async def setUp(self) -> None: ], ), ParticipantUpsertInternal( - external_id='EX02', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX02'}, reported_sex=1, karyotype='XY', meta={'field': 2}, samples=[ SampleUpsertInternal( - external_id='Test02', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test02'}, meta={ 'collection-time': '2022-07-03 13:28:00', 'processing-site': 'BBV', @@ -93,13 +94,13 @@ async def setUp(self) -> None: ], ), ParticipantUpsertInternal( - external_id='EX03', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX03'}, reported_sex=2, karyotype='XX', meta={'consent': True, 'field': 3}, samples=[ SampleUpsertInternal( - external_id='Test03', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test03'}, meta={ # 'collection-time': '2022-07-03 13:28:00', 'processing-site': 'Garvan', diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index e59262c7d..5f2b37e7c 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -5,7 +5,11 @@ from db.python.layers import ParticipantLayer from metamist.parser.generic_parser import ParsedParticipant -from models.models import ParticipantUpsertInternal, SampleUpsertInternal +from models.models import ( + PRIMARY_EXTERNAL_ORG, + ParticipantUpsertInternal, + SampleUpsertInternal, +) from scripts.parse_existing_cohort import Columns, ExistingCohortParser @@ -193,10 +197,10 @@ async def test_existing_row( await player.upsert_participants( [ ParticipantUpsertInternal( - external_id='EXTID1234', + external_ids={PRIMARY_EXTERNAL_ORG: 'EXTID1234'}, samples=[ SampleUpsertInternal( - external_id='EXTID1234', + external_ids={PRIMARY_EXTERNAL_ORG: 'EXTID1234'}, ) ], ) diff --git a/test/test_parse_generic_metadata.py b/test/test_parse_generic_metadata.py index b05e65ada..a7f2d1d22 100644 --- a/test/test_parse_generic_metadata.py +++ b/test/test_parse_generic_metadata.py @@ -21,6 +21,7 @@ ParsedSequencingGroup, ) from models.models import ( + PRIMARY_EXTERNAL_ORG, AssayUpsertInternal, ParticipantUpsertInternal, SampleUpsertInternal, @@ -38,11 +39,11 @@ def _get_basic_participant_to_upsert(): } return ParticipantUpsertInternal( - external_id='Demeter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id001', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id001'}, meta={}, type='blood', sequencing_groups=[ diff --git a/test/test_parse_ont_sheet.py b/test/test_parse_ont_sheet.py index 6a9c5f38d..17694abd8 100644 --- a/test/test_parse_ont_sheet.py +++ b/test/test_parse_ont_sheet.py @@ -4,7 +4,11 @@ from db.python.layers import ParticipantLayer from metamist.parser.generic_parser import ParsedParticipant -from models.models import ParticipantUpsertInternal, SampleUpsertInternal +from models.models import ( + PRIMARY_EXTERNAL_ORG, + ParticipantUpsertInternal, + SampleUpsertInternal, +) from scripts.parse_ont_sheet import OntParser @@ -26,10 +30,10 @@ async def test_simple_sheet(self, mock_graphql_query): await player.upsert_participants( [ ParticipantUpsertInternal( - external_id='Sample01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Sample01'}, samples=[ SampleUpsertInternal( - external_id='Sample01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Sample01'}, ) ], ) diff --git a/test/test_pedigree.py b/test/test_pedigree.py index 8966c8cc3..f263ffd32 100644 --- a/test/test_pedigree.py +++ b/test/test_pedigree.py @@ -2,7 +2,7 @@ from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer -from models.models.participant import ParticipantUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, ParticipantUpsertInternal class TestPedigree(DbIsolatedTest): @@ -51,12 +51,12 @@ async def test_pedigree_without_family(self): await pl.upsert_participant( ParticipantUpsertInternal( - external_id='EX01', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=1, ) ) await pl.upsert_participant( - ParticipantUpsertInternal(external_id='EX02', reported_sex=None) + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX02'}, reported_sex=None) ) rows = await fl.get_pedigree( diff --git a/test/test_sample.py b/test/test_sample.py index e5b8639b7..b1795c0b7 100644 --- a/test/test_sample.py +++ b/test/test_sample.py @@ -1,7 +1,7 @@ from test.testbase import DbIsolatedTest, run_as_sync from db.python.layers.sample import SampleLayer -from models.models.sample import SampleUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, SampleUpsertInternal class TestSample(DbIsolatedTest): @@ -19,7 +19,7 @@ async def test_add_sample(self): """Test inserting a sample""" sample = await self.slayer.upsert_sample( SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', active=True, meta={'meta': 'meta ;)'}, @@ -32,13 +32,16 @@ async def test_add_sample(self): self.assertEqual(1, len(samples)) self.assertEqual(sample.id, samples[0]['id']) + mapping = await self.slayer.get_sample_id_map_by_external_ids(['Test01']) + self.assertDictEqual({'Test01': sample.id}, mapping) + @run_as_sync async def test_get_sample(self): """Test getting formed sample""" meta_dict = {'meta': 'meta ;)'} s = await self.slayer.upsert_sample( SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', active=True, meta=meta_dict, @@ -56,21 +59,21 @@ async def test_update_sample(self): meta_dict = {'meta': 'meta ;)'} s = await self.slayer.upsert_sample( SampleUpsertInternal( - external_id='Test01', + external_ids={PRIMARY_EXTERNAL_ORG: 'Test01'}, type='blood', active=True, meta=meta_dict, ) ) - new_external_id = 'Test02' + new_external_id_dict = {PRIMARY_EXTERNAL_ORG: 'Test02'} await self.slayer.upsert_sample( - SampleUpsertInternal(id=s.id, external_id=new_external_id) + SampleUpsertInternal(id=s.id, external_ids=new_external_id_dict) ) sample = await self.slayer.get_by_id(s.id, check_project_id=False) - self.assertEqual(new_external_id, sample.external_id) + self.assertDictEqual(new_external_id_dict, sample.external_ids) # @run_as_sync # async def test_update_sample(self): diff --git a/test/test_search.py b/test/test_search.py index d5498b098..6bf90d00f 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -8,6 +8,7 @@ from db.python.tables.family_participant import FamilyParticipantTable from models.enums import SearchResponseType from models.models import ( + PRIMARY_EXTERNAL_ORG, AssayUpsertInternal, FamilySearchResponseData, ParticipantSearchResponseData, @@ -52,7 +53,7 @@ async def test_search_unavailable_sample_by_internal_id(self): Mock this in testing by limiting scope to non-existent project IDs """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_id='EX001', type='blood') + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') ) cpg_id = sample_id_format(sample.id) @@ -67,7 +68,7 @@ async def test_search_isolated_sample_by_id(self): Search by valid CPG sample ID (special case) """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_id='EX001', type='blood') + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') ) cpg_id = sample_id_format(sample.id) results = await self.schlay.search(query=cpg_id, project_ids=[self.project_id]) @@ -87,7 +88,7 @@ async def test_search_isolated_sequencing_group_by_id(self): Search by valid CPG sequencing group ID (special case) """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_id='EXS001', type='blood') + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EXS001'}, type='blood') ) sg = await self.sglayer.upsert_sequencing_groups( [ @@ -134,7 +135,7 @@ async def test_search_isolated_sample_by_external_id(self): should only return one result """ sample = await self.slayer.upsert_sample( - SampleUpsertInternal(external_id='EX001', type='blood') + SampleUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX001'}, type='blood') ) results = await self.schlay.search(query='EX001', project_ids=[self.project_id]) @@ -159,7 +160,7 @@ async def test_search_participant_isolated(self): should only return one result """ p = await self.player.upsert_participant( - ParticipantUpsertInternal(external_id='PART01') + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'PART01'}) ) results = await self.schlay.search( query='PART01', project_ids=[self.project_id] @@ -197,7 +198,7 @@ async def test_search_mixed(self): fptable = FamilyParticipantTable(self.connection) p = await self.player.upsert_participant( - ParticipantUpsertInternal(external_id='X:PART01') + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'X:PART01'}) ) f_id = await self.flayer.create_family(external_id='X:FAM01') await fptable.create_rows( @@ -216,7 +217,7 @@ async def test_search_mixed(self): sample = await self.slayer.upsert_sample( SampleUpsertInternal( - external_id='X:SAM001', + external_ids={PRIMARY_EXTERNAL_ORG: 'X:SAM001'}, type='blood', participant_id=p.id, ) diff --git a/test/test_sequencing_groups.py b/test/test_sequencing_groups.py index 5cf19d975..a20b89a33 100644 --- a/test/test_sequencing_groups.py +++ b/test/test_sequencing_groups.py @@ -6,6 +6,7 @@ from db.python.utils import GenericFilter from models.enums.analysis import AnalysisStatus from models.models import ( + PRIMARY_EXTERNAL_ORG, AnalysisInternal, AssayUpsertInternal, SampleUpsertInternal, @@ -20,7 +21,7 @@ def get_sample_model(): """ return SampleUpsertInternal( meta={}, - external_id='EX_ID', + external_ids={PRIMARY_EXTERNAL_ORG: 'EX_ID'}, sequencing_groups=[ SequencingGroupUpsertInternal( type='genome', diff --git a/test/test_update_participant_family.py b/test/test_update_participant_family.py index 204295644..f0a6902eb 100644 --- a/test/test_update_participant_family.py +++ b/test/test_update_participant_family.py @@ -4,7 +4,7 @@ from db.python.layers.family import FamilyLayer from db.python.layers.participant import ParticipantLayer -from models.models import ParticipantUpsertInternal +from models.models import PRIMARY_EXTERNAL_ORG, ParticipantUpsertInternal class TestParticipantFamily(DbIsolatedTest): @@ -22,17 +22,17 @@ async def setUp(self) -> None: pl = ParticipantLayer(self.connection) self.pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_id='EX01', reported_sex=2) + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01'}, reported_sex=2) ) ).id self.pat_pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_id='EX01_pat', reported_sex=1) + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_pat'}, reported_sex=1) ) ).id self.mat_pid = ( await pl.upsert_participant( - ParticipantUpsertInternal(external_id='EX01_mat', reported_sex=2) + ParticipantUpsertInternal(external_ids={PRIMARY_EXTERNAL_ORG: 'EX01_mat'}, reported_sex=2) ) ).id diff --git a/test/test_upsert.py b/test/test_upsert.py index 02d46a378..3c3fc2519 100644 --- a/test/test_upsert.py +++ b/test/test_upsert.py @@ -1,10 +1,13 @@ from test.testbase import DbIsolatedTest, run_as_sync from db.python.layers.participant import ParticipantLayer -from models.models.assay import AssayUpsertInternal -from models.models.participant import ParticipantUpsertInternal -from models.models.sample import SampleUpsertInternal -from models.models.sequencing_group import SequencingGroupUpsertInternal +from models.models import ( + PRIMARY_EXTERNAL_ORG, + AssayUpsertInternal, + ParticipantUpsertInternal, + SampleUpsertInternal, + SequencingGroupUpsertInternal, +) default_assay_meta = { 'sequencing_type': 'genome', @@ -14,11 +17,11 @@ all_participants = [ ParticipantUpsertInternal( - external_id='Demeter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id001', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id001'}, meta={}, sequencing_groups=[ SequencingGroupUpsertInternal( @@ -89,11 +92,11 @@ ], ), ParticipantUpsertInternal( - external_id='Apollo', + external_ids={PRIMARY_EXTERNAL_ORG: 'Apollo'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id002', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id002'}, meta={}, sequencing_groups=[ SequencingGroupUpsertInternal( @@ -131,7 +134,7 @@ type='blood', ), SampleUpsertInternal( - external_id='sample_id004', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id004'}, meta={}, sequencing_groups=[ SequencingGroupUpsertInternal( @@ -171,11 +174,11 @@ ], ), ParticipantUpsertInternal( - external_id='Athena', + external_ids={PRIMARY_EXTERNAL_ORG: 'Athena'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id003', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id003'}, meta={}, sequencing_groups=[ SequencingGroupUpsertInternal( @@ -237,22 +240,33 @@ async def test_insert_participants(self): await pt.upsert_participants(all_participants, open_transaction=False) expected_sample_eid_to_participant_eid = { - sample.external_id: participant.external_id + sample_eid: participant_eid for participant in all_participants + for participant_eid in participant.external_ids.values() for sample in participant.samples + for sample_eid in sample.external_ids.values() } db_participants = await self.connection.connection.fetch_all( - 'SELECT * FROM participant' + 'SELECT * FROM participant_external_id ORDER BY participant_id' ) self.assertEqual(3, len(db_participants)) self.assertEqual('Demeter', db_participants[0]['external_id']) self.assertEqual('Apollo', db_participants[1]['external_id']) self.assertEqual('Athena', db_participants[2]['external_id']) - participant_id_map = {p['external_id']: p['id'] for p in db_participants} + participant_id_map = {p['external_id']: p['participant_id'] for p in db_participants} - db_samples = await self.connection.connection.fetch_all('SELECT * FROM sample') + db_samples = await self.connection.connection.fetch_all( + """ + SELECT s.participant_id, seid.external_id + FROM sample s + INNER JOIN sample_external_id seid ON s.id = seid.sample_id + WHERE seid.name = :PRIMARY_EXTERNAL_ORG + ORDER BY s.id + """, + {'PRIMARY_EXTERNAL_ORG': PRIMARY_EXTERNAL_ORG}, + ) self.assertEqual(4, len(db_samples)) for db_sample in db_samples: self.assertIsNotNone(db_sample['external_id']) diff --git a/test/test_web.py b/test/test_web.py index c14ba7b82..24d46fccd 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -9,6 +9,7 @@ ) from models.enums import MetaSearchEntityPrefix from models.models import ( + PRIMARY_EXTERNAL_ORG, Assay, AssayInternal, AssayUpsertInternal, @@ -54,11 +55,11 @@ def merge(d1: dict, d2: dict): def get_test_participant(): """Do it like this to avoid an upsert writing the test value""" return ParticipantUpsertInternal( - external_id='Demeter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Demeter'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id001', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id001'}, meta={}, type='blood', sequencing_groups=[ @@ -102,11 +103,11 @@ def get_test_participant(): def get_test_participant_2(): """Do it like this to avoid an upsert writing the test value""" return ParticipantUpsertInternal( - external_id='Meter', + external_ids={PRIMARY_EXTERNAL_ORG: 'Meter'}, meta={}, samples=[ SampleUpsertInternal( - external_id='sample_id002', + external_ids={PRIMARY_EXTERNAL_ORG: 'sample_id002'}, meta={}, type='blood', sequencing_groups=[ diff --git a/web/package.json b/web/package.json index 339bba0e2..c4d981a98 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "metamist", - "version": "7.0.4", + "version": "7.1.0", "private": true, "dependencies": { "@apollo/client": "^3.7.3", From 71dddbe459eeeb2176a9c2feced808b6ea7c2549 Mon Sep 17 00:00:00 2001 From: Michael Franklin <22381693+illusional@users.noreply.github.com> Date: Fri, 14 Jun 2024 07:46:58 +1000 Subject: [PATCH 3/3] Hotfix for external IDs on the Project Grid (#824) * Format external_ids on project grid * Fix external id key * Fix tests * Let linter do it's thang --------- Co-authored-by: Michael Franklin --- db/python/layers/web.py | 4 ++-- test/test_web.py | 16 ++++++++-------- web/src/pages/project/ProjectGrid.tsx | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 4366e52ed..9ef385145 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -506,7 +506,7 @@ async def get_project_summary( has_reported_gender = any(p.reported_gender for p in pmodels) has_karyotype = any(p.karyotype for p in pmodels) - participant_keys = [('external_id', 'Participant ID')] + participant_keys = [('external_ids', 'Participant ID')] if has_reported_sex: participant_keys.append(('reported_sex', 'Reported sex')) @@ -518,7 +518,7 @@ async def get_project_summary( participant_keys.extend(('meta.' + k, k) for k in participant_meta_keys) sample_keys: list[tuple[str, str]] = [ ('id', 'Sample ID'), - ('external_id', 'External Sample ID'), + ('external_ids', 'External Sample ID'), ('created_date', 'Created date'), ] + [('meta.' + k, k) for k in sample_meta_keys] diff --git a/test/test_web.py b/test/test_web.py index 24d46fccd..c865960bb 100644 --- a/test/test_web.py +++ b/test/test_web.py @@ -165,10 +165,10 @@ def get_test_participant_2(): }, batch_sequencing_group_stats={'M001': {'genome': '1'}}, participants=[], - participant_keys=[('external_id', 'Participant ID')], + participant_keys=[('external_ids', 'Participant ID')], sample_keys=[ ('id', 'Sample ID'), - ('external_id', 'External Sample ID'), + ('external_ids', 'External Sample ID'), ('created_date', 'Created date'), ], sequencing_group_keys=[ @@ -349,10 +349,10 @@ async def test_project_summary_multiple_participants(self): }, batch_sequencing_group_stats={'M001': {'genome': '2'}}, participants=[], # data_to_class(expected_data_list), - participant_keys=[('external_id', 'Participant ID')], + participant_keys=[('external_ids', 'Participant ID')], sample_keys=[ ('id', 'Sample ID'), - ('external_id', 'External Sample ID'), + ('external_ids', 'External Sample ID'), ('created_date', 'Created date'), ], sequencing_group_keys=[ @@ -402,10 +402,10 @@ async def test_project_summary_multiple_participants_and_filter(self): }, batch_sequencing_group_stats={'M001': {'genome': '2'}}, participants=[], # data_to_class(expected_data_list_filtered), - participant_keys=[('external_id', 'Participant ID')], + participant_keys=[('external_ids', 'Participant ID')], sample_keys=[ ('id', 'Sample ID'), - ('external_id', 'External Sample ID'), + ('external_ids', 'External Sample ID'), ('created_date', 'Created date'), ], sequencing_group_keys=[ @@ -478,10 +478,10 @@ async def test_field_with_space(self): }, batch_sequencing_group_stats={'M001': {'genome': '2'}}, participants=[], - participant_keys=[('external_id', 'Participant ID')], + participant_keys=[('external_ids', 'Participant ID')], sample_keys=[ ('id', 'Sample ID'), - ('external_id', 'External Sample ID'), + ('external_ids', 'External Sample ID'), ('created_date', 'Created date'), ], sequencing_group_keys=[ diff --git a/web/src/pages/project/ProjectGrid.tsx b/web/src/pages/project/ProjectGrid.tsx index a890167ef..149f72c9e 100644 --- a/web/src/pages/project/ProjectGrid.tsx +++ b/web/src/pages/project/ProjectGrid.tsx @@ -30,6 +30,12 @@ interface ProjectGridProps { }) => void } +const formatExternalIds = (ids: Record) => { + return Object.entries(ids) + .map(([k, v]) => (k.length === 0 ? sanitiseValue(v) : `${k}: ${v}`)) + .join(', ') +} + const ProjectGrid: React.FunctionComponent = ({ summary, projectName, @@ -300,9 +306,7 @@ const ProjectGrid: React.FunctionComponent = ({ // const border = '1px solid' // debugger return ( - + {isFirstOfGroup && ( = ({ key={`${p.id}participant.${k}`} rowSpan={participantRowSpan} > - {sanitiseValue(_.get(p, k))} + {k === 'external_ids' + ? formatExternalIds(p.external_ids) + : sanitiseValue(_.get(p, k))} ))} {sgidx === 0 && @@ -364,12 +370,14 @@ const ProjectGrid: React.FunctionComponent = ({ key={`${s.id}sample.${k}`} rowSpan={samplesRowSpan} > - {k === 'external_id' || k === 'id' ? ( + {k === 'external_ids' || k === 'id' ? ( - {sanitiseValue(_.get(s, k))} + {k === 'external_ids' + ? formatExternalIds(s.external_ids) + : sanitiseValue(s.id)} ) : ( sanitiseValue(_.get(s, k))