Skip to content

Commit

Permalink
Implement multiple external IDs for families (#896)
Browse files Browse the repository at this point in the history
* Create family_external_id database table
* Replace family.external_id with external_ids etc
* In api/graphql/schema.py, add external_ids alongside external_id
* In ParticipantGridRow.tsx, reuse prepareExternalIds() to handle multiple external IDs for families

* Complete rewrite of insert_or_update_multiple_families()

The previous INSERT ... ON DUPLICATE KEY UPDATE code inserts a
new family or, if the same project+external_id entry already exists,
updates description and coded_phenotype -- as external_id is part of
the key used to locate the record, it can't be updated.

With external_id moving to a separate table, we need to write this
logic out explicitly. We search by any external id, but only insert
the primary external id for new records. (At present, this functionality
is used only by FamilyLayer.import_families(), which parses only the
primary external id.)

* Use transactions in create_family() and update_family()

* FamilyTable.get_id_map_by_internal_ids() returns only primary external IDs

Most users of this function want a single external id (per internal id)
that they can use to populate a pedigree or pass to seqr. (The call
in ParticipantLayer.generic_individual_metadata_importer() has more complex
requirements, but its needs are broadly the same.) Hence, at least for now, it is simplest
to keep the 1:1 map return type and return only the primary external ids.
  • Loading branch information
jmarshall authored Oct 9, 2024
1 parent a4450ee commit f2de605
Show file tree
Hide file tree
Showing 17 changed files with 470 additions and 116 deletions.
4 changes: 3 additions & 1 deletion api/graphql/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,7 @@ class GraphQLFamily:

id: int
external_id: str
external_ids: strawberry.scalars.JSON

description: str | None
coded_phenotype: str | None
Expand All @@ -739,7 +740,8 @@ class GraphQLFamily:
def from_internal(internal: FamilyInternal) -> 'GraphQLFamily':
return GraphQLFamily(
id=internal.id,
external_id=internal.external_id,
external_id=internal.external_ids[PRIMARY_EXTERNAL_ORG],
external_ids=internal.external_ids or {},
description=internal.description,
coded_phenotype=internal.coded_phenotype,
project_id=internal.project,
Expand Down
4 changes: 2 additions & 2 deletions api/routes/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class FamilyUpdateModel(BaseModel):
"""Model for updating a family"""

id: int
external_id: str | None = None
external_ids: dict[str, str] | None = None
description: str | None = None
coded_phenotype: str | None = None

Expand Down Expand Up @@ -171,7 +171,7 @@ async def update_family(
return {
'success': await family_layer.update_family(
id_=family.id,
external_id=family.external_id,
external_ids=family.external_ids,
description=family.description,
coded_phenotype=family.coded_phenotype,
)
Expand Down
76 changes: 76 additions & 0 deletions db/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1789,4 +1789,80 @@
<sql>ALTER TABLE `analysis_outputs` ADD SYSTEM VERSIONING;</sql>
</changeSet>

<changeSet id="2024-07-25-family-external-ids" author="john.marshall">
<createTable tableName="family_external_id">
<column name="project" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_PROJECT_FAMILY_EXTERNAL_ID"
references="project(id)" />
</column>
<column name="family_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_FAMILY_FAMILY_EXTERNAL_ID"
references="family(id)" />
</column>
<column name="name" type="VARCHAR(255)" />
<column name="external_id" type="VARCHAR(255)">
<constraints nullable="false" />
</column>
<column name="meta" type="LONGTEXT" />
<column name="audit_log_id" type="INT">
<constraints
nullable="false"
foreignKeyName="FK_FAMILY_EXTERNAL_ID_CHANGELOG_ID"
references="audit_log(id)" />
</column>
</createTable>
<addPrimaryKey
tableName="family_external_id"
columnNames="family_id,name"
constraintName="PK_FAMILY_EXTERNAL_ID"
validate="true"
/>
<addUniqueConstraint
tableName="family_external_id"
columnNames="project,external_id"
constraintName="UK_FAMILY_EXTERNAL_ID_UNIQUE_EIDS"
validate="true"
/>
<createIndex tableName="family" indexName="fk_project_family">
<column name="project" />
</createIndex>
<dropUniqueConstraint
tableName="family"
constraintName="UK_FAMILY_PROJECT_EXTERNALID"
/>

<sql>ALTER TABLE family_external_id ADD SYSTEM VERSIONING;</sql>

<!-- Migrate existing external_ids to the new tables, keyed by PRIMARY_EXTERNAL_ORG, i.e. '' -->
<sql>INSERT INTO audit_log (author, on_behalf_of, ar_guid, comment, auth_project, meta)
VALUES ('liquibase', NULL, NULL, 'family external_id migration', NULL, NULL)
RETURNING @audit_log_id := id;

INSERT INTO family_external_id (project, family_id, name, external_id, audit_log_id)
SELECT project, id, '', external_id, @audit_log_id
FROM family;
</sql>
</changeSet>

<changeSet id="2024-07-26-drop-old-external-id-columns" author="john.marshall">
<sql>SET @@system_versioning_alter_history = 1;</sql>

<dropNotNullConstraint
tableName="family"
columnName="external_id"
columnDataType="VARCHAR(255)"
/>
<sql>UPDATE family SET external_id = NULL</sql>
<renameColumn
tableName="family"
oldColumnName="external_id"
newColumnName="_external_id_unused"
columnDataType="VARCHAR(255)"
remarks="Migration of family external IDs to separate table"
/>
</changeSet>
</databaseChangeLog>
1 change: 1 addition & 0 deletions db/python/connect.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
'sample_external_id',
'sequencing_group_external_id',
'family',
'family_external_id',
'family_participant',
'participant_phenotypes',
'group_member',
Expand Down
13 changes: 8 additions & 5 deletions db/python/layers/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ def __init__(self, connection: Connection):
self.fptable = FamilyParticipantTable(self.connection)

async def create_family(
self, external_id: str, description: str = None, coded_phenotype: str = None
self,
external_ids: dict[str, str],
description: str | None = None,
coded_phenotype: str | None = None,
):
"""Create a family"""
return await self.ftable.create_family(
external_id=external_id,
external_ids=external_ids,
description=description,
coded_phenotype=coded_phenotype,
)
Expand Down Expand Up @@ -127,7 +130,7 @@ async def get_families_by_participants(
async def update_family(
self,
id_: int,
external_id: str = None,
external_ids: dict[str, str] | None = None,
description: str = None,
coded_phenotype: str = None,
) -> bool:
Expand All @@ -140,7 +143,7 @@ async def update_family(

return await self.ftable.update_family(
id_=id_,
external_id=external_id,
external_ids=external_ids,
description=description,
coded_phenotype=coded_phenotype,
)
Expand Down Expand Up @@ -303,7 +306,7 @@ async def import_pedigree(

for external_family_id in missing_external_family_ids:
internal_family_id = await self.ftable.create_family(
external_id=external_family_id,
external_ids={PRIMARY_EXTERNAL_ORG: external_family_id},
description=None,
coded_phenotype=None,
)
Expand Down
2 changes: 1 addition & 1 deletion db/python/layers/participant.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ async def generic_individual_metadata_importer(
# they might not be missing
for external_family_id in missing_family_ids:
new_pid = await ftable.create_family(
external_id=external_family_id,
external_ids={PRIMARY_EXTERNAL_ORG: external_family_id},
description=None,
coded_phenotype=None,
)
Expand Down
5 changes: 3 additions & 2 deletions db/python/layers/seqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from db.python.tables.project import Project
from models.enums import AnalysisStatus
from models.enums.web import SeqrDatasetType
from models.models import PRIMARY_EXTERNAL_ORG

# literally the most temporary thing ever, but for complete
# automation need to have sample inclusion / exclusion
Expand Down Expand Up @@ -282,8 +283,8 @@ async def sync_families(
return ['No families to synchronise']
family_data = [
{
'familyId': fam.external_id,
'displayName': fam.external_id,
'familyId': fam.external_ids[PRIMARY_EXTERNAL_ORG],
'displayName': fam.external_ids[PRIMARY_EXTERNAL_ORG],
'description': fam.description,
'codedPhenotype': fam.coded_phenotype,
}
Expand Down
2 changes: 1 addition & 1 deletion db/python/layers/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def assemble_nested_participants_from(
families = []
for family in families_by_pid.get(participant.id, []):
families.append(
FamilySimpleInternal(id=family.id, external_id=family.external_id)
FamilySimpleInternal(id=family.id, external_ids=family.external_ids)
)
nested_participant = NestedParticipantInternal(
id=participant.id,
Expand Down
Loading

0 comments on commit f2de605

Please sign in to comment.