Skip to content

Commit

Permalink
Merge pull request #623 from populationgenomics/dev
Browse files Browse the repository at this point in the history
Release
  • Loading branch information
vivbak authored Nov 23, 2023
2 parents a684651 + 5fb8892 commit 7db6896
Showing 1 changed file with 15 additions and 13 deletions.
28 changes: 15 additions & 13 deletions scripts/create_test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
type
assays {
id
meta
type
}
analyses {
Expand All @@ -114,7 +115,7 @@
"""
)

QUERY_FAMILY_SGID = gql(
QUERY_FAMILY_SAMPLES = gql(
"""
query FamilyQuery($project: String!) {
project(name: $project) {
Expand All @@ -127,7 +128,6 @@
}
}
}
}
}
"""
Expand Down Expand Up @@ -190,11 +190,11 @@ def main(
get_sids_for_families(project, families_n, additional_families)
)

# 2. Get all sids in project.
logger.info(f'Querying all sids in {project}')
# 2. Get all sample IDs and their SG IDs in project.
logger.info(f'Querying all samples in {project}')
sid_output = query(SG_ID_QUERY, variables={'project': project})
all_sids = {sid['id'] for sid in sid_output.get('project').get('samples')}
logger.info(f'Found {len(all_sids)} sids in {project}')
logger.info(f'Found {len(all_sids)} sample ids in {project}')

# 3. Randomly select from the remaining sgs
additional_samples.update(random.sample(all_sids - additional_samples, samples_n))
Expand Down Expand Up @@ -328,7 +328,7 @@ def upsert_assays(
existing_data,
sample_external_id,
existing_sgid,
assay.get('type'),
assay
)
existing_assay_id = _existing_assay.get('id') if _existing_assay else None
assay_upsert = AssayUpsert(
Expand Down Expand Up @@ -444,7 +444,7 @@ def get_existing_sg(


def get_existing_assay(
data: dict, sample_id: str, sg_id: str, assay_type: str
data: dict, sample_id: str, sg_id: str, original_assay: dict
) -> dict | None:
"""
Find assay in main data for this sample
Expand All @@ -453,7 +453,7 @@ def get_existing_assay(
"""
if sg := get_existing_sg(existing_data=data, sample_id=sample_id, sg_id=sg_id):
for assay in sg.get('assays', []):
if assay.get('type') == assay_type:
if assay.get('type') == original_assay.get('type') and assay.get('meta') == original_assay.get('meta'):
return assay

return None
Expand All @@ -479,7 +479,7 @@ def get_sids_for_families(
) -> set[str]:
"""Returns specific samples to be included in the test project."""

family_sgid_output = query(QUERY_FAMILY_SGID, {'project': project})
family_sgid_output = query(QUERY_FAMILY_SAMPLES, {'project': project})

all_family_sgids = family_sgid_output.get('project', {}).get('families', [])
assert all_family_sgids, 'No families returned in GQL result'
Expand Down Expand Up @@ -583,14 +583,16 @@ def transfer_participants(
"""Transfers relevant participants between projects"""
existing_participants = papi.get_participants(target_project)

target_project_epids = [
participant['external_id'] for participant in existing_participants
]
target_project_pid_map = {
participant['external_id']: participant['id'] for participant in existing_participants
}

participants_to_transfer = []
for participant in participant_data:
if participant['externalId'] not in target_project_epids:
if participant['externalId'] in list(target_project_pid_map.keys()):
# Participants with id field will be updated & those without will be inserted
participant['id'] = target_project_pid_map[participant['externalId']]
else:
del participant['id']
transfer_participant = {
'external_id': participant['externalId'],
Expand Down

0 comments on commit 7db6896

Please sign in to comment.