Skip to content

Commit 7eee023

Browse files
committed
Add more logs
1 parent 071ed4c commit 7eee023

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

scripts/create_test_subset.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ def main(
182182
raise ValueError('Come on, what exactly are you asking for?')
183183

184184
# for reproducibility
185+
logger.info('Setting random seed to 42')
185186
random.seed(42)
186187

187188
# 1. Find any SG IDs to be moved by Family ID -test.
@@ -191,13 +192,16 @@ def main(
191192
)
192193

193194
# 2. Get all sids in project.
195+
logger.info(f'Querying all sids in {project}')
194196
sid_output = query(SG_ID_QUERY, variables={'project': project})
195197
all_sids = {sid['id'] for sid in sid_output.get('project').get('samples')}
198+
logger.info(f'Found {len(all_sids)} sids in {project}')
196199

197200
# 3. Randomly select from the remaining sgs
198201
additional_samples.update(random.sample(all_sids - additional_samples, samples_n))
199202

200203
# 4. Query all the samples from the selected sgs
204+
logger.info(f'Transfering {len(additional_samples)} samples. Querying metadata.')
201205
original_project_subset_data = query(
202206
QUERY_ALL_DATA, {'project': project, 'sids': list(additional_samples)}
203207
)
@@ -217,24 +221,32 @@ def main(
217221
# Parse Families & Participants
218222
if skip_ped:
219223
# If no family data is available, only the participants should be transferred.
224+
logger.info(
225+
'Skipping pedigree/family information. Transferring participants only.'
226+
)
227+
logger.info(f'Transferring {len(participant_data)} participants. ')
220228
upserted_participant_map = transfer_participants(
221229
target_project=target_project,
222230
participant_data=participant_data,
223231
)
224232

225233
else:
234+
logger.info(f'Transferring {len(participant_data)} participants. ')
226235
family_ids = transfer_families(
227236
project, target_project, internal_participant_ids
228237
)
229238
upserted_participant_map = transfer_ped(project, target_project, family_ids)
230239

231240
existing_data = query(EXISTING_DATA_QUERY, {'project': target_project})
232241

242+
logger.info('Transferring samples, sequencing groups, and assays')
233243
samples = original_project_subset_data.get('project').get('samples')
234244
transfer_samples_sgs_assays(
235245
samples, existing_data, upserted_participant_map, target_project, project
236246
)
247+
logger.info('Transferring analyses')
237248
transfer_analyses(samples, existing_data, target_project, project)
249+
logger.info('Subset generation complete!')
238250

239251

240252
def transfer_samples_sgs_assays(
@@ -248,7 +260,7 @@ def transfer_samples_sgs_assays(
248260
Transfer samples, sequencing groups, and assays from the original project to the
249261
test project.
250262
"""
251-
logging.info('Transferring samples, sequencing groups, and assays')
263+
logger.info(f'Transferring {len(samples)} samples')
252264
for s in samples:
253265
sample_type = None if s['type'] == 'None' else s['type']
254266
existing_sid: str | None = None

0 commit comments

Comments
 (0)