@@ -182,6 +182,7 @@ def main(
182
182
raise ValueError ('Come on, what exactly are you asking for?' )
183
183
184
184
# for reproducibility
185
+ logger .info ('Setting random seed to 42' )
185
186
random .seed (42 )
186
187
187
188
# 1. Find the SG IDs to be moved by Family ID -test.
@@ -191,13 +192,16 @@ def main(
191
192
)
192
193
193
194
# 2. Get all sids in project.
195
+ logger .info (f'Querying all sids in { project } ' )
194
196
sid_output = query (SG_ID_QUERY , variables = {'project' : project })
195
197
all_sids = {sid ['id' ] for sid in sid_output .get ('project' ).get ('samples' )}
198
+ logger .info (f'Found { len (all_sids )} sids in { project } ' )
196
199
197
200
# 3. Randomly select from the remaining sgs
198
201
additional_samples .update (random .sample (all_sids - additional_samples , samples_n ))
199
202
200
203
# 4. Query all the samples from the selected sgs
204
+ logger .info (f'Transfering { len (additional_samples )} samples. Querying metadata.' )
201
205
original_project_subset_data = query (
202
206
QUERY_ALL_DATA , {'project' : project , 'sids' : list (additional_samples )}
203
207
)
@@ -217,24 +221,32 @@ def main(
217
221
# Parse Families & Participants
218
222
if skip_ped :
219
223
# If no family data is available, only the participants should be transferred.
224
+ logger .info (
225
+ 'Skipping pedigree/family information. Transferring participants only.'
226
+ )
227
+ logger .info (f'Transferring { len (participant_data )} participants. ' )
220
228
upserted_participant_map = transfer_participants (
221
229
target_project = target_project ,
222
230
participant_data = participant_data ,
223
231
)
224
232
225
233
else :
234
+ logger .info (f'Transferring { len (participant_data )} participants. ' )
226
235
family_ids = transfer_families (
227
236
project , target_project , internal_participant_ids
228
237
)
229
238
upserted_participant_map = transfer_ped (project , target_project , family_ids )
230
239
231
240
existing_data = query (EXISTING_DATA_QUERY , {'project' : target_project })
232
241
242
+ logger .info ('Transferring samples, sequencing groups, and assays' )
233
243
samples = original_project_subset_data .get ('project' ).get ('samples' )
234
244
transfer_samples_sgs_assays (
235
245
samples , existing_data , upserted_participant_map , target_project , project
236
246
)
247
+ logger .info ('Transferring analyses' )
237
248
transfer_analyses (samples , existing_data , target_project , project )
249
+ logger .info ('Subset generation complete!' )
238
250
239
251
240
252
def transfer_samples_sgs_assays (
@@ -248,7 +260,7 @@ def transfer_samples_sgs_assays(
248
260
Transfer samples, sequencing groups, and assays from the original project to the
249
261
test project.
250
262
"""
251
- logging .info ('Transferring samples, sequencing groups, and assays ' )
263
+ logger .info (f 'Transferring { len ( samples ) } samples ' )
252
264
for s in samples :
253
265
sample_type = None if s ['type' ] == 'None' else s ['type' ]
254
266
existing_sid : str | None = None
0 commit comments