diff --git a/metamist/parser/generic_metadata_parser.py b/metamist/parser/generic_metadata_parser.py index 44e88dd01..413204824 100644 --- a/metamist/parser/generic_metadata_parser.py +++ b/metamist/parser/generic_metadata_parser.py @@ -94,8 +94,8 @@ def __init__( seq_technology_column: Optional[str] = None, seq_platform_column: Optional[str] = None, seq_facility_column: Optional[str] = None, - library_type_column: Optional[str] = None, - end_type_column: Optional[str] = None, + seq_library_column: Optional[str] = None, + read_end_type_column: Optional[str] = None, read_length_column: Optional[str] = None, gvcf_column: Optional[str] = None, meta_column: Optional[str] = None, @@ -103,8 +103,8 @@ def __init__( batch_number: Optional[str] = None, reference_assembly_location_column: Optional[str] = None, default_reference_assembly_location: Optional[str] = None, - default_sequencing_type='genome', default_sample_type=None, + default_sequencing_type='genome', default_sequencing_technology='short-read', default_sequencing_platform='illumina', allow_extra_files_in_search_path=False, @@ -136,8 +136,8 @@ def __init__( self.seq_technology_column = seq_technology_column self.seq_platform_column = seq_platform_column self.seq_facility_column = seq_facility_column - self.library_type_column = library_type_column - self.end_type_column = end_type_column + self.seq_library_column = seq_library_column + self.read_end_type_column = read_end_type_column self.read_length_column = read_length_column self.reference_assembly_location_column = reference_assembly_location_column self.default_reference_assembly_location = default_reference_assembly_location @@ -213,7 +213,7 @@ def get_sequencing_type(self, row: SingleRow) -> str: value = 'exome' elif 'mt' in value: value = 'mtseq' - elif 'polya' in value: + elif 'polya' in value or 'mrna' in value: value = 'polyarna' elif 'total' in value: value = 'totalrna' @@ -221,23 +221,23 @@ def get_sequencing_type(self, row: SingleRow) -> str: value 
= 'singlecellrna' return str(value) - + def get_sequencing_facility(self, row: SingleRow) -> str: """Get sequencing facility from row""" value = row.get(self.seq_facility_column, None) return str(value) - - def get_library_type(self, row: SingleRow) -> str: - """Get library type from row""" - value = row.get(self.library_type_column, None) + + def get_sequencing_library(self, row: SingleRow) -> str: + """Get sequencing library from row""" + value = row.get(self.seq_library_column, None) return str(value) - - def get_assay_end_type(self, row: SingleRow) -> str: - """Get assay end type from row""" - value = row.get(self.end_type_column, None) + + def get_read_end_type(self, row: SingleRow) -> str: + """Get read end type from row""" + value = row.get(self.read_end_type_column, None) return str(value) - - def get_assay_read_length(self, row: SingleRow) -> str: + + def get_read_length(self, row: SingleRow) -> str: """Get read length from row""" value = row.get(self.read_length_column, None) return str(value) @@ -547,9 +547,10 @@ async def get_participant_meta_from_group(self, rows: GroupedRow): """Get participant-metadata from rows then set it in the ParticipantMetaGroup""" return self.collapse_arbitrary_meta(self.participant_meta_map, rows) - async def get_sequencing_group_meta( + async def get_sequencing_group_meta_from_variant_files( # Unused as of 2024-02-01 self, sequencing_group: ParsedSequencingGroup ) -> dict: + """Get sequencing group metadata from variant (vcf) files""" meta: dict[str, Any] = {} if not sequencing_group.sample.external_sid: @@ -679,12 +680,13 @@ async def get_assays_from_group( if self.batch_number is not None: collapsed_assay_meta['batch'] = self.batch_number - + if sequencing_group.sequencing_type in ['polyarna', 'totalrna', 'singlecellrna']: - rows=sequencing_group.rows - collapsed_assay_meta['library_type'] = self.get_library_type(rows[0]) - collapsed_assay_meta['end_type'] = self.get_assay_end_type(rows[0]) - 
collapsed_assay_meta['read_length'] = self.get_assay_read_length(rows[0]) + rows = sequencing_group.rows + collapsed_assay_meta['sequencing_facility'] = self.get_sequencing_facility(rows[0]) + collapsed_assay_meta['library_type'] = self.get_sequencing_library(rows[0]) + collapsed_assay_meta['end_type'] = self.get_read_end_type(rows[0]) + collapsed_assay_meta['read_length'] = self.get_read_length(rows[0]) for read in reads[reads_type]: assays.append( @@ -708,8 +710,8 @@ async def get_assays_from_group( }, ) ) - - sequencing_group.meta = await self.get_sequencing_group_meta_from_assays(assays) + + sequencing_group.meta = self.get_sequencing_group_meta_from_assays(assays) return assays diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py index 65b000300..34dde4401 100644 --- a/metamist/parser/generic_parser.py +++ b/metamist/parser/generic_parser.py @@ -65,6 +65,7 @@ + GVCF_EXTENSIONS + VCF_EXTENSIONS ) +RNA_SEQ_TYPES = ['polyarna', 'totalrna', 'singlecellrna'] # construct rmatch string to capture all fastq patterns rmatch_str = ( @@ -83,8 +84,6 @@ SUPPORTED_READ_TYPES = Literal['fastq', 'bam', 'cram'] SUPPORTED_VARIANT_TYPES = Literal['gvcf', 'vcf'] -RNA_SEQ_TYPES = ['polyarna', 'totalrna', 'singlecellrna'] - QUERY_MATCH_PARTICIPANTS = gql( """ query GetParticipantEidMapQuery($project: String!) 
{ @@ -210,6 +209,7 @@ def __init__( self.karyotype = karyotype self.meta = meta + self.external_family_id: str = None self.samples: list[ParsedSample] = [] def to_sm(self) -> ParticipantUpsert: @@ -261,7 +261,7 @@ def to_sm(self) -> SampleUpsert: class ParsedSequencingGroup: - """Class for holding sequence metadata grouped by type""" + """Class for holding sequencing group metadata""" def __init__( self, @@ -270,8 +270,8 @@ def __init__( internal_seqgroup_id: int | None, external_seqgroup_id: str | None, sequencing_type: str, - sequence_technology: str, - sequence_platform: str | None, + sequencing_technology: str, + sequencing_platform: str | None, meta: dict[str, Any] | None, ): self.sample = sample @@ -280,8 +280,8 @@ def __init__( self.internal_seqgroup_id = internal_seqgroup_id self.external_seqgroup_id = external_seqgroup_id self.sequencing_type = sequencing_type - self.sequencing_technology = sequence_technology - self.sequencing_platform = sequence_platform + self.sequencing_technology = sequencing_technology + self.sequencing_platform = sequencing_platform self.meta = meta self.assays: list[ParsedAssay] = [] @@ -352,7 +352,7 @@ def __init__( def to_sm(self): """To SM model""" if not self.sequencing_group.internal_seqgroup_id: - raise ValueError('Sequence group ID must be filled in by now') + raise ValueError('Sequencing group ID must be filled in by now') return Analysis( status=AnalysisStatus(self.status), type=str(self.type), @@ -364,7 +364,7 @@ def to_sm(self): def chunk(iterable: Iterable[T], chunk_size=50) -> Iterator[List[T]]: """ - Chunk a sequence by yielding lists of `chunk_size` + Chunk an iterable by yielding lists of `chunk_size` """ chnk: List[T] = [] for element in iterable: @@ -400,26 +400,27 @@ def wrapper(*args, **kwargs): class GenericParser( CloudHelper ): # pylint: disable=too-many-public-methods,too-many-arguments - """Parser for VCGS manifest""" + """Parser for ingesting rows of metadata""" def __init__( # pylint: 
disable=too-many-arguments self, path_prefix: Optional[str], search_paths: list[str], project: str, - default_sequencing_type='genome', - default_sequencing_technology='short-read', - default_sequencing_platform: str | None = None, - default_library_type: str | None=None, - default_assay_end_type: str | None=None, - default_assay_read_length: str | None=None, - default_sample_type=None, - default_analysis_type='qc', - default_analysis_status='completed', - skip_checking_gcs_objects=False, + default_sequencing_type: str = 'genome', + default_sequencing_technology: str = 'short-read', + default_sequencing_platform: str = None, + default_sequencing_facility: str = None, + default_library_type: str = None, + default_assay_end_type: str = None, + default_assay_read_length: str = None, + default_sample_type: str = None, + default_analysis_type: str = None, + default_analysis_status: str = None, key_map: Dict[str, str] = None, - ignore_extra_keys=False, required_keys: Set[str] = None, + ignore_extra_keys=False, + skip_checking_gcs_objects=False, verbose=True, ): self.path_prefix = path_prefix @@ -438,12 +439,13 @@ def __init__( # pylint: disable=too-many-arguments self.default_sequencing_type: str = default_sequencing_type self.default_sequencing_technology: str = default_sequencing_technology self.default_sequencing_platform: Optional[str] = default_sequencing_platform - self.default_library_type: Optional[str] = default_library_type - self.default_assay_end_type: Optional[str] = default_assay_end_type - self.default_assay_read_length: Optional[str] = default_assay_read_length + self.default_sequencing_facility: Optional[str] = default_sequencing_facility + self.default_sequencing_library: Optional[str] = default_library_type + self.default_read_end_type: Optional[str] = default_assay_end_type + self.default_read_length: Optional[str] = default_assay_read_length self.default_sample_type: Optional[str] = default_sample_type - self.default_analysis_type: str = 
default_analysis_type - self.default_analysis_status: str = default_analysis_status + self.default_analysis_type: Optional[str] = default_analysis_type + self.default_analysis_status: Optional[str] = default_analysis_status # gs specific self.default_bucket = None @@ -508,8 +510,7 @@ async def parse_manifest( # pylint: disable=too-many-branches ) -> Any: """ Parse manifest from iterable (file pointer / String.IO) - - Returns a dict mapping external sample ID to CPG sample ID + Returns a summary of the parsed records. """ rows = await self.file_pointer_to_rows( file_pointer=file_pointer, delimiter=delimiter @@ -517,7 +518,13 @@ async def parse_manifest( # pylint: disable=too-many-branches return await self.from_json(rows, confirm, dry_run) async def from_json(self, rows, confirm=False, dry_run=False): - """Parse passed rows""" + """ + Asynchronously parse rows of data, adding chunks of participants, samples, sequencing groups, assays, and analyses. + + Groups rows of participants by their IDs. For each participant, group samples by their IDs. + If no participants are present, groups samples by their IDs. + For each sample, gets its sequencing groups by their keys. For each sequencing group, groups assays and analyses. 
+ """ await self.validate_rows(rows) # one participant with no value @@ -544,7 +551,9 @@ async def from_json(self, rows, confirm=False, dry_run=False): sequencing_groups: list[ParsedSequencingGroup] = [] for schunk in chunk(samples): - seq_groups_for_chunk = await asyncio.gather(*map(self.group_assays, schunk)) + seq_groups_for_chunk = await asyncio.gather( + *map(self.get_sample_sequencing_groups, schunk) + ) for sample, seqgroups in zip(schunk, seq_groups_for_chunk): sample.sequencing_groups = seqgroups @@ -566,7 +575,7 @@ async def from_json(self, rows, confirm=False, dry_run=False): # mark for removal sequencing_group.assays = None continue - sequencing_group.meta = self.get_seq_group_meta_from_assays(chunked_assays) + sequencing_group.meta = self.get_sequencing_group_meta_from_assays(chunked_assays) sequencing_group.assays = chunked_assays assays.extend(chunked_assays) sequencing_group.analyses = analyses @@ -590,7 +599,8 @@ async def from_json(self, rows, confirm=False, dry_run=False): if dry_run: logger.info('Dry run, so returning without inserting / updating metadata') - return summary, (participants if participants else samples) + self.prepare_detail(samples) + return summary if confirm: resp = str(input(message + '\n\nConfirm (y): ')) @@ -610,7 +620,11 @@ async def from_json(self, rows, confirm=False, dry_run=False): [s.to_sm() for s in samples], ) - print(json.dumps(result, indent=2)) + if self.verbose: + logger.info(json.dumps(result, indent=2)) + else: + self.prepare_detail(samples) + return summary def _get_dict_reader(self, file_pointer, delimiter: str): """ @@ -671,6 +685,35 @@ def prepare_summary( return summary + def prepare_detail( + self, + samples: list[ParsedSample], + ): + """ + Print all samples and their sequencing groups that will be inserted / updated + """ + sample_participants = {sample.external_sid : sample.participant.external_pid for sample in samples} + sample_sequencing_groups = {sample.external_sid : sample.sequencing_groups for 
sample in samples} + + details = [] + for sample, participant in sample_participants.items(): + for sg in sample_sequencing_groups[sample]: + sg_details = { + 'Participant': None, + 'Sample': sample, + 'Sequencing Type': sg.sequencing_type, + 'Assays': sum(1 for a in sg.assays if not a.internal_id), + } + if participant: + sg_details['Participant'] = participant + details.append(sg_details) + + headers = ['Participant', 'Sample', 'Sequencing Type', 'Assays'] + print('\t'.join(headers)) + for detail in details: + values = [str(detail.get(header, '')) for header in headers] + print('\t'.join(values)) + def prepare_message( self, summary, @@ -704,18 +747,18 @@ def prepare_message( message = f"""\ {self.project}: {header} - Sequence types: {str_seq_count} - Sequence group types: {str_seqg_count} + Assays count: {str_seq_count} + Sequencing group count: {str_seqg_count} Adding {summary['participants']['insert']} participants Adding {summary['samples']['insert']} samples - Adding {summary['sequencing_groups']['insert']} sequence groups + Adding {summary['sequencing_groups']['insert']} sequencing groups Adding {summary['assays']['insert']} assays - Adding {summary['analyses']['insert']} analysis + Adding {summary['analyses']['insert']} analyses Updating {summary['participants']['update']} participants Updating {summary['samples']['update']} samples - Updating {summary['sequencing_groups']['update']} sequence groups + Updating {summary['sequencing_groups']['update']} sequencing groups Updating {summary['assays']['update']} assays """ return message @@ -914,7 +957,7 @@ async def group_samples( ) -> list[ParsedSample]: """ From a set of rows, group (by calling self.get_sample_id) - and parse sample other sample values. + and parse samples and their values. 
""" samples = [] for sid, sample_rows in group_by(rows, self.get_sample_id).items(): @@ -937,7 +980,7 @@ async def get_sample_meta_from_group(self, rows: GroupedRow) -> dict: def get_sequencing_group_key(self, row: SingleRow) -> Hashable: """ - Get a key to group sequencing rows by. + Get a key to group sequencing group rows by. """ if seq_group_id := self.get_sequencing_group_id(row): return seq_group_id @@ -954,10 +997,10 @@ def get_sequencing_group_key(self, row: SingleRow) -> Hashable: return tuple(v for _, v in keys) - async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: + async def get_sample_sequencing_groups(self, sample: ParsedSample) -> list[ParsedSequencingGroup]: """ - From a set of rows, group (by calling self.get_sequencing_group_key) - and parse sequencing group other sequencing group values. + From a set of samples, group (by calling self.get_sequencing_group_key) + and parse sequencing groups and their values. """ sequencing_groups = [] for seq_rows in group_by(sample.rows, self.get_sequencing_group_key).values(): @@ -969,8 +1012,8 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup internal_seqgroup_id=None, external_seqgroup_id=self.get_sequencing_group_id(seq_rows[0]), sequencing_type=seq_type, - sequence_technology=seq_tech, - sequence_platform=seq_platform, + sequencing_technology=seq_tech, + sequencing_platform=seq_platform, meta={}, sample=sample, rows=seq_rows, @@ -1006,23 +1049,18 @@ async def get_assays_from_group( From a sequencing_group (list of rows with some common seq fields), return list[ParsedAssay] (does not have to equal number of rows). 
""" - + def get_sequencing_group_meta_from_assays(self, assays: list[ParsedAssay]) -> dict: """ From a list of assays, get any relevant sequencing group meta """ meta = {} for assay in assays: - if assay['meta'].get('type') not in RNA_SEQ_TYPES: + if assay.meta.get('sequencing_type') not in RNA_SEQ_TYPES: continue - if assay['meta'].get('library_type'): - meta['library_type'] = assay['meta']['library_type'] - if assay['meta'].get('reads', []) and len(assay['meta'].get('reads', [])) % 2 == 0: - meta['end_type'] = 'paired' - else: - meta['end_type'] = 'single' - if assay['meta'].get('read_length'): - meta['read_length'] = assay['meta']['read_length'] + for key in ('sequencing_facility', 'library_type', 'read_end_type', 'read_length'): + if assay.meta.get(key): + meta[key] = assay.meta[key] return meta def get_sample_type(self, row: GroupedRow) -> str: @@ -1051,6 +1089,22 @@ def get_sequencing_platform(self, row: SingleRow) -> str | None: """Get sequencing platform from row""" return self.default_sequencing_platform + def get_sequencing_facility(self, row: SingleRow) -> str | None: + """Get sequencing facility from row""" + return self.default_sequencing_facility + + def get_sequencing_library(self, row: SingleRow) -> str | None: + """Get library type from row""" + return self.default_sequencing_library + + def get_read_end_type(self, row: SingleRow) -> str | None: + """Get read end type from row""" + return self.default_read_end_type + + def get_read_length(self, row: SingleRow) -> str | None: + """Get read length from row""" + return self.default_read_length + def get_analysis_type(self, sample_id: str, row: GroupedRow) -> str: """Get analysis type from row""" return str(self.default_analysis_type) @@ -1320,6 +1374,7 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]: invalid_fastq_groups = [grp for grp in fastq_groups.values() if len(grp) != 2] if invalid_fastq_groups: + # TODO: implement handling for single-ended reads raise ValueError(f'Invalid fastq group 
{invalid_fastq_groups}') sorted_groups = sorted( diff --git a/metamist/parser/sample_file_map_parser.py b/metamist/parser/sample_file_map_parser.py index 4e6e57f8b..5bb7dc17d 100644 --- a/metamist/parser/sample_file_map_parser.py +++ b/metamist/parser/sample_file_map_parser.py @@ -17,8 +17,8 @@ SEQ_TYPE_COL_NAME = 'type' CHECKSUM_COL_NAME = 'checksum' SEQ_FACILITY_COL_NAME = 'sequencing_facility' -LIBRARY_TYPE_COL_NAME = 'library_type' -ASSAY_END_TYPE_COL_NAME = 'end_type' +LIBRARY_TYPE_COL_NAME = 'sequencing_library' +READ_END_TYPE_COL_NAME = 'read_end_type' READ_LENGTH_COL_NAME = 'read_length' KeyMap = { @@ -35,6 +35,10 @@ SAMPLE_ID_COL_NAME: ['sample_id', 'sample', 'sample id'], READS_COL_NAME: ['filename', 'filenames', 'files', 'file'], SEQ_TYPE_COL_NAME: ['type', 'types', 'sequencing type', 'sequencing_type'], + SEQ_FACILITY_COL_NAME: ['facility', 'sequencing facility', 'sequencing_facility'], + LIBRARY_TYPE_COL_NAME: ['library', 'library_prep', 'library prep', 'library type', 'library_type', 'sequencing_library', 'sequencing library'], + READ_END_TYPE_COL_NAME: ['read_end_type', 'read end type', 'read_end_types', 'read end types', 'end type', 'end_type', 'end_types', 'end types'], + READ_LENGTH_COL_NAME: ['length', 'read length', 'read_length', 'read lengths', 'read_lengths'], CHECKSUM_COL_NAME: ['md5', 'checksum'], } @@ -48,6 +52,10 @@ - 'Filenames' - ['Type'] - 'Checksum' +- ['Sequencing Facility'] - needed for exome & rna samples +- ['Library Type'] - needed for exome & rna samples +- ['End Type'] - needed for rna samples +- ['Read Length'] - needed for rna samples e.g. Sample ID Filenames @@ -70,6 +78,13 @@ Apollo sample_id004 sample_id004.filename-R1.fastq.gz Apollo sample_id004 sample_id004.filename-R2.fastq.gz +Example with optional columns for RNA samples +e.g. 
+ Individual ID Sample ID Filenames Type Facility Library End Type Read Length + Hera sample_id001 sample_id001_TSStrtRNA_R1.fastq.gz,sample_id001_TSStrtRNA_R2.fastq.gz totalrna VCGS TSStrtRNA paired 151 + Hestia sample_id002 sample_id002_TSStrmRNA_R1.fastq.gz,sample_id002_TSStrmRNA_R2.fastq.gz polyarna VCGS TSStrmRNA paired 151 + + This format is useful for ingesting filenames for the seqr loading pipeline """ @@ -81,17 +96,18 @@ class SampleFileMapParser(GenericMetadataParser): """Parser for SampleFileMap""" - def __init__( + def __init__( # pylint: disable=too-many-arguments self, search_locations: List[str], project: str, - default_sequencing_type='genome', default_sample_type='blood', + default_sequencing_type='genome', default_sequencing_technology='short-read', default_sequencing_platform='illumina', - default_library_type: str | None=None, - default_assay_end_type: str | None=None, - default_assay_read_length: str | None=None, + default_sequencing_facility: str = None, + default_sequencing_library: str = None, + default_read_end_type: str = None, + default_read_length: str = None, allow_extra_files_in_search_path=False, default_reference_assembly_location: str | None = None, ): @@ -105,14 +121,16 @@ def __init__( seq_type_column=SEQ_TYPE_COL_NAME, seq_facility_column=SEQ_FACILITY_COL_NAME, - library_type_column=LIBRARY_TYPE_COL_NAME, + seq_library_column=LIBRARY_TYPE_COL_NAME, - end_type_column=ASSAY_END_TYPE_COL_NAME, + read_end_type_column=READ_END_TYPE_COL_NAME, read_length_column=READ_LENGTH_COL_NAME, - default_sequencing_type=default_sequencing_type, default_sample_type=default_sample_type, + default_sequencing_type=default_sequencing_type, default_sequencing_technology=default_sequencing_technology, - default_library_type=default_library_type, - default_assay_end_type=default_assay_end_type, - default_assay_read_length=default_assay_read_length, + default_sequencing_platform=default_sequencing_platform, + default_sequencing_facility=default_sequencing_facility, + 
default_sequencing_library=default_sequencing_library, + default_read_end_type=default_read_end_type, + default_read_length=default_read_length, default_reference_assembly_location=default_reference_assembly_location, participant_meta_map={}, sample_meta_map={}, @@ -144,10 +162,19 @@ def get_info() -> tuple[str, str]: help='The metamist project to import manifest into', ) @click.option('--default-sample-type', default='blood') -@click.option('--default-sequence-type', default='wgs') -@click.option('--default-sequence-technology', default='short-read') +@click.option('--default-sequencing-type', default='wgs') +@click.option('--default-sequencing-technology', default='short-read') +@click.option('--default-sequencing-facility', default=None) +@click.option('--default-sequencing-library', default=None) +@click.option('--default-read-end-type', default=None) +@click.option('--default-read-length', default=None) @click.option( - '--confirm', is_flag=True, help='Confirm with user input before updating server' + '--default-reference-assembly', + required=False, + help=( + 'CRAMs require a reference assembly to realign. ' + 'This must be provided if any of the reads are crams' + ), ) @click.option( '--search-path', @@ -155,9 +182,6 @@ def get_info() -> tuple[str, str]: required=True, help='Search path to search for files within', ) -@click.option( - '--dry-run', is_flag=True, help='Just prepare the run, without comitting it' -) @click.option( '--allow-extra-files-in-search_path', is_flag=True, @@ -165,26 +189,28 @@ def get_info() -> tuple[str, str]: 'in the search path that are not covered by the sample map.', ) @click.option( - '--default-reference-assembly', - required=False, - help=( - 'CRAMs require a reference assembly to realign. 
' - 'This must be provided if any of the reads are crams' - ), + '--confirm', is_flag=True, help='Confirm with user input before updating server' +) +@click.option( + '--dry-run', is_flag=True, help='Just prepare the run, without committing it' +) @click.argument('manifests', nargs=-1) @run_as_sync -async def main( +async def main( # pylint: disable=too-many-arguments manifests, search_path: List[str], project, default_sample_type='blood', default_sequencing_type='genome', - default_sequence_technology='short-read', + default_sequencing_technology='short-read', + default_sequencing_facility: str = None, + default_sequencing_library: str = None, + default_read_end_type: str = None, + default_read_length: str = None, default_reference_assembly: str = None, + allow_extra_files_in_search_path=False, confirm=False, dry_run=False, - allow_extra_files_in_search_path=False, ): """Run script from CLI arguments""" if not manifests: @@ -198,10 +224,14 @@ async def main( project=project, default_sample_type=default_sample_type, default_sequencing_type=default_sequencing_type, - default_sequencing_technology=default_sequence_technology, + default_sequencing_technology=default_sequencing_technology, + default_sequencing_facility=default_sequencing_facility, + default_sequencing_library=default_sequencing_library, + default_read_end_type=default_read_end_type, + default_read_length=default_read_length, + default_reference_assembly_location=default_reference_assembly, search_locations=search_path, allow_extra_files_in_search_path=allow_extra_files_in_search_path, - default_reference_assembly_location=default_reference_assembly, ) for manifest in manifests: logger.info(f'Importing {manifest}')