Skip to content

Commit

Permalink
Add new sample map column defaults, update sg meta on RNA ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
EddieLF committed Jan 31, 2024
1 parent c7c2280 commit 4570de2
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
15 changes: 14 additions & 1 deletion metamist/parser/generic_metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def __init__(
seq_platform_column: Optional[str] = None,
seq_facility_column: Optional[str] = None,
library_type_column: Optional[str] = None,
end_type_column: Optional[str] = None,
read_length_column: Optional[str] = None,
gvcf_column: Optional[str] = None,
meta_column: Optional[str] = None,
Expand Down Expand Up @@ -136,6 +137,7 @@ def __init__(
self.seq_platform_column = seq_platform_column
self.seq_facility_column = seq_facility_column
self.library_type_column = library_type_column
self.end_type_column = end_type_column
self.read_length_column = read_length_column
self.reference_assembly_location_column = reference_assembly_location_column
self.default_reference_assembly_location = default_reference_assembly_location
Expand Down Expand Up @@ -230,7 +232,12 @@ def get_library_type(self, row: SingleRow) -> str:
value = row.get(self.library_type_column, None)
return str(value)

def get_read_length(self, row: SingleRow) -> str:
def get_assay_end_type(self, row: SingleRow) -> Optional[str]:
    """
    Get the assay end type from a single row.

    Looks up ``self.end_type_column`` in ``row``.

    Returns:
        The cell value converted to ``str``, or ``None`` when the column is
        absent from the row (or its value is ``None``).
    """
    value = row.get(self.end_type_column, None)
    if value is None:
        # str(None) would produce the literal 'None', which is truthy and
        # would be written into the assay meta as if it were a real value.
        return None
    return str(value)

def get_assay_read_length(self, row: SingleRow) -> Optional[str]:
    """
    Get the assay read length from a single row.

    Looks up ``self.read_length_column`` in ``row``.

    Returns:
        The cell value converted to ``str``, or ``None`` when the column is
        absent from the row (or its value is ``None``).
    """
    value = row.get(self.read_length_column, None)
    if value is None:
        # str(None) would produce the literal 'None'; being truthy, it would
        # pass `if meta.get('read_length')` style checks and be recorded as
        # a genuine read length.
        return None
    return str(value)
Expand Down Expand Up @@ -672,6 +679,12 @@ async def get_assays_from_group(

if self.batch_number is not None:
collapsed_assay_meta['batch'] = self.batch_number

if sequencing_group.sequencing_type in ['polyarna', 'totalrna', 'singlecellrna']:
rows=sequencing_group.rows
collapsed_assay_meta['library_type'] = self.get_library_type(rows[0])
collapsed_assay_meta['end_type'] = self.get_assay_end_type(rows[0])
collapsed_assay_meta['read_length'] = self.get_assay_read_length(rows[0])

for read in reads[reads_type]:
assays.append(
Expand Down
14 changes: 12 additions & 2 deletions metamist/parser/generic_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ def __init__( # pylint: disable=too-many-arguments
default_sequencing_type='genome',
default_sequencing_technology='short-read',
default_sequencing_platform: str | None = None,
default_library_type: str | None=None,
default_assay_end_type: str | None=None,
default_assay_read_length: str | None=None,
default_sample_type=None,
default_analysis_type='qc',
default_analysis_status='completed',
Expand All @@ -435,6 +438,9 @@ def __init__( # pylint: disable=too-many-arguments
self.default_sequencing_type: str = default_sequencing_type
self.default_sequencing_technology: str = default_sequencing_technology
self.default_sequencing_platform: Optional[str] = default_sequencing_platform
self.default_library_type: Optional[str] = default_library_type
self.default_assay_end_type: Optional[str] = default_assay_end_type
self.default_assay_read_length: Optional[str] = default_assay_read_length
self.default_sample_type: Optional[str] = default_sample_type
self.default_analysis_type: str = default_analysis_type
self.default_analysis_status: str = default_analysis_status
Expand Down Expand Up @@ -1007,13 +1013,17 @@ def get_sequencing_group_meta_from_assays(self, assays: list[ParsedAssay]) -> di
"""
meta = {}
for assay in assays:
if assay['type'] not in RNA_SEQ_TYPES:
if assay['meta'].get('type') not in RNA_SEQ_TYPES:
continue
if assay['meta'].get('library_type'):
meta['library_type'] = assay['meta']['library_type']
if assay['meta'].get('reads', []) and len(assay['meta'].get('reads', [])) % 2 == 0:
meta['end_type'] = 'paired'
else:
meta['end_type'] = 'single'

if assay['meta'].get('read_length'):
meta['read_length'] = assay['meta']['read_length']
return meta

def get_sample_type(self, row: GroupedRow) -> str:
"""Get sample type from row"""
Expand Down
8 changes: 8 additions & 0 deletions metamist/parser/sample_file_map_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
CHECKSUM_COL_NAME = 'checksum'
SEQ_FACILITY_COL_NAME = 'sequencing_facility'
LIBRARY_TYPE_COL_NAME = 'library_type'
ASSAY_END_TYPE_COL_NAME = 'end_type'
READ_LENGTH_COL_NAME = 'read_length'

KeyMap = {
Expand Down Expand Up @@ -88,6 +89,9 @@ def __init__(
default_sample_type='blood',
default_sequencing_technology='short-read',
default_sequencing_platform='illumina',
default_library_type: str | None=None,
default_assay_end_type: str | None=None,
default_assay_read_length: str | None=None,
allow_extra_files_in_search_path=False,
default_reference_assembly_location: str | None = None,
):
Expand All @@ -101,10 +105,14 @@ def __init__(
seq_type_column=SEQ_TYPE_COL_NAME,
seq_facility_column=SEQ_FACILITY_COL_NAME,
library_type_column=LIBRARY_TYPE_COL_NAME,
end_type_column=ASSAY_END_TYPE_COL_NAME,
read_length_column=READ_LENGTH_COL_NAME,
default_sequencing_type=default_sequencing_type,
default_sample_type=default_sample_type,
default_sequencing_technology=default_sequencing_technology,
default_library_type=default_library_type,
default_assay_end_type=default_assay_end_type,
default_assay_read_length=default_assay_read_length,
default_reference_assembly_location=default_reference_assembly_location,
participant_meta_map={},
sample_meta_map={},
Expand Down

0 comments on commit 4570de2

Please sign in to comment.