From 9199dab65cb1d3ad1e0d4c731ba89ef6a770b637 Mon Sep 17 00:00:00 2001
From: EddieLF <34049565+EddieLF@users.noreply.github.com>
Date: Fri, 16 Feb 2024 16:11:18 +1100
Subject: [PATCH] Rna ingestion updates (#676)

* Updating parsers with library type etc

* Add new sample map column defaults, update sg meta on RNA ingest

* Update parser column default, update rna sg meta, update parsing prints

* Change reference to 'sequence' to 'assay'

* Change reference to 'library_type' to 'sequencing_library'

* Change another reference to 'library_type' to 'sequencing_library'

* Add required fields for exomes, more tidying, use tabulate

* Add tabulate to requirements

* Fix parse_manifest return for tests, update script referencing group_assays fn

* Remove required exome fields, re-add default analysis status 'completed'

* Update parser testing with RNA examples

* Remove added external_family_id from ParsedParticipant class

* Refactor parser to use SequencingDefualts class

* Collapse sequencing defaults into new class

* Update setup.py and resolve merge conflicts

* isort linting

* Fix typehint for read and ref files function return
---
 metamist/parser/generic_metadata_parser.py | 209 ++++++++++++++-------
 metamist/parser/generic_parser.py          | 191 ++++++++++++++-----
 metamist/parser/sample_file_map_parser.py  | 103 +++++++---
 scripts/parse_existing_cohort.py           |   6 +-
 scripts/parse_ont_sheet.py                 |  38 ++--
 scripts/parse_sample_file_map.py           |  18 +-
 scripts/parse_vcgs_manifest.py             |   6 +-
 setup.py                                   |   1 +
 test/test_parse_existing_cohort.py         |   4 +-
 test/test_parse_file_map.py                | 172 ++++++++++++++++-
 10 files changed, 569 insertions(+), 179 deletions(-)

diff --git a/metamist/parser/generic_metadata_parser.py b/metamist/parser/generic_metadata_parser.py
index d89104df0..3c9758770 100644
--- a/metamist/parser/generic_metadata_parser.py
+++ b/metamist/parser/generic_metadata_parser.py
@@ -4,15 +4,17 @@
 import re
 import shlex
 from functools import reduce
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 import click
 
 from metamist.parser.generic_parser import (  # noqa
+    DefaultSequencing,
     GenericParser,
     GroupedRow,
     ParsedAnalysis,
     ParsedAssay,
+    ParsedSample,
     ParsedSequencingGroup,
     SingleRow,
     run_as_sync,
@@ -92,16 +94,22 @@ def __init__(
         seq_type_column: Optional[str] = None,
         seq_technology_column: Optional[str] = None,
         seq_platform_column: Optional[str] = None,
+        seq_facility_column: Optional[str] = None,
+        seq_library_column: Optional[str] = None,
+        read_end_type_column: Optional[str] = None,
+        read_length_column: Optional[str] = None,
         gvcf_column: Optional[str] = None,
         meta_column: Optional[str] = None,
         seq_meta_column: Optional[str] = None,
         batch_number: Optional[str] = None,
         reference_assembly_location_column: Optional[str] = None,
         default_reference_assembly_location: Optional[str] = None,
-        default_sequencing_type='genome',
         default_sample_type=None,
-        default_sequencing_technology='short-read',
-        default_sequencing_platform='illumina',
+        default_sequencing=DefaultSequencing(
+            seq_type='genome', technology='short-read', platform='illumina'
+        ),
+        default_read_end_type: Optional[str] = None,
+        default_read_length: Optional[str | int] = None,
         allow_extra_files_in_search_path=False,
         **kwargs,
     ):
@@ -109,10 +117,10 @@ def __init__(
             path_prefix=None,
             search_paths=search_locations,
             project=project,
-            default_sequencing_type=default_sequencing_type,
             default_sample_type=default_sample_type,
-            default_sequencing_technology=default_sequencing_technology,
-            default_sequencing_platform=default_sequencing_platform,
+            default_sequencing=default_sequencing,
+            default_read_end_type=default_read_end_type,
+            default_read_length=default_read_length,
             **kwargs,
         )
 
@@ -130,6 +138,10 @@ def __init__(
         self.seq_type_column = seq_type_column
         self.seq_technology_column = seq_technology_column
         self.seq_platform_column = seq_platform_column
+        self.seq_facility_column = seq_facility_column
+        self.seq_library_column = seq_library_column
+        self.read_end_type_column = read_end_type_column
+        self.read_length_column = read_length_column
         self.reference_assembly_location_column = reference_assembly_location_column
         self.default_reference_assembly_location = default_reference_assembly_location
 
@@ -168,14 +180,14 @@ def get_sequencing_types(self, row: GroupedRow) -> list[str]:
         if isinstance(row, dict):
             return [self.get_sequencing_type(row)]
         return [
-            str(r.get(self.seq_type_column, self.default_sequencing_type)) for r in row
+            str(r.get(self.seq_type_column, self.default_sequencing.seq_type)) for r in row
         ]
 
     def get_sequencing_technology(self, row: SingleRow) -> str:
         """Get assay technology for single row"""
         value = (
             row.get(self.seq_technology_column, None)
-            or self.default_sequencing_technology
+            or self.default_sequencing.technology
         )
         value = value.lower()
 
@@ -187,7 +199,7 @@ def get_sequencing_technology(self, row: SingleRow) -> str:
     def get_sequencing_platform(self, row: SingleRow) -> str:
         """Get sequencing platform for single row"""
         value = (
-            row.get(self.seq_platform_column, None) or self.default_sequencing_platform
+            row.get(self.seq_platform_column, None) or self.default_sequencing.platform
         )
         value = value.lower()
 
@@ -195,7 +207,7 @@ def get_sequencing_platform(self, row: SingleRow) -> str:
 
     def get_sequencing_type(self, row: SingleRow) -> str:
         """Get assay type from row"""
-        value = row.get(self.seq_type_column, None) or self.default_sequencing_type
+        value = row.get(self.seq_type_column, None) or self.default_sequencing.seq_type
         value = value.lower()
 
         if value == 'wgs':
@@ -204,9 +216,47 @@ def get_sequencing_type(self, row: SingleRow) -> str:
             value = 'exome'
         elif 'mt' in value:
             value = 'mtseq'
+        elif 'polya' in value or 'mrna' in value:
+            value = 'polyarna'
+        elif 'total' in value:
+            value = 'totalrna'
+        elif 'single' in value:
+            value = 'singlecellrna'
 
         return str(value)
 
+    def get_sequencing_facility(self, row: SingleRow) -> str:
+        """Get sequencing facility from row"""
+        value = (
+            row.get(self.seq_facility_column, None) or self.default_sequencing.facility
+        )
+        value = str(value) if value else None
+        return value
+
+    def get_sequencing_library(self, row: SingleRow) -> str:
+        """Get sequencing library from row"""
+        value = (
+            row.get(self.seq_library_column, None) or self.default_sequencing.library
+        )
+        value = str(value) if value else None
+        return value
+
+    def get_read_end_type(self, row: SingleRow) -> str:
+        """Get read end type from row"""
+        value = (
+            row.get(self.read_end_type_column, None) or self.default_read_end_type
+        )
+        value = str(value).lower() if value else None
+        return value
+
+    def get_read_length(self, row: SingleRow) -> int:
+        """Get read length from row"""
+        value = (
+            row.get(self.read_length_column, None) or self.default_read_length
+        )
+        value = int(value) if value else None
+        return value
+
     def get_assay_id(self, row: GroupedRow) -> Optional[dict[str, str]]:
         """Get external assay ID from row. Needs to be implemented per parser.
         NOTE: To be re-thought after assay group changes are applied"""
@@ -515,6 +565,7 @@ async def get_participant_meta_from_group(self, rows: GroupedRow):
     async def get_sequencing_group_meta(
         self, sequencing_group: ParsedSequencingGroup
     ) -> dict:
+        """Get sequencing group metadata from variant (vcf) files"""
         meta: dict[str, Any] = {}
 
         if not sequencing_group.sample.external_sid:
@@ -552,6 +603,55 @@ async def get_sequencing_group_meta(
 
         return meta
 
+    async def get_read_and_ref_files_and_checksums(self, sample_id: str, rows: GroupedRow) -> (
+            Tuple[List[str], List[str], Set[str]]):
+        """Get read filenames and checksums from rows."""
+        read_filenames: List[str] = []
+        read_checksums: List[str] = []
+        reference_assemblies: set[str] = set()
+        for r in rows:
+            _rfilenames = await self.get_read_filenames(sample_id=sample_id, row=r)
+            read_filenames.extend(_rfilenames)
+            if self.checksum_column and self.checksum_column in r:
+                checksums = await self.get_checksums_from_row(sample_id, r, _rfilenames)
+                if not checksums:
+                    checksums = [None] * len(_rfilenames)
+                read_checksums.extend(checksums)
+            if self.reference_assembly_location_column:
+                ref = r.get(self.reference_assembly_location_column)
+                if ref:
+                    reference_assemblies.add(ref)
+        return read_filenames, read_checksums, reference_assemblies
+
+    async def parse_cram_assays(self, sample: ParsedSample, reference_assemblies: set[str]) -> dict[str, Any]:
+        """Parse CRAM assays"""
+        if len(reference_assemblies) > 1:
+            # sorted for consistent testing
+            str_ref_assemblies = ', '.join(sorted(reference_assemblies))
+            raise ValueError(
+                f'Multiple reference assemblies were defined for {sample.external_sid}: {str_ref_assemblies}'
+            )
+        if len(reference_assemblies) == 1:
+            ref = next(iter(reference_assemblies))
+        else:
+            ref = self.default_reference_assembly_location
+            if not ref:
+                raise ValueError(
+                    f'Reads type for {sample.external_sid!r} is CRAM, but a reference '
+                    f'is not defined, please set the default reference assembly path'
+                )
+
+        ref_fp = self.file_path(ref)
+        secondary_files = (
+            await self.create_secondary_file_objects_by_potential_pattern(
+                ref_fp, ['.fai']
+            )
+        )
+        cram_reference = await self.create_file_object(
+            ref_fp, secondary_files=secondary_files
+        )
+        return {'reference_assembly' : cram_reference}
+
     async def get_assays_from_group(
         self, sequencing_group: ParsedSequencingGroup
     ) -> list[ParsedAssay]:
@@ -566,27 +666,9 @@ async def get_assays_from_group(
 
         assays = []
 
-        read_filenames: List[str] = []
-        read_checksums: List[str] = []
-        reference_assemblies: set[str] = set()
-
-        for r in rows:
-            _rfilenames = await self.get_read_filenames(
-                sample_id=sample.external_sid, row=r
-            )
-            read_filenames.extend(_rfilenames)
-            if self.checksum_column and self.checksum_column in r:
-                checksums = await self.get_checksums_from_row(
-                    sample.external_sid, r, _rfilenames
-                )
-                if not checksums:
-                    checksums = [None] * len(_rfilenames)
-                read_checksums.extend(checksums)
-
-            if self.reference_assembly_location_column:
-                ref = r.get(self.reference_assembly_location_column)
-                if ref:
-                    reference_assemblies.add(ref)
+        read_filenames, read_checksums, reference_assemblies = await self.get_read_and_ref_files_and_checksums(
+            sample.external_sid, rows
+        )
 
         # strip in case collaborator put "file1, file2"
         full_read_filenames: List[str] = []
@@ -614,37 +696,32 @@ async def get_assays_from_group(
         # collapsed_assay_meta['reads'] = reads[reads_type]
 
         if reads_type == 'cram':
-            if len(reference_assemblies) > 1:
-                # sorted for consistent testing
-                str_ref_assemblies = ', '.join(sorted(reference_assemblies))
-                raise ValueError(
-                    f'Multiple reference assemblies were defined for {sample.external_sid}: {str_ref_assemblies}'
-                )
-            if len(reference_assemblies) == 1:
-                ref = next(iter(reference_assemblies))
-            else:
-                ref = self.default_reference_assembly_location
-
-                if not ref:
-                    raise ValueError(
-                        f'Reads type for {sample.external_sid!r} is CRAM, but a reference '
-                        f'is not defined, please set the default reference assembly path'
-                    )
-
-            ref_fp = self.file_path(ref)
-            secondary_files = (
-                await self.create_secondary_file_objects_by_potential_pattern(
-                    ref_fp, ['.fai']
-                )
+            collapsed_assay_meta.update(
+                await self.parse_cram_assays(sample, reference_assemblies)
             )
-            cram_reference = await self.create_file_object(
-                ref_fp, secondary_files=secondary_files
-            )
-            collapsed_assay_meta['reference_assembly'] = cram_reference
 
         if self.batch_number is not None:
             collapsed_assay_meta['batch'] = self.batch_number
 
+        if sequencing_group.sequencing_type in ['exome', 'polyarna', 'totalrna', 'singlecellrna']:
+            rows = sequencing_group.rows
+            # Exome / RNA should have facility and library, allow missing for exomes - for now
+            if self.get_sequencing_facility(rows[0]):
+                collapsed_assay_meta['sequencing_facility'] = self.get_sequencing_facility(rows[0])
+            if self.get_sequencing_library(rows[0]):
+                collapsed_assay_meta['sequencing_library'] = self.get_sequencing_library(rows[0])
+            # RNA requires read end type and length as well
+            if sequencing_group.sequencing_type != 'exome':
+                collapsed_assay_meta['read_end_type'] = self.get_read_end_type(rows[0])
+                collapsed_assay_meta['read_length'] = self.get_read_length(rows[0])
+                # if any of the above fields are not set for an RNA assay, raise an error
+                if not all(collapsed_assay_meta.values()):
+                    raise ValueError(
+                        f'Not all required fields were set for RNA sample {sample.external_sid}:\n'
+                        f'{collapsed_assay_meta}\n'
+                        f'Use the default value arguments if they are not present in the manifest.'
+                    )
+
         for read in reads[reads_type]:
             assays.append(
                 ParsedAssay(
@@ -653,8 +730,8 @@ async def get_assays_from_group(
                     # as we grab all reads, and then determine assay
                     # grouping from there.
                     rows=sequencing_group.rows,
-                    internal_seq_id=None,
-                    external_seq_ids={},
+                    internal_assay_id=None,
+                    external_assay_ids={},
                     # unfortunately hard to break them up by row in the current format
                     # assay_status=self.get_assay_status(rows),
                     assay_type='sequencing',
@@ -667,6 +744,8 @@ async def get_assays_from_group(
                     },
                 )
             )
+        if not sequencing_group.meta:
+            sequencing_group.meta = self.get_sequencing_group_meta_from_assays(assays)
 
         return assays
 
@@ -803,9 +882,9 @@ async def main(
     if not manifests:
         raise ValueError('Expected at least 1 manifest')
 
-    extra_seach_paths = [m for m in manifests if m.startswith('gs://')]
-    if extra_seach_paths:
-        search_path = list(set(search_path).union(set(extra_seach_paths)))
+    extra_search_paths = [m for m in manifests if m.startswith('gs://')]
+    if extra_search_paths:
+        search_path = list(set(search_path).union(set(extra_search_paths)))
 
     participant_meta_map: Dict[Any, Any] = {}
     sample_meta_map: Dict[Any, Any] = {}
@@ -839,7 +918,9 @@ async def main(
         reported_gender_column=reported_gender_column,
         karyotype_column=karyotype_column,
         default_sample_type=default_sample_type,
-        default_sequencing_type=default_assay_type,
+        default_sequencing=DefaultSequencing(
+            seq_type='genome', technology='short-read', platform='illumina'
+        ),
         search_locations=search_path,
     )
     for manifest in manifests:
diff --git a/metamist/parser/generic_parser.py b/metamist/parser/generic_parser.py
index 9bee4fe70..8dc2de5d7 100644
--- a/metamist/parser/generic_parser.py
+++ b/metamist/parser/generic_parser.py
@@ -28,6 +28,7 @@
 )
 
 from cloudpathlib import AnyPath
+from tabulate import tabulate
 
 from metamist.apis import AnalysisApi, AssayApi, ParticipantApi, SampleApi
 from metamist.graphql import gql, query_async
@@ -65,6 +66,7 @@
     + GVCF_EXTENSIONS
     + VCF_EXTENSIONS
 )
+RNA_SEQ_TYPES = ['polyarna', 'totalrna', 'singlecellrna']
 
 # construct rmatch string to capture all fastq patterns
 rmatch_str = (
@@ -259,7 +261,7 @@ def to_sm(self) -> SampleUpsert:
 
 
 class ParsedSequencingGroup:
-    """Class for holding sequence metadata grouped by type"""
+    """Class for holding sequencing group metadata"""
 
     def __init__(
         self,
@@ -268,8 +270,8 @@ def __init__(
         internal_seqgroup_id: int | None,
         external_seqgroup_id: str | None,
         sequencing_type: str,
-        sequence_technology: str,
-        sequence_platform: str | None,
+        sequencing_technology: str,
+        sequencing_platform: str | None,
         meta: dict[str, Any] | None,
     ):
         self.sample = sample
@@ -278,8 +280,8 @@ def __init__(
         self.internal_seqgroup_id = internal_seqgroup_id
         self.external_seqgroup_id = external_seqgroup_id
         self.sequencing_type = sequencing_type
-        self.sequencing_technology = sequence_technology
-        self.sequencing_platform = sequence_platform
+        self.sequencing_technology = sequencing_technology
+        self.sequencing_platform = sequencing_platform
         self.meta = meta
 
         self.assays: list[ParsedAssay] = []
@@ -292,7 +294,7 @@ def to_sm(self) -> SequencingGroupUpsert:
             technology=self.sequencing_technology,
             platform=self.sequencing_platform,
             meta=self.meta,
-            assays=[sq.to_sm() for sq in self.assays or []],
+            assays=[a.to_sm() for a in self.assays or []],
             id=self.internal_seqgroup_id,
         )
 
@@ -304,16 +306,16 @@ def __init__(
         self,
         group: ParsedSequencingGroup,
         rows: GroupedRow,
-        internal_seq_id: int | None,
-        external_seq_ids: dict[str, str],
+        internal_assay_id: int | None,
+        external_assay_ids: dict[str, str],
         assay_type: str | None,
         meta: dict[str, Any] | None,
     ):
         self.sequencing_group = group
         self.rows = rows
 
-        self.internal_id = internal_seq_id
-        self.external_ids = external_seq_ids
+        self.internal_id = internal_assay_id
+        self.external_ids = external_assay_ids
         self.assay_type = assay_type
         self.meta = meta
 
@@ -350,7 +352,7 @@ def __init__(
     def to_sm(self):
         """To SM model"""
         if not self.sequencing_group.internal_seqgroup_id:
-            raise ValueError('Sequence group ID must be filled in by now')
+            raise ValueError('Sequencing group ID must be filled in by now')
         return Analysis(
             status=AnalysisStatus(self.status),
             type=str(self.type),
@@ -360,9 +362,26 @@ def to_sm(self):
         )
 
 
+class DefaultSequencing:
+    """Groups default sequencing information"""
+    def __init__(
+        self,
+        seq_type: str = 'genome',  # seq_type because `type` is a built-in
+        technology: str = 'short-read',
+        platform: str = 'illumina',
+        facility: str = None,
+        library: str = None,
+    ):
+        self.seq_type = seq_type
+        self.technology = technology
+        self.platform = platform
+        self.facility = facility
+        self.library = library
+
+
 def chunk(iterable: Iterable[T], chunk_size=50) -> Iterator[List[T]]:
     """
-    Chunk a sequence by yielding lists of `chunk_size`
+    Chunk an iterable by yielding lists of `chunk_size`
     """
     chnk: List[T] = []
     for element in iterable:
@@ -398,23 +417,23 @@ def wrapper(*args, **kwargs):
 class GenericParser(
     CloudHelper
 ):  # pylint: disable=too-many-public-methods,too-many-arguments
-    """Parser for VCGS manifest"""
+    """Parser for ingesting rows of metadata"""
 
     def __init__(  # pylint: disable=too-many-arguments
         self,
         path_prefix: Optional[str],
         search_paths: list[str],
         project: str,
-        default_sequencing_type='genome',
-        default_sequencing_technology='short-read',
-        default_sequencing_platform: str | None = None,
-        default_sample_type=None,
-        default_analysis_type='qc',
-        default_analysis_status='completed',
-        skip_checking_gcs_objects=False,
+        default_sample_type: str = None,
+        default_sequencing: DefaultSequencing = DefaultSequencing(),
+        default_read_end_type: str = None,
+        default_read_length: str | int = None,
+        default_analysis_type: str = None,
+        default_analysis_status: str = 'completed',
         key_map: Dict[str, str] = None,
-        ignore_extra_keys=False,
         required_keys: Set[str] = None,
+        ignore_extra_keys=False,
+        skip_checking_gcs_objects=False,
         verbose=True,
     ):
         self.path_prefix = path_prefix
@@ -430,12 +449,12 @@ def __init__(  # pylint: disable=too-many-arguments
 
         self.project = project
 
-        self.default_sequencing_type: str = default_sequencing_type
-        self.default_sequencing_technology: str = default_sequencing_technology
-        self.default_sequencing_platform: Optional[str] = default_sequencing_platform
+        self.default_sequencing = default_sequencing
+        self.default_read_end_type: Optional[str] = default_read_end_type
+        self.default_read_length: Optional[str] = default_read_length
         self.default_sample_type: Optional[str] = default_sample_type
-        self.default_analysis_type: str = default_analysis_type
-        self.default_analysis_status: str = default_analysis_status
+        self.default_analysis_type: Optional[str] = default_analysis_type
+        self.default_analysis_status: Optional[str] = default_analysis_status
 
         # gs specific
         self.default_bucket = None
@@ -500,8 +519,7 @@ async def parse_manifest(  # pylint: disable=too-many-branches
     ) -> Any:
         """
         Parse manifest from iterable (file pointer / String.IO)
-
-        Returns a dict mapping external sample ID to CPG sample ID
+        Returns a summary of the parsed records.
         """
         rows = await self.file_pointer_to_rows(
             file_pointer=file_pointer, delimiter=delimiter
@@ -509,7 +527,13 @@ async def parse_manifest(  # pylint: disable=too-many-branches
         return await self.from_json(rows, confirm, dry_run)
 
     async def from_json(self, rows, confirm=False, dry_run=False):
-        """Parse passed rows"""
+        """
+        Asynchronously parse rows of data, adding chunks of participants, samples, sequencing groups, assays, and analyses.
+
+        Groups rows of participants by their IDs. For each participant, group samples by their IDs.
+        If no participants are present, groups samples by their IDs.
+        For each sample, gets its sequencing groups by their keys. For each sequencing group, groups assays and analyses.
+        """
         await self.validate_rows(rows)
 
         # one participant with no value
@@ -536,7 +560,9 @@ async def from_json(self, rows, confirm=False, dry_run=False):
 
         sequencing_groups: list[ParsedSequencingGroup] = []
         for schunk in chunk(samples):
-            seq_groups_for_chunk = await asyncio.gather(*map(self.group_assays, schunk))
+            seq_groups_for_chunk = await asyncio.gather(
+                *map(self.get_sample_sequencing_groups, schunk)
+            )
 
             for sample, seqgroups in zip(schunk, seq_groups_for_chunk):
                 sample.sequencing_groups = seqgroups
@@ -581,6 +607,7 @@ async def from_json(self, rows, confirm=False, dry_run=False):
 
         if dry_run:
             logger.info('Dry run, so returning without inserting / updating metadata')
+            self.prepare_detail(samples)
             return summary, (participants if participants else samples)
 
         if confirm:
@@ -601,7 +628,10 @@ async def from_json(self, rows, confirm=False, dry_run=False):
                 [s.to_sm() for s in samples],
             )
 
-        print(json.dumps(result, indent=2))
+        if self.verbose:
+            logger.info(json.dumps(result, indent=2))
+        else:
+            self.prepare_detail(samples)
 
     def _get_dict_reader(self, file_pointer, delimiter: str):
         """
@@ -662,6 +692,29 @@ def prepare_summary(
 
         return summary
 
+    def prepare_detail(self, samples: list[ParsedSample]):
+        """Uses tabulate to print a detailed summary of the samples being inserted / updated"""
+        sample_participants = {}
+        for sample in samples:
+            sample_participants[sample.external_sid] = sample.participant.external_pid if sample.participant else None
+        sample_sequencing_groups = {sample.external_sid: sample.sequencing_groups for sample in samples}
+
+        details = []
+        for sample, participant in sample_participants.items():
+            for sg in sample_sequencing_groups[sample]:
+                sg_details = {
+                    'Participant': participant if participant else '',
+                    'Sample': sample,
+                    'Sequencing Type': sg.sequencing_type,
+                    'Assays': sum(1 for a in sg.assays if not a.internal_id),
+                }
+                details.append(sg_details)
+
+        headers = ['Participant', 'Sample', 'Sequencing Type', 'Assays']
+        table = list(list(detail.values()) for detail in details)
+
+        print(tabulate(table, headers=headers, tablefmt='grid'))
+
     def prepare_message(
         self,
         summary,
@@ -681,32 +734,38 @@ def prepare_message(
             header = f'Processing samples: {external_sample_ids}'
 
         assays_count: dict[str, int] = defaultdict(int)
+        assays_types_count: dict[str, int] = defaultdict(int)
         sequencing_group_counts: dict[str, int] = defaultdict(int)
-        for s in assays:
-            assays_count[str(s.assay_type)] += 1
+        for a in assays:
+            assays_count[str(a.meta.get('sequencing_type'))] += 1
+            assays_types_count[str(a.assay_type)] += 1
         for sg in sequencing_groups:
             sequencing_group_counts[str(sg.sequencing_type)] += 1
 
-        str_seq_count = ', '.join(f'{k}={v}' for k, v in assays_count.items())
+        str_assay_count = ', '.join(f'{k}={v}' for k, v in assays_count.items())
+        str_assay_types_count = ', '.join(f'{k}={v}' for k, v in assays_types_count.items())
         str_seqg_count = ', '.join(
             f'{k}={v}' for k, v in sequencing_group_counts.items()
         )
 
         message = f"""\
+
+
                 {self.project}: {header}
 
-                Sequence types: {str_seq_count}
-                Sequence group types: {str_seqg_count}
+                Assays count: {str_assay_count}
+                Assays types count: {str_assay_types_count}
+                Sequencing group count: {str_seqg_count}
 
                 Adding {summary['participants']['insert']} participants
                 Adding {summary['samples']['insert']} samples
-                Adding {summary['sequencing_groups']['insert']} sequence groups
+                Adding {summary['sequencing_groups']['insert']} sequencing groups
                 Adding {summary['assays']['insert']} assays
-                Adding {summary['analyses']['insert']} analysis
+                Adding {summary['analyses']['insert']} analyses
 
                 Updating {summary['participants']['update']} participants
                 Updating {summary['samples']['update']} samples
-                Updating {summary['sequencing_groups']['update']} sequence groups
+                Updating {summary['sequencing_groups']['update']} sequencing groups
                 Updating {summary['assays']['update']} assays
                 """
         return message
@@ -905,7 +964,7 @@ async def group_samples(
     ) -> list[ParsedSample]:
         """
         From a set of rows, group (by calling self.get_sample_id)
-        and parse sample other sample values.
+        and parse samples and their values.
         """
         samples = []
         for sid, sample_rows in group_by(rows, self.get_sample_id).items():
@@ -928,7 +987,7 @@ async def get_sample_meta_from_group(self, rows: GroupedRow) -> dict:
 
     def get_sequencing_group_key(self, row: SingleRow) -> Hashable:
         """
-        Get a key to group sequencing rows by.
+        Get a key to group sequencing group rows by.
         """
         if seq_group_id := self.get_sequencing_group_id(row):
             return seq_group_id
@@ -945,10 +1004,10 @@ def get_sequencing_group_key(self, row: SingleRow) -> Hashable:
 
         return tuple(v for _, v in keys)
 
-    async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]:
+    async def get_sample_sequencing_groups(self, sample: ParsedSample) -> list[ParsedSequencingGroup]:
         """
-        From a set of rows, group (by calling self.get_sequencing_group_key)
-        and parse sequencing group other sequencing group values.
+        From a set of samples, group (by calling self.get_sequencing_group_key)
+        and parse sequencing groups and their values.
         """
         sequencing_groups = []
         for seq_rows in group_by(sample.rows, self.get_sequencing_group_key).values():
@@ -960,8 +1019,8 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup
                 internal_seqgroup_id=None,
                 external_seqgroup_id=self.get_sequencing_group_id(seq_rows[0]),
                 sequencing_type=seq_type,
-                sequence_technology=seq_tech,
-                sequence_platform=seq_platform,
+                sequencing_technology=seq_tech,
+                sequencing_platform=seq_platform,
                 meta={},
                 sample=sample,
                 rows=seq_rows,
@@ -998,6 +1057,23 @@ async def get_assays_from_group(
         return list[ParsedAssay] (does not have to equal number of rows).
         """
 
+    def get_sequencing_group_meta_from_assays(self, assays: list[ParsedAssay]) -> dict:
+        """
+        From a list of assays, get any relevant sequencing group meta
+        """
+        meta = {}
+        for assay in assays:
+            if assay.meta.get('sequencing_type') == 'exome':
+                keys = ('sequencing_facility', 'sequencing_library')
+            elif assay.meta.get('sequencing_type') in RNA_SEQ_TYPES:
+                keys = ('sequencing_facility', 'sequencing_library', 'read_end_type', 'read_length')
+            else:
+                continue
+            for key in keys:
+                if assay.meta.get(key) is not None:
+                    meta[key] = assay.meta[key]
+        return meta
+
     def get_sample_type(self, row: GroupedRow) -> str:
         """Get sample type from row"""
         return self.default_sample_type
@@ -1014,15 +1090,31 @@ def get_sequencing_group_id(self, row: SingleRow) -> str | None:
 
     def get_sequencing_type(self, row: SingleRow) -> str:
         """Get sequence types from row"""
-        return self.default_sequencing_type
+        return self.default_sequencing.seq_type
 
     def get_sequencing_technology(self, row: SingleRow) -> str:
         """Get sequencing technology from row"""
-        return self.default_sequencing_technology
+        return self.default_sequencing.technology
 
     def get_sequencing_platform(self, row: SingleRow) -> str | None:
         """Get sequencing platform from row"""
-        return self.default_sequencing_platform
+        return self.default_sequencing.platform
+
+    def get_sequencing_facility(self, row: SingleRow) -> str | None:
+        """Get sequencing facility from row"""
+        return self.default_sequencing.facility
+
+    def get_sequencing_library(self, row: SingleRow) -> str | None:
+        """Get library type from row"""
+        return self.default_sequencing.library
+
+    def get_read_end_type(self, row: SingleRow) -> str | None:
+        """Get read end type from row"""
+        return self.default_read_end_type
+
+    def get_read_length(self, row: SingleRow) -> str | None:
+        """Get read length from row"""
+        return self.default_read_length
 
     def get_analysis_type(self, sample_id: str, row: GroupedRow) -> str:
         """Get analysis type from row"""
@@ -1293,6 +1385,7 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]:
 
         invalid_fastq_groups = [grp for grp in fastq_groups.values() if len(grp) != 2]
         if invalid_fastq_groups:
+            # TODO: implement handling for single-ended reads
             raise ValueError(f'Invalid fastq group {invalid_fastq_groups}')
 
         sorted_groups = sorted(
diff --git a/metamist/parser/sample_file_map_parser.py b/metamist/parser/sample_file_map_parser.py
index 077aca912..e095d8036 100644
--- a/metamist/parser/sample_file_map_parser.py
+++ b/metamist/parser/sample_file_map_parser.py
@@ -6,13 +6,17 @@
 import click
 
 from metamist.parser.generic_metadata_parser import GenericMetadataParser, run_as_sync
-from metamist.parser.generic_parser import SingleRow
+from metamist.parser.generic_parser import DefaultSequencing, SingleRow
 
 PARTICIPANT_COL_NAME = 'individual_id'
 SAMPLE_ID_COL_NAME = 'sample_id'
 READS_COL_NAME = 'filenames'
 SEQ_TYPE_COL_NAME = 'type'
 CHECKSUM_COL_NAME = 'checksum'
+SEQ_FACILITY_COL_NAME = 'sequencing_facility'
+SEQ_LIBRARY_COL_NAME = 'sequencing_library'
+READ_END_TYPE_COL_NAME = 'read_end_type'
+READ_LENGTH_COL_NAME = 'read_length'
 
 KeyMap = {
     PARTICIPANT_COL_NAME: [
@@ -28,6 +32,10 @@
     SAMPLE_ID_COL_NAME: ['sample_id', 'sample', 'sample id'],
     READS_COL_NAME: ['filename', 'filenames', 'files', 'file'],
     SEQ_TYPE_COL_NAME: ['type', 'types', 'sequencing type', 'sequencing_type'],
+    SEQ_FACILITY_COL_NAME: ['facility', 'sequencing facility', 'sequencing_facility'],
+    SEQ_LIBRARY_COL_NAME: ['library', 'library_prep', 'library prep', 'library type', 'library_type', 'sequencing_library', 'sequencing library'],
+    READ_END_TYPE_COL_NAME: ['read_end_type', 'read end type', 'read_end_types', 'read end types', 'end type', 'end_type', 'end_types', 'end types'],
+    READ_LENGTH_COL_NAME: ['length', 'read length', 'read_length', 'read lengths', 'read_lengths'],
     CHECKSUM_COL_NAME: ['md5', 'checksum'],
 }
 
@@ -41,6 +49,10 @@
 - 'Filenames'
 - ['Type']
 - 'Checksum'
+- ['Sequencing Facility'] - needed for exome & rna samples
+- ['Library Type'] - needed for exome & rna samples
+- ['End Type'] - needed for rna samples
+- ['Read Length'] - needed for rna samples
 
 e.g.
     Sample ID       Filenames
@@ -63,6 +75,13 @@
     Apollo	        sample_id004	sample_id004.filename-R1.fastq.gz
     Apollo	        sample_id004	sample_id004.filename-R2.fastq.gz
 
+Example with optional columns for RNA samples
+e.g.
+    Individual ID	Sample ID	    Filenames	                                                            Type        Facility  Library    End Type  Read Length
+    Hera            sample_id001	sample_id001_TSStrtRNA_R1.fastq.gz,sample_id001_TSStrtRNA_R2.fastq.gz	totalrna    VCGS      TSStrtRNA  paired    151
+    Hestia          sample_id002	sample_id002_TSStrmRNA_R1.fastq.gz,sample_id002_TSStrmRNA_R2.fastq.gz	polyarna    VCGS      TSStrmRNA  paired    151
+
+
 This format is useful for ingesting filenames for the seqr loading pipeline
 """
 
@@ -78,12 +97,15 @@ def __init__(
         self,
         search_locations: List[str],
         project: str,
-        default_sequencing_type='genome',
         default_sample_type='blood',
-        default_sequencing_technology='short-read',
-        default_sequencing_platform='illumina',
+        default_sequencing=DefaultSequencing(
+            seq_type='genome', technology='short-read', platform='illumina'
+        ),
+        default_read_end_type: str = None,
+        default_read_length: str | int = None,
         allow_extra_files_in_search_path=False,
         default_reference_assembly_location: str | None = None,
+        verbose=True,
     ):
         super().__init__(
             search_locations=search_locations,
@@ -93,9 +115,14 @@ def __init__(
             reads_column=READS_COL_NAME,
             checksum_column=CHECKSUM_COL_NAME,
             seq_type_column=SEQ_TYPE_COL_NAME,
-            default_sequencing_type=default_sequencing_type,
+            seq_facility_column=SEQ_FACILITY_COL_NAME,
+            seq_library_column=SEQ_LIBRARY_COL_NAME,
+            read_end_type_column=READ_END_TYPE_COL_NAME,
+            read_length_column=READ_LENGTH_COL_NAME,
             default_sample_type=default_sample_type,
-            default_sequencing_technology=default_sequencing_technology,
+            default_sequencing=default_sequencing,
+            default_read_end_type=default_read_end_type,
+            default_read_length=default_read_length,
             default_reference_assembly_location=default_reference_assembly_location,
             participant_meta_map={},
             sample_meta_map={},
@@ -103,6 +130,7 @@ def __init__(
             qc_meta_map={},
             allow_extra_files_in_search_path=allow_extra_files_in_search_path,
             key_map=KeyMap,
+            verbose=verbose,
         )
 
     def get_sample_id(self, row: SingleRow) -> str:
@@ -127,10 +155,19 @@ def get_info() -> tuple[str, str]:
     help='The metamist project to import manifest into',
 )
 @click.option('--default-sample-type', default='blood')
-@click.option('--default-sequence-type', default='wgs')
-@click.option('--default-sequence-technology', default='short-read')
+@click.option('--default-sequencing-type', default='wgs')
+@click.option('--default-sequencing-technology', default='short-read')
+@click.option('--default-sequencing-facility', default=None)
+@click.option('--default-sequencing-library', default=None)
+@click.option('--default-read-end-type', default=None)
+@click.option('--default-read-length', default=None)
 @click.option(
-    '--confirm', is_flag=True, help='Confirm with user input before updating server'
+    '--default-reference-assembly',
+    required=False,
+    help=(
+        'CRAMs require a reference assembly to realign. '
+        'This must be provided if any of the reads are crams'
+    ),
 )
 @click.option(
     '--search-path',
@@ -139,52 +176,62 @@ def get_info() -> tuple[str, str]:
     help='Search path to search for files within',
 )
 @click.option(
-    '--dry-run', is_flag=True, help='Just prepare the run, without comitting it'
-)
-@click.option(
-    '--allow-extra-files-in-search_path',
+    '--allow-extra-files-in-search-path',
     is_flag=True,
     help='By default, this parser will fail if there are crams, bams, fastqs '
     'in the search path that are not covered by the sample map.',
 )
 @click.option(
-    '--default-reference-assembly',
-    required=False,
-    help=(
-        'CRAMs require a reference assembly to realign. '
-        'This must be provided if any of the reads are crams'
-    ),
+    '--confirm', is_flag=True, help='Confirm with user input before updating server'
 )
+@click.option(
+    '--dry-run', is_flag=True, help='Just prepare the run, without comitting it'
+)
+@click.option('--verbose', '-v', is_flag=True, help='Verbose output')
 @click.argument('manifests', nargs=-1)
 @run_as_sync
-async def main(
+async def main(  # pylint: disable=too-many-arguments
     manifests,
     search_path: List[str],
     project,
     default_sample_type='blood',
     default_sequencing_type='genome',
-    default_sequence_technology='short-read',
+    default_sequencing_technology='short-read',
+    default_sequencing_platform='illumina',
+    default_sequencing_facility: str = None,
+    default_sequencing_library: str = None,
+    default_read_end_type: str = None,
+    default_read_length: str = None,
     default_reference_assembly: str = None,
+    allow_extra_files_in_search_path=False,
     confirm=False,
     dry_run=False,
-    allow_extra_files_in_search_path=False,
+    verbose=False,
 ):
     """Run script from CLI arguments"""
     if not manifests:
         raise ValueError('Expected at least 1 manifest')
 
-    extra_seach_paths = [m for m in manifests if m.startswith('gs://')]
-    if extra_seach_paths:
-        search_path = list(set(search_path).union(set(extra_seach_paths)))
+    extra_search_paths = [m for m in manifests if m.startswith('gs://')]
+    if extra_search_paths:
+        search_path = list(set(search_path).union(set(extra_search_paths)))
 
     parser = SampleFileMapParser(
         project=project,
         default_sample_type=default_sample_type,
-        default_sequencing_type=default_sequencing_type,
-        default_sequencing_technology=default_sequence_technology,
+        default_sequencing=DefaultSequencing(
+            seq_type=default_sequencing_type,
+            technology=default_sequencing_technology,
+            platform=default_sequencing_platform,
+            facility=default_sequencing_facility,
+            library=default_sequencing_library,
+        ),
+        default_read_end_type=default_read_end_type,
+        default_read_length=default_read_length,
+        default_reference_assembly_location=default_reference_assembly,
         search_locations=search_path,
         allow_extra_files_in_search_path=allow_extra_files_in_search_path,
-        default_reference_assembly_location=default_reference_assembly,
+        verbose=verbose,
     )
     for manifest in manifests:
         logger.info(f'Importing {manifest}')
diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py
index 1df35063c..e0665e1ca 100644
--- a/scripts/parse_existing_cohort.py
+++ b/scripts/parse_existing_cohort.py
@@ -49,7 +49,7 @@
     SingleRow,
     run_as_sync,
 )
-from metamist.parser.generic_parser import READS_EXTENSIONS
+from metamist.parser.generic_parser import READS_EXTENSIONS, DefaultSequencing
 
 logger = logging.getLogger(__file__)
 logger.addHandler(logging.StreamHandler())
@@ -132,7 +132,9 @@ def __init__(
             assay_meta_map=Columns.sequence_meta_map(),
             batch_number=batch_number,
             allow_extra_files_in_search_path=True,
-            default_sequencing_type=sequencing_type,
+            default_sequencing=DefaultSequencing(
+                seq_type=sequencing_type,
+            )
         )
 
     def _get_dict_reader(self, file_pointer, delimiter: str):
diff --git a/scripts/parse_ont_sheet.py b/scripts/parse_ont_sheet.py
index 61865a122..29fe0c2ba 100644
--- a/scripts/parse_ont_sheet.py
+++ b/scripts/parse_ont_sheet.py
@@ -6,7 +6,11 @@
 import click
 
 from metamist.parser.generic_metadata_parser import GenericMetadataParser, run_as_sync
-from metamist.parser.generic_parser import ParsedSample, ParsedSequencingGroup
+from metamist.parser.generic_parser import (
+    DefaultSequencing,
+    ParsedSample,
+    ParsedSequencingGroup,
+)
 
 logger = logging.getLogger(__file__)
 logger.addHandler(logging.StreamHandler())
@@ -37,10 +41,12 @@ def __init__(
         self,
         search_locations: List[str],
         project: str,
-        default_sequencing_type='genome',
-        default_sequencing_technology='long-read',
-        default_sequencing_platform='oxford-nanopore',
         default_sample_type='blood',
+        default_sequencing=DefaultSequencing(
+            seq_type='genome',
+            technology='long-read',
+            platform='oxford-nanopore',
+        ),
         allow_extra_files_in_search_path=False,
     ):
         sequence_meta_map = {
@@ -66,9 +72,7 @@ def __init__(
             sample_name_column=Columns.SAMPLE_ID,
             reads_column=Columns.PASS_FASTQ_FILENAME,
             default_sample_type=default_sample_type,
-            default_sequencing_type=default_sequencing_type,
-            default_sequencing_technology=default_sequencing_technology,
-            default_sequencing_platform=default_sequencing_platform,
+            default_sequencing=default_sequencing,
             participant_meta_map={},
             sample_meta_map={},
             assay_meta_map=sequence_meta_map,
@@ -93,8 +97,8 @@ def parse_fastqs_structure(fastqs) -> List[List[str]]:
         """
         return [fastqs]
 
-    async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup]:
-        sequencing_groups = await super().group_assays(sample)
+    async def get_sample_sequencing_groups(self, sample: ParsedSample) -> list[ParsedSequencingGroup]:
+        sequencing_groups = await super().get_sample_sequencing_groups(sample)
 
         for sequencing_group in sequencing_groups:
             failed_fastqs: list[str] = []
@@ -128,7 +132,7 @@ async def group_assays(self, sample: ParsedSample) -> list[ParsedSequencingGroup
     help='The metamist project to import manifest into',
 )
 @click.option('--default-sample-type', default='blood')
-@click.option('--default-sequence-type', default='genome')
+@click.option('--default-sequencing-type', default='genome')
 @click.option('--default-sequencing-technology', default='long-read')
 @click.option('--default-sequencing-platform', default='oxford-nanopore')
 @click.option(
@@ -167,16 +171,18 @@ async def main(
     if not manifests:
         raise ValueError('Expected at least 1 manifest')
 
-    extra_seach_paths = [m for m in manifests if m.startswith('gs://')]
-    if extra_seach_paths:
-        search_path = list(set(search_path).union(set(extra_seach_paths)))
+    extra_search_paths = [m for m in manifests if m.startswith('gs://')]
+    if extra_search_paths:
+        search_path = list(set(search_path).union(set(extra_search_paths)))
 
     parser = OntParser(
         project=project,
         default_sample_type=default_sample_type,
-        default_sequencing_type=default_sequencing_type,
-        default_sequencing_platform=default_sequencing_platform,
-        default_sequencing_technology=default_sequencing_technology,
+        default_sequencing=DefaultSequencing(
+            seq_type=default_sequencing_type,
+            technology=default_sequencing_technology,
+            platform=default_sequencing_platform,
+        ),
         search_locations=search_path,
         allow_extra_files_in_search_path=allow_extra_files_in_search_path,
     )
diff --git a/scripts/parse_sample_file_map.py b/scripts/parse_sample_file_map.py
index ccc5059a3..e4e4ca7bb 100755
--- a/scripts/parse_sample_file_map.py
+++ b/scripts/parse_sample_file_map.py
@@ -5,7 +5,7 @@
 
 import click
 
-from metamist.parser.generic_metadata_parser import run_as_sync
+from metamist.parser.generic_metadata_parser import DefaultSequencing, run_as_sync
 from metamist.parser.sample_file_map_parser import SampleFileMapParser
 
 __DOC = """
@@ -36,7 +36,7 @@
 )
 @click.option('--default-sample-type', default='blood')
 @click.option('--default-sequencing-type', default='wgs')
-@click.option('--default-sequence-technology', default='short-read')
+@click.option('--default-sequencing-technology', default='short-read')
 @click.option(
     '--confirm', is_flag=True, help='Confirm with user input before updating server'
 )
@@ -67,7 +67,7 @@ async def main(
     project,
     default_sample_type='blood',
     default_sequencing_type='wgs',
-    default_sequence_technology='short-read',
+    default_sequencing_technology='short-read',
     confirm=False,
     dry_run=False,
     allow_extra_files_in_search_path=False,
@@ -77,15 +77,17 @@ async def main(
     if not manifests:
         raise ValueError('Expected at least 1 manifest')
 
-    extra_seach_paths = [m for m in manifests if m.startswith('gs://')]
-    if extra_seach_paths:
-        search_path = list(set(search_path).union(set(extra_seach_paths)))
+    extra_search_paths = [m for m in manifests if m.startswith('gs://')]
+    if extra_search_paths:
+        search_path = list(set(search_path).union(set(extra_search_paths)))
 
     parser = SampleFileMapParser(
         project=project,
         default_sample_type=default_sample_type,
-        default_sequencing_type=default_sequencing_type,
-        default_sequencing_technology=default_sequence_technology,
+        default_sequencing=DefaultSequencing(
+            seq_type=default_sequencing_type,
+            technology=default_sequencing_technology,
+        ),
         search_locations=search_path,
         allow_extra_files_in_search_path=allow_extra_files_in_search_path,
         default_reference_assembly_location=ref,
diff --git a/scripts/parse_vcgs_manifest.py b/scripts/parse_vcgs_manifest.py
index 491640322..78c4691a6 100644
--- a/scripts/parse_vcgs_manifest.py
+++ b/scripts/parse_vcgs_manifest.py
@@ -114,14 +114,14 @@ def get_sequencing_type(self, row: SingleRow) -> SequenceType:
             types = [types]
         if len(types) <= 0:
             if (
-                self.default_sequencing_type is None
-                or self.default_sequencing_type.lower() == 'none'
+                self.default_sequencing.seq_type is None
+                or self.default_sequencing.seq_type.lower() == 'none'
             ):
                 raise ValueError(
                     f"Couldn't detect sequence type for sample {sample_id}, and "
                     'no default was available.'
                 )
-            return SequenceType(self.default_sequencing_type)
+            return SequenceType(self.default_sequencing.seq_type)
         if len(types) > 1:
             raise ValueError(
                 f'Multiple library types for same sample {sample_id}, '
diff --git a/setup.py b/setup.py
index 1c1d579c0..d49473b98 100644
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,7 @@
         # for get id-token
         'cpg-utils >= 4.9.4',
         'gql[aiohttp,requests]',
+        'tabulate >= 0.9.0'
     ],
     entry_points={
         'metamist_parser': [
diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py
index 8fb169803..a40989857 100644
--- a/test/test_parse_existing_cohort.py
+++ b/test/test_parse_existing_cohort.py
@@ -299,7 +299,7 @@ async def test_genome_sequencing_type(self):
             allow_missing_files=True,
             sequencing_type='genome',
         )
-        self.assertEqual(parser.default_sequencing_type, 'genome')
+        self.assertEqual(parser.default_sequencing.seq_type, 'genome')
 
     @run_as_sync
     async def test_exome_sequencing_type(self):
@@ -314,7 +314,7 @@ async def test_exome_sequencing_type(self):
             allow_missing_files=True,
             sequencing_type='exome',
         )
-        self.assertEqual(parser.default_sequencing_type, 'exome')
+        self.assertEqual(parser.default_sequencing.seq_type, 'exome')
 
     @run_as_sync
     @patch('metamist.parser.generic_parser.query_async')
diff --git a/test/test_parse_file_map.py b/test/test_parse_file_map.py
index 22e7b700f..ff1aa5046 100644
--- a/test/test_parse_file_map.py
+++ b/test/test_parse_file_map.py
@@ -2,7 +2,7 @@
 from test.testbase import DbIsolatedTest, run_as_sync
 from unittest.mock import patch
 
-from metamist.parser.generic_parser import ParsedParticipant
+from metamist.parser.generic_parser import DefaultSequencing, ParsedParticipant
 from metamist.parser.sample_file_map_parser import SampleFileMapParser
 
 
@@ -25,7 +25,7 @@ async def test_single_row_fastq(self, mock_graphql_query):
         parser = SampleFileMapParser(
             search_locations=[],
             project=self.project_name,
-            default_sequencing_technology='short-read',
+            default_sequencing=DefaultSequencing(),
         )
         fs = ['<sample-id>.filename-R1.fastq.gz', '<sample-id>.filename-R2.fastq.gz']
         parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs}
@@ -91,7 +91,7 @@ async def test_to_external(self, mock_graphql_query):
         parser = SampleFileMapParser(
             search_locations=[],
             project=self.project_name,
-            default_sequencing_technology='short-read',
+            default_sequencing=DefaultSequencing(),
         )
         fs = ['<sample-id>.filename-R1.fastq.gz', '<sample-id>.filename-R2.fastq.gz']
         parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs}
@@ -148,7 +148,7 @@ async def test_two_rows_with_provided_checksums(self, mock_graphql_query):
         self.maxDiff = None
 
         self.assertDictEqual({}, participants[0].samples[0].meta)
-        expected_sequence1_reads = [
+        expected_assay1_reads = [
             {
                 'location': 'gs://BUCKET/FAKE/<sample-id>.filename-R1.fastq.gz',
                 'basename': '<sample-id>.filename-R1.fastq.gz',
@@ -168,11 +168,11 @@ async def test_two_rows_with_provided_checksums(self, mock_graphql_query):
         ]
 
         self.assertListEqual(
-            expected_sequence1_reads,
+            expected_assay1_reads,
             participants[0].samples[0].sequencing_groups[0].assays[0].meta['reads'],
         )
 
-        expected_sequence2_reads = [
+        expected_assay2_reads = [
             {
                 'location': 'gs://BUCKET/FAKE/<sample-id2>.filename-R1.fastq.gz',
                 'basename': '<sample-id2>.filename-R1.fastq.gz',
@@ -191,6 +191,164 @@ async def test_two_rows_with_provided_checksums(self, mock_graphql_query):
             },
         ]
         self.assertListEqual(
-            expected_sequence2_reads,
+            expected_assay2_reads,
             participants[1].samples[0].sequencing_groups[0].assays[0].meta['reads'],
         )
+
+    @run_as_sync
+    @patch('metamist.parser.generic_parser.query_async')
+    async def test_valid_rna_rows(self, mock_graphql_query):
+        """
+        Test importing a single row of rna data
+        """
+
+        mock_graphql_query.side_effect = self.run_graphql_query_async
+
+        rows = [
+            'Sample ID\tFilenames\tType\tfacility\tlibrary\tend_type\tread_length',
+            '<sample-id>\t<sample-id>.filename-R1.fastq.gz,<sample-id>.filename-R2.fastq.gz\tpolyarna\tVCGS\tTSStrmRNA\tpaired\t151',
+            '<sample-id2>\t<sample-id2>.filename-R1.fastq.gz\ttotalrna\tVCGS\tTSStrtRNA\tpaired\t151',
+            '<sample-id2>\t<sample-id2>.filename-R2.fastq.gz\ttotalrna\tVCGS\tTSStrtRNA\tpaired\t151',
+        ]
+
+        parser = SampleFileMapParser(
+            search_locations=[],
+            # doesn't matter, we're going to mock the call anyway
+            project=self.project_name,
+        )
+        fs = [
+            '<sample-id>.filename-R1.fastq.gz',
+            '<sample-id>.filename-R2.fastq.gz',
+            '<sample-id2>.filename-R1.fastq.gz',
+            '<sample-id2>.filename-R2.fastq.gz',
+        ]
+        parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs}
+        parser.skip_checking_gcs_objects = True
+
+        file_contents = '\n'.join(rows)
+        summary, samples = await parser.parse_manifest(
+            StringIO(file_contents), delimiter='\t', dry_run=True
+        )
+
+        self.assertEqual(0, summary['participants']['insert'])
+        self.assertEqual(0, summary['participants']['update'])
+        self.assertEqual(2, summary['samples']['insert'])
+        self.assertEqual(0, summary['samples']['update'])
+        self.assertEqual(2, summary['assays']['insert'])
+        self.assertEqual(0, summary['assays']['update'])
+        self.maxDiff = None
+
+        self.assertEqual('polyarna', samples[0].sequencing_groups[0].sequencing_type)
+        expected_sg1_meta = {
+            'sequencing_facility': 'VCGS',
+            'sequencing_library': 'TSStrmRNA',
+            'read_end_type': 'paired',
+            'read_length': 151
+        }
+        self.assertDictEqual(
+            expected_sg1_meta,
+            samples[0].sequencing_groups[0].meta,
+        )
+
+        self.assertEqual('totalrna', samples[1].sequencing_groups[0].sequencing_type)
+        expected_sg2_meta = {
+            'sequencing_facility': 'VCGS',
+            'sequencing_library': 'TSStrtRNA',
+            'read_end_type': 'paired',
+            'read_length': 151
+        }
+        self.assertDictEqual(
+            expected_sg2_meta,
+            samples[1].sequencing_groups[0].meta,
+        )
+
+    @run_as_sync
+    @patch('metamist.parser.generic_parser.query_async')
+    async def test_invalid_rna_row(self, mock_graphql_query):
+        """
+        Test importing a single row of rna data
+        """
+
+        mock_graphql_query.side_effect = self.run_graphql_query_async
+
+        rows = [
+            'Sample ID\tFilenames\tType',
+            '<sample-id>\t<sample-id>.filename-R1.fastq.gz,<sample-id>.filename-R2.fastq.gz\tpolyarna'
+        ]
+
+        parser = SampleFileMapParser(
+            search_locations=[],
+            # doesn't matter, we're going to mock the call anyway
+            project=self.project_name,
+        )
+        fs = [
+            '<sample-id>.filename-R1.fastq.gz',
+            '<sample-id>.filename-R2.fastq.gz',
+        ]
+        parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs}
+        parser.skip_checking_gcs_objects = True
+
+        file_contents = '\n'.join(rows)
+        with self.assertRaises(
+                ValueError
+        ):
+            _, _ = await parser.parse_manifest(
+                StringIO(file_contents), delimiter='\t', dry_run=True
+            )
+
+    @run_as_sync
+    @patch('metamist.parser.generic_parser.query_async')
+    async def test_rna_row_with_default_field_values(self, mock_graphql_query):
+        """
+        Test importing a single row of rna data
+        """
+
+        mock_graphql_query.side_effect = self.run_graphql_query_async
+
+        rows = [
+            'Sample ID\tFilenames\tType',
+            '<sample-id>\t<sample-id>.filename-R1.fastq.gz,<sample-id>.filename-R2.fastq.gz\tpolyarna'
+        ]
+
+        parser = SampleFileMapParser(
+            search_locations=[],
+            # doesn't matter, we're going to mock the call anyway
+            project=self.project_name,
+            default_sequencing=DefaultSequencing(
+                facility='VCGS',
+                library='TSStrmRNA'
+            ),
+            default_read_end_type='paired',
+            default_read_length=151
+        )
+        fs = [
+            '<sample-id>.filename-R1.fastq.gz',
+            '<sample-id>.filename-R2.fastq.gz',
+        ]
+        parser.filename_map = {k: 'gs://BUCKET/FAKE/' + k for k in fs}
+        parser.skip_checking_gcs_objects = True
+
+        file_contents = '\n'.join(rows)
+        summary, samples = await parser.parse_manifest(
+            StringIO(file_contents), delimiter='\t', dry_run=True
+        )
+
+        self.assertEqual(0, summary['participants']['insert'])
+        self.assertEqual(0, summary['participants']['update'])
+        self.assertEqual(1, summary['samples']['insert'])
+        self.assertEqual(0, summary['samples']['update'])
+        self.assertEqual(1, summary['assays']['insert'])
+        self.assertEqual(0, summary['assays']['update'])
+        self.maxDiff = None
+
+        self.assertEqual('polyarna', samples[0].sequencing_groups[0].sequencing_type)
+        expected_sg1_meta = {
+            'sequencing_facility': 'VCGS',
+            'sequencing_library': 'TSStrmRNA',
+            'read_end_type': 'paired',
+            'read_length': 151
+        }
+        self.assertDictEqual(
+            expected_sg1_meta,
+            samples[0].sequencing_groups[0].meta,
+        )