From dd0e6f786c6c5ae01ba851ffb247a9d89fb05382 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 07:32:19 +1000 Subject: [PATCH 01/14] Test that fails if warning is set to true and ValueError is raised --- test/test_parse_existing_cohort.py | 109 ++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 6f9b67fb9..c457c44fc 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -134,39 +134,40 @@ async def test_no_header(self): # to exclude absolute paths (as absolute paths are NOT in the file map). # I don't know what needs to change to fix this test, except maybe # that the EC parser shouldn't return absolute paths - # @run_as_sync - # @patch('metamist.parser.generic_parser.query_async') - # async def test_missing_fastqs(self, mock_graphql_query): - # """ - # Tests case where the fastq's in the storage do not match the ingested samples. 
- # """ - # mock_graphql_query.side_effect = self.run_graphql_query_async - # - # rows = [ - # 'HEADER', - # '""', - # 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\tParticipant ID\t', - # 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\tPID123', - # ] - # parser = ExistingCohortParser( - # include_participant_column=False, - # batch_number='M01', - # search_locations=[], - # project=self.project_name, - # ) - # - # parser.filename_map = { - # 'HG3F_2_220405_FLUIDXMISTMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', - # 'HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', - # } - # - # file_contents = '\n'.join(rows) - # - # with self.assertRaises(ValueError): - # await parser.parse_manifest( - # StringIO(file_contents), delimiter='\t', dry_run=True - # ) - # return + @run_as_sync + @patch('metamist.parser.generic_parser.query_async') + async def test_missing_fastqs(self, mock_graphql_query): + """ + Tests case where the fastq's in the storage do not match the ingested samples. 
+ """ + mock_graphql_query.side_effect = self.run_graphql_query_async + + rows = [ + 'HEADER', + '""', + 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\tParticipant ID\t', + 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\tPID123', + ] + parser = ExistingCohortParser( + include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + warning_flag=False, + ) + + parser.filename_map = { + 'HG3F_2_220405_FLUIDXMISTMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', + 'HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', + } + + file_contents = '\n'.join(rows) + + with self.assertRaises(ValueError): + await parser.parse_manifest( + StringIO(file_contents), delimiter='\t', dry_run=True + ) + return @run_as_sync @patch('metamist.parser.generic_parser.query_async') @@ -232,3 +233,45 @@ async def test_existing_row( self.assertEqual(0, summary['assays']['update']) return + + @run_as_sync + @patch('metamist.parser.generic_parser.query_async') + @patch( + 'metamist.parser.generic_metadata_parser.GenericMetadataParser.get_read_filenames', + return_value=[], + ) + async def test_parse_cohort_with_warning( + self, mock_graphql_query, mock_get_read_filenames + ): + """Test when warning_flag is True and records with missing fastqs, no ValueError is raised""" + + mock_graphql_query.side_effect = self.run_graphql_query_async + # mock_get_read_filenames.return_value = [] + + rows = [ + 'HEADER', + '""', + 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\t', + 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\t', + ] + + parser = ExistingCohortParser( + 
include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + warning_flag=True, + ) + + file_contents = '\n'.join(rows) + + try: + await parser.parse_manifest( + StringIO(file_contents), delimiter='\t', dry_run=True + ) + except ValueError: + self.fail("ValueError was raised") + + mock_get_read_filenames.assert_called() + + return From 83f21744f12a5d2c5bda920ea6347a58a6841449 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 07:33:28 +1000 Subject: [PATCH 02/14] Added warning flag to ignore missing data when parsing --- .gitignore | 4 ++++ scripts/parse_existing_cohort.py | 13 +++++++++++-- web/package-lock.json | 4 ++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index d5fa95d5a..78ba17012 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,7 @@ web/src/__generated__ # pulumi config files Pulumi*.yaml + +venv/ +scraps.ipynb +db/mariadb-java-client-3.0.3.jar.1 \ No newline at end of file diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index 7c6e322e7..64a580caf 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -105,12 +105,15 @@ def __init__( search_locations, batch_number, include_participant_column, + warning_flag, ): if include_participant_column: participant_column = Columns.PARTICIPANT_COLUMN else: participant_column = Columns.EXTERNAL_ID + self.warning_flag = warning_flag + super().__init__( project=project, search_locations=search_locations, @@ -134,13 +137,17 @@ def _get_dict_reader(self, file_pointer, delimiter: str): return reader async def get_read_filenames( - self, sample_id: Optional[str], row: SingleRow + self, + sample_id: Optional[str], + row: SingleRow, ) -> List[str]: """ We don't have fastq urls in a manifest, so overriding this method to take urls from a bucket listing. 
""" + warning_flag = self.warning_flag + read_filenames = [ filename for filename, path in self.filename_map.items() @@ -148,8 +155,10 @@ async def get_read_filenames( and any(filename.endswith(ext) for ext in READS_EXTENSIONS) ] - if not read_filenames: + if not read_filenames and not warning_flag: raise ValueError(f'No read files found for {sample_id}') + else: + logger.warning(f'No read files found for {sample_id}') return read_filenames def get_assay_id(self, row: GroupedRow) -> Optional[dict[str, str]]: diff --git a/web/package-lock.json b/web/package-lock.json index 5d81f4805..3ee6c289b 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "metamist", - "version": "6.3.0", + "version": "6.5.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "metamist", - "version": "6.3.0", + "version": "6.5.0", "dependencies": { "@apollo/client": "^3.7.3", "@emotion/react": "^11.10.4", From cbd08af315bfe35b3633ac6d76381f7898fce102 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 07:45:11 +1000 Subject: [PATCH 03/14] Commented this out again for consistency with dev branch --- test/test_parse_existing_cohort.py | 68 +++++++++++++++--------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index c457c44fc..1b32b0d64 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -134,40 +134,40 @@ async def test_no_header(self): # to exclude absolute paths (as absolute paths are NOT in the file map). # I don't know what needs to change to fix this test, except maybe # that the EC parser shouldn't return absolute paths - @run_as_sync - @patch('metamist.parser.generic_parser.query_async') - async def test_missing_fastqs(self, mock_graphql_query): - """ - Tests case where the fastq's in the storage do not match the ingested samples. 
- """ - mock_graphql_query.side_effect = self.run_graphql_query_async - - rows = [ - 'HEADER', - '""', - 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\tParticipant ID\t', - 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\tPID123', - ] - parser = ExistingCohortParser( - include_participant_column=False, - batch_number='M01', - search_locations=[], - project=self.project_name, - warning_flag=False, - ) - - parser.filename_map = { - 'HG3F_2_220405_FLUIDXMISTMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', - 'HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', - } - - file_contents = '\n'.join(rows) - - with self.assertRaises(ValueError): - await parser.parse_manifest( - StringIO(file_contents), delimiter='\t', dry_run=True - ) - return + # @run_as_sync + # @patch('metamist.parser.generic_parser.query_async') + # async def test_missing_fastqs(self, mock_graphql_query): + # """ + # Tests case where the fastq's in the storage do not match the ingested samples. 
+ # """ + # mock_graphql_query.side_effect = self.run_graphql_query_async + + # rows = [ + # 'HEADER', + # '""', + # 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\tParticipant ID\t', + # 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\tPID123', + # ] + # parser = ExistingCohortParser( + # include_participant_column=False, + # batch_number='M01', + # search_locations=[], + # project=self.project_name, + # warning_flag=False, + # ) + + # parser.filename_map = { + # 'HG3F_2_220405_FLUIDXMISTMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R1.fastq', + # 'HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq': '/path/to/HG3F_2_220405_FLUIDXMISMATCH1234_Homo-sapiens_AAC-TAT_R_220208_VB_BLAH_M002_R2.fastq', + # } + + # file_contents = '\n'.join(rows) + + # with self.assertRaises(ValueError): + # await parser.parse_manifest( + # StringIO(file_contents), delimiter='\t', dry_run=True + # ) + # return @run_as_sync @patch('metamist.parser.generic_parser.query_async') From 9207b6735778dbee4a1ea7d8cca03a80a4028b96 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 08:12:18 +1000 Subject: [PATCH 04/14] .gitignore updated --- .gitignore | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 78ba17012..eb675baf5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ db/postgres*.jar .vscode/ env/ +venv/ __pycache__/ *.pyc .DS_Store @@ -60,7 +61,3 @@ web/src/__generated__ # pulumi config files Pulumi*.yaml - -venv/ -scraps.ipynb -db/mariadb-java-client-3.0.3.jar.1 \ No newline at end of file From b4a86a0cf493acaf1022248576b4f2e4def5069b Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 08:44:36 +1000 Subject: [PATCH 05/14] added warning_flag to cmd flags and updated doc string --- 
scripts/parse_existing_cohort.py | 13 +++++++++++++ test/test_parse_existing_cohort.py | 5 ++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index 64a580caf..a2473e428 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -30,6 +30,11 @@ Additionally, the reads-column is not provided for existing-cohort csvs. This information is derived from the fluidX id pulled from the filename. +Additional Options: +--warning-flag: +Set this flag to parse manifests with missing data and generate warnings instead of raising errors. +This allows the script to proceed even if some data is missing. + """ import csv @@ -214,6 +219,12 @@ def get_existing_external_sequence_ids(self, participant_map: dict[str, dict]): @click.option( '--include-participant-column', 'include_participant_column', is_flag=True ) +@click.option( + '--warning-flag', + 'warning_flag', + is_flag=True, + help='Set this flag to parse manifests with missing data', +) @click.argument('manifests', nargs=-1) @run_as_sync async def main( @@ -224,6 +235,7 @@ async def main( confirm=True, dry_run=False, include_participant_column=False, + warning_flag=False, ): """Run script from CLI arguments""" @@ -232,6 +244,7 @@ async def main( search_locations=search_locations, batch_number=batch_number, include_participant_column=include_participant_column, + warning_flag=warning_flag, ) for manifest_path in manifests: diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 1b32b0d64..6edf6f331 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -45,6 +45,7 @@ async def test_single_row( batch_number='M01', search_locations=[], project=self.project_name, + warning_flag=False, ) parser.filename_map = { @@ -115,6 +116,7 @@ async def test_no_header(self): batch_number='M01', search_locations=[], project=self.project_name, + warning_flag=False, 
) parser.filename_map = { @@ -215,6 +217,7 @@ async def test_existing_row( batch_number='M01', search_locations=[], project=self.project_name, + warning_flag=False, ) parser.filename_map = { @@ -270,7 +273,7 @@ async def test_parse_cohort_with_warning( StringIO(file_contents), delimiter='\t', dry_run=True ) except ValueError: - self.fail("ValueError was raised") + self.fail('ValueError was raised') mock_get_read_filenames.assert_called() From 89c005850064dfaf929714e4c6d3707f47f59426 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 08:52:34 +1000 Subject: [PATCH 06/14] fixed linting issue --- scripts/parse_existing_cohort.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index a2473e428..46b97f637 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -162,8 +162,9 @@ async def get_read_filenames( if not read_filenames and not warning_flag: raise ValueError(f'No read files found for {sample_id}') - else: - logger.warning(f'No read files found for {sample_id}') + + logger.warning(f'No read files found for {sample_id}') + return read_filenames def get_assay_id(self, row: GroupedRow) -> Optional[dict[str, str]]: From ebb53793ab6eaba31557b65e656a8942c5b8d925 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 09:42:35 +1000 Subject: [PATCH 07/14] removed a commented out bit of code --- test/test_parse_existing_cohort.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 6edf6f331..1633ef888 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -249,7 +249,6 @@ async def test_parse_cohort_with_warning( """Test when warning_flag is True and records with missing fastqs, no ValueError is raised""" mock_graphql_query.side_effect = self.run_graphql_query_async - # mock_get_read_filenames.return_value = [] 
rows = [ 'HEADER', From 01c61392febd32d536eec88d49293656c92d526c Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 13:52:29 +1000 Subject: [PATCH 08/14] fixed testing error and renamed warning_flag to allow_missing_files to be more obvious --- scripts/parse_existing_cohort.py | 23 +++++----- test/test_parse_existing_cohort.py | 67 ++++++++++++++---------------- 2 files changed, 43 insertions(+), 47 deletions(-) diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index 46b97f637..4f96570d6 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -31,7 +31,7 @@ This information is derived from the fluidX id pulled from the filename. Additional Options: ---warning-flag: +--allow-missing-files: Set this flag to parse manifests with missing data and generate warnings instead of raising errors. This allows the script to proceed even if some data is missing. @@ -110,14 +110,14 @@ def __init__( search_locations, batch_number, include_participant_column, - warning_flag, + allow_missing_files, ): if include_participant_column: participant_column = Columns.PARTICIPANT_COLUMN else: participant_column = Columns.EXTERNAL_ID - self.warning_flag = warning_flag + self.allow_missing_files = allow_missing_files super().__init__( project=project, @@ -151,8 +151,6 @@ async def get_read_filenames( urls from a bucket listing. 
""" - warning_flag = self.warning_flag - read_filenames = [ filename for filename, path in self.filename_map.items() @@ -160,10 +158,11 @@ async def get_read_filenames( and any(filename.endswith(ext) for ext in READS_EXTENSIONS) ] - if not read_filenames and not warning_flag: - raise ValueError(f'No read files found for {sample_id}') + if not read_filenames: + if not self.allow_missing_files: + raise ValueError(f'No read files found for {sample_id}') - logger.warning(f'No read files found for {sample_id}') + logger.warning(f'No read files found for {sample_id}') return read_filenames @@ -221,8 +220,8 @@ def get_existing_external_sequence_ids(self, participant_map: dict[str, dict]): '--include-participant-column', 'include_participant_column', is_flag=True ) @click.option( - '--warning-flag', - 'warning_flag', + '--allow-missing-files', + 'allow_missing_files', is_flag=True, help='Set this flag to parse manifests with missing data', ) @@ -236,7 +235,7 @@ async def main( confirm=True, dry_run=False, include_participant_column=False, - warning_flag=False, + allow_missing_files=False, ): """Run script from CLI arguments""" @@ -245,7 +244,7 @@ async def main( search_locations=search_locations, batch_number=batch_number, include_participant_column=include_participant_column, - warning_flag=warning_flag, + allow_missing_files=allow_missing_files, ) for manifest_path in manifests: diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 1633ef888..67839bc5e 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -1,13 +1,12 @@ from datetime import datetime from io import StringIO +from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -from test.testbase import run_as_sync, DbIsolatedTest - from db.python.layers import ParticipantLayer -from scripts.parse_existing_cohort import ExistingCohortParser -from models.models import ParticipantUpsertInternal, SampleUpsertInternal 
from metamist.parser.generic_parser import ParsedParticipant +from models.models import ParticipantUpsertInternal, SampleUpsertInternal +from scripts.parse_existing_cohort import Columns, ExistingCohortParser class TestExistingCohortParser(DbIsolatedTest): @@ -45,7 +44,7 @@ async def test_single_row( batch_number='M01', search_locations=[], project=self.project_name, - warning_flag=False, + allow_missing_files=False, ) parser.filename_map = { @@ -116,7 +115,7 @@ async def test_no_header(self): batch_number='M01', search_locations=[], project=self.project_name, - warning_flag=False, + allow_missing_files=False, ) parser.filename_map = { @@ -155,7 +154,7 @@ async def test_no_header(self): # batch_number='M01', # search_locations=[], # project=self.project_name, - # warning_flag=False, + # allow_missing_files=False, # ) # parser.filename_map = { @@ -217,7 +216,7 @@ async def test_existing_row( batch_number='M01', search_locations=[], project=self.project_name, - warning_flag=False, + allow_missing_files=False, ) parser.filename_map = { @@ -238,42 +237,40 @@ async def test_existing_row( return @run_as_sync - @patch('metamist.parser.generic_parser.query_async') - @patch( - 'metamist.parser.generic_metadata_parser.GenericMetadataParser.get_read_filenames', - return_value=[], - ) - async def test_parse_cohort_with_warning( - self, mock_graphql_query, mock_get_read_filenames - ): - """Test when warning_flag is True and records with missing fastqs, no ValueError is raised""" - - mock_graphql_query.side_effect = self.run_graphql_query_async + async def test_get_read_filenames_no_reads_fail(self): + """Test when allow_missing_files is False and records with missing fastqs, ValueError is raised""" - rows = [ - 'HEADER', - '""', - 'Application\tExternal ID\tSample Concentration (ng/ul)\tVolume (uL)\tSex\tSample/Name\tReference Genome\t', - 'App\tEXTID1234\t100\t100\tFemale\t220405_FLUIDX1234\thg38\t', - ] + single_row = {Columns.MANIFEST_FLUID_X: ''} parser = 
ExistingCohortParser( include_participant_column=False, batch_number='M01', search_locations=[], project=self.project_name, - warning_flag=True, + allow_missing_files=False, ) + parser.filename_map = {} - file_contents = '\n'.join(rows) + with self.assertRaises(ValueError): + # this will raise a ValueError because the allow_missing_files=False, + # and there are no matching reads in the filename map + await parser.get_read_filenames(sample_id='', row=single_row) - try: - await parser.parse_manifest( - StringIO(file_contents), delimiter='\t', dry_run=True - ) - except ValueError: - self.fail('ValueError was raised') + @run_as_sync + async def test_get_read_filenames_no_reads_pass(self): + """Test when allow_missing_files is True and records with missing fastqs, no ValueError is raised""" - mock_get_read_filenames.assert_called() + single_row = {Columns.MANIFEST_FLUID_X: ''} - return + parser = ExistingCohortParser( + include_participant_column=False, + batch_number='M01', + search_locations=[], + project=self.project_name, + allow_missing_files=True, + ) + parser.filename_map = {} + + read_filenames = await parser.get_read_filenames(sample_id='', row=single_row) + + self.assertEqual(len(read_filenames), 0) From 778585d6a21a2b92af16742ea9a9d3bd574e9b2d Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 15:19:41 +1000 Subject: [PATCH 09/14] added check for warning log in test_get_read_filenames_no_reads_pass as well as length of read_filenames. 
Also removed /venv from the .gitignore file - probably best for a separate PR --- .gitignore | 1 - test/test_parse_existing_cohort.py | 10 +++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index eb675baf5..d5fa95d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ db/postgres*.jar .vscode/ env/ -venv/ __pycache__/ *.pyc .DS_Store diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 67839bc5e..c3617a87b 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -2,6 +2,8 @@ from io import StringIO from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch +import sys +import logging from db.python.layers import ParticipantLayer from metamist.parser.generic_parser import ParsedParticipant @@ -271,6 +273,12 @@ async def test_get_read_filenames_no_reads_pass(self): ) parser.filename_map = {} - read_filenames = await parser.get_read_filenames(sample_id='', row=single_row) + with self.assertLogs(level='INFO') as cm: + read_filenames = await parser.get_read_filenames( + sample_id='', row=single_row + ) + + self.assertEqual(len(cm.output), 1) + self.assertIn('No read files found for ', cm.output[0]) self.assertEqual(len(read_filenames), 0) From d85ba2dd15d12943ca33bfada7adb685079b806c Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 15:21:30 +1000 Subject: [PATCH 10/14] removed unused imports --- test/test_parse_existing_cohort.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index c3617a87b..6521cb17d 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -2,8 +2,6 @@ from io import StringIO from test.testbase import DbIsolatedTest, run_as_sync from unittest.mock import patch -import sys -import logging from db.python.layers import ParticipantLayer from metamist.parser.generic_parser 
import ParsedParticipant From 44779bca049a7badf93381e1acc42b589b8a5dc6 Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Mon, 20 Nov 2023 15:23:33 +1000 Subject: [PATCH 11/14] Update scripts/parse_existing_cohort.py Co-authored-by: Vivian Bakiris <79084890+vivbak@users.noreply.github.com> --- scripts/parse_existing_cohort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/parse_existing_cohort.py b/scripts/parse_existing_cohort.py index 4f96570d6..34d64f370 100644 --- a/scripts/parse_existing_cohort.py +++ b/scripts/parse_existing_cohort.py @@ -223,7 +223,7 @@ def get_existing_external_sequence_ids(self, participant_map: dict[str, dict]): '--allow-missing-files', 'allow_missing_files', is_flag=True, - help='Set this flag to parse manifests with missing data', + help='Set this flag to parse/ingest sequencing groups with missing reads', ) @click.argument('manifests', nargs=-1) @run_as_sync From 9f0909cd109f894282035ee29ad9d769faa2ee06 Mon Sep 17 00:00:00 2001 From: michael-harper <109899932+michael-harper@users.noreply.github.com> Date: Mon, 20 Nov 2023 15:24:00 +1000 Subject: [PATCH 12/14] Update test/test_parse_existing_cohort.py Co-authored-by: Vivian Bakiris <79084890+vivbak@users.noreply.github.com> --- test/test_parse_existing_cohort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_parse_existing_cohort.py b/test/test_parse_existing_cohort.py index 6521cb17d..d8e755bf7 100644 --- a/test/test_parse_existing_cohort.py +++ b/test/test_parse_existing_cohort.py @@ -238,7 +238,7 @@ async def test_existing_row( @run_as_sync async def test_get_read_filenames_no_reads_fail(self): - """Test when allow_missing_files is False and records with missing fastqs, ValueError is raised""" + """Test ValueError is raised when allow_missing_files is False and sequencing groups have no reads""" single_row = {Columns.MANIFEST_FLUID_X: ''} From 
2aec9dca5ac970813923e1e96df192f2c925ef07 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 15:35:28 +1000 Subject: [PATCH 13/14] de-bumping metamist version --- web/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/package-lock.json b/web/package-lock.json index 3ee6c289b..0989b7ee7 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "metamist", - "version": "6.5.0", + "version": "6.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "metamist", - "version": "6.5.0", + "version": "6.3.0", "dependencies": { "@apollo/client": "^3.7.3", "@emotion/react": "^11.10.4", @@ -12140,4 +12140,4 @@ } } } -} +} \ No newline at end of file From 74b9031ec4bb074ba26ee08b627eee1914cb49a9 Mon Sep 17 00:00:00 2001 From: Michael Harper Date: Mon, 20 Nov 2023 15:50:17 +1000 Subject: [PATCH 14/14] fixed linting issue --- web/package-lock.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/package-lock.json b/web/package-lock.json index 0989b7ee7..5d81f4805 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -12140,4 +12140,4 @@ } } } -} \ No newline at end of file +}