diff --git a/isatools/isatab/validate/rules/core.py b/isatools/isatab/validate/rules/core.py index 9b66a37e..a3ad728c 100644 --- a/isatools/isatab/validate/rules/core.py +++ b/isatools/isatab/validate/rules/core.py @@ -115,7 +115,7 @@ def __init__(self, rules_to_run: tuple = DEFAULT_INVESTIGATION_RULES): """ The ISA investigation validator class - :param investigation_df_dict: a dictionnary of DataFrames and list of dataframes representing sthe investigation + :param investigation_df_dict: a dictionary of DataFrames and lists of DataFrames representing the investigation file :param dir_context: the directory of the investigation :param configs: directory of the XML config files :param available_rules: a customizable list of all available rules for investigation objects diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py index 36d072d3..97343f92 100644 --- a/isatools/isatab/validate/rules/rules_40xx.py +++ b/isatools/isatab/validate/rules/rules_40xx.py @@ -116,12 +116,14 @@ def check_measurement_technology_types(i_df_dict, configs): for i, assay_df in enumerate(i_df_dict['s_assays']): measurement_types = assay_df['Study Assay Measurement Type'].tolist() technology_types = assay_df['Study Assay Technology Type'].tolist() + if len(measurement_types) == len(technology_types): for x, measurement_type in enumerate(measurement_types): lowered_mt = measurement_types[x].lower() lowered_tt = technology_types[x].lower() if (lowered_mt, lowered_tt) not in configs.keys(): - spl = "Measurement {}/technology {},STUDY.{}, STUDY ASSAY.{}" + + spl = "Measurement {}/technology {}, STUDY.{}, STUDY ASSAY.{}" spl = spl.format(measurement_types[x], technology_types[x], i, x) error = ("(E) Could not load configuration for measurement type '{}' and technology type '{}' " "for STUDY.{}, STUDY ASSAY.{}'").format(measurement_types[x], technology_types[x], i, x) @@ -284,29 +286,6 @@ def pairwise(iterable): validator.add_warning(message="Missing Protocol Value", supplemental=spl, code=1007) log.warning(spl) if cfg.get_isatab_configuration(): - # proto_ref_index = [i for i in table.columns if 'protocol ref' in i.lower()] - # result = True - # for each in proto_ref_index: - # prots_found = set() - # for cell in table[each]: - # prots_found.add(cell) - # if len(prots_found) > 1: - # log.warning("(W) Multiple protocol references {} are found in {}".format(prots_found, each)) - # log.warning("(W) Only one protocol reference should be used in a Protocol REF column.") - # result = False - # if result: - # field_headers = [i for i in table.columns - # if i.lower().endswith(' name') - # or i.lower().endswith(' data file') - # or i.lower().endswith(' data matrix file')] - # protos = [i for i in table.columns if i.lower() == 'protocol ref'] - # if len(protos) > 0: - # last_proto_index = table.columns.get_loc(protos[len(protos) - 1]) - # else: - # last_proto_index = -1 - # last_mat_or_dat_index = table.columns.get_loc(field_headers[len(field_headers) - 1]) - # if last_proto_index > last_mat_or_dat_index: - # log.warning("(W) Protocol REF column without output in file '" + table.filename + "'") for left, right in pairwise(field_headers): cleft = None cright = None @@ -327,23 +306,12 @@ def pairwise(iterable): for proto_name in proto_names: proto_type = proto_map.get(proto_name) if not proto_type and proto_name: - spl = ("Could not find protocol type for protocol name '{}' in file '{}'").format( - proto_name, table.filename) + spl = ("Could not find protocol type for protocol name '{}' in file '{}'" ).format(proto_name, table.filename) validator.add_warning(message="Missing Protocol Declaration", supplemental=spl, code=1007) log.warning("(W) {}".format(spl)) else: fprotos.append(proto_type) - # proto_name = table.iloc[0][header] - # try: - # proto_type = proto_map[proto_name] - # fprotos.append(proto_type) - # except KeyError: - # spl = ("Could not find protocol type for protocol name '{}', trying to validate_rules against name " - # "only").format(proto_name) - # validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007) - # log.warning("(W) {}".format(spl)) - # fprotos.append(proto_name) invalid_protos = set(cprotos) - set(fprotos) if len(invalid_protos) > 0: spl = ("Protocol(s) of type {} defined in the ISA-configuration expected as a between '{}' and " @@ -351,8 +319,6 @@ def pairwise(iterable): spl = spl.format(str(list(invalid_protos)), cleft.header, cright.header, table.filename) validator.add_warning(message="Missing Protocol declaration", supplemental=spl, code=1007) log.warning("(W) {}".format(spl)) - result = False - return result def load_table_checks(df, filename): diff --git a/tests/isatab/test_isatab.py b/tests/isatab/test_isatab.py index d33141fd..7a256a44 100644 --- a/tests/isatab/test_isatab.py +++ b/tests/isatab/test_isatab.py @@ -28,6 +28,9 @@ def setUpModule(): "git clone -b tests --single-branch git@github.com:ISA-tools/ISAdatasets {0}" .format(utils.DATA_DIR)) +def replace_windows_newlines(input_string): + return input_string.replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n') + def replace_windows_newlines(input_string): return input_string.replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n') @@ -445,7 +448,7 @@ def test_isatab_dump_source_sample_char_quant(self): s.process_sequence = [sample_collection_process] s.samples.append(sample1) i.studies = [s] - actual = isatab.dumps(i) + actual = replace_windows_newlines(isatab.dumps(i)) expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022""" self.assertIn(expected, actual) @@ -1269,7 +1272,8 @@ def test_source_protocol_ref_sample_protocol_ref_sample(self): i.studies = [s] expected = """Source Name\tProtocol REF\tSample Name\tProtocol REF\tSample Name source1\tsample collection\tsample1\taliquoting\taliquot1""" - self.assertIn(expected, isatab.dumps(i).replace('\r\r\n', '\n').replace('\r\n', '\n').replace('\r', '\n')) + self.assertIn(expected, replace_windows_newlines(isatab.dumps(i))) + def test_sample_protocol_ref_material_protocol_ref_data2(self): i = Investigation() @@ -1727,6 +1731,7 @@ def test_isatab_preprocess_issue235(self): if """Protocol REF\tData Transformation Name""" in header: self.fail('Incorrectly inserted Protocol REF before ' 'Data Transformation Name') + os.remove(tmp.name) def test_isatab_factor_value_parsing_issue270(self): with open(os.path.join(self._tab_data_dir, 'issue270', 'i_matteo.txt'), diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py index 41d4d4f8..401385c0 100644 --- a/tests/isatab/validate/test_core.py +++ b/tests/isatab/validate/test_core.py @@ -19,6 +19,7 @@ def test_b_ii_s_3(self): r = validate(fp=data_file, config_dir=self.default_conf, origin="") self.assertEqual(len(r['warnings']), 2) + def test_mtbls267(self): data_path = path.join(path.dirname(path.abspath(__file__)), '..', '..', 'data', 'tab', 'MTBLS267-partial') with open(path.join(data_path, 'i_Investigation.txt'), 'r') as data_file: @@ -84,6 +85,7 @@ def is_investigation(investigation_df): r = validate(data_file, rules=rules) self.assertEqual(len(r['warnings']), 2) + rule = '12000' expected_error = { 'message': 'Unknown/System Error', diff --git a/tests/validators/test_validate_test_data.py b/tests/validators/test_validate_test_data.py index 06f8bef3..80ca2c7b 100644 --- a/tests/validators/test_validate_test_data.py +++ b/tests/validators/test_validate_test_data.py @@ -321,9 +321,6 @@ class TestIsaJsonCreateTestData(unittest.TestCase): def setUp(self): self._reporting_level = logging.ERROR - # self.v2_create_schemas_path = os.path.join( - # os.path.dirname(__file__), '../..', 'isatools', 'resources', 'schemas', - # 'isa_model_version_2_0_schemas', 'create') self.v2_create_schemas_path = pathlib.Path( pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas', 'isa_model_version_2_0_schemas', 'create') @@ -334,13 +331,11 @@ def test_validate_testdata_sampleassayplan_json(self): with open(os.path.join(self.v2_create_schemas_path, 'sample_assay_plan_schema.json')) as fp: sample_assay_plan_schema = json.load(fp) - res_path = str(pathlib.Path("file://", self.v2_create_schemas_path, - 'sample_assay_plan_schema.json')) + + res_path = pathlib.Path("file://", self.v2_create_schemas_path, + 'sample_assay_plan_schema.json').as_uri() resolver = RefResolver(res_path, sample_assay_plan_schema) - resolver = RefResolver('file://{}'.format( - os.path.join(self.v2_create_schemas_path, - 'sample_assay_plan_schema.json')), - sample_assay_plan_schema) + validator = Draft4Validator(sample_assay_plan_schema, resolver=resolver) validator.validate(json.load(test_case_fp)) @@ -368,10 +363,9 @@ def test_validate_testdata_treatment_sequence_json(self): with open(os.path.join(self.v2_create_schemas_path, 'treatment_sequence_schema.json')) as fp: treatment_sequence_schema = json.load(fp) - resolver = RefResolver('file://{}'.format( - os.path.join(self.v2_create_schemas_path, - 'treatment_sequence_schema.json')), - treatment_sequence_schema) + res_path = pathlib.Path("file://", self.v2_create_schemas_path, + 'treatment_sequence_schema.json').as_uri() + resolver = RefResolver(res_path, treatment_sequence_schema) validator = Draft4Validator(treatment_sequence_schema, resolver=resolver) validator.validate(json.load(test_case_fp))