diff --git a/isatools/isatab/validate/core.py b/isatools/isatab/validate/core.py
index 2fa6a08c..da13d88b 100644
--- a/isatools/isatab/validate/core.py
+++ b/isatools/isatab/validate/core.py
@@ -186,21 +186,21 @@ def validate(fp: TextIO,
     built_rules = build_rules(rules)
     try:
-        i_df = load_investigation(fp=fp)
+        i_df_dict = load_investigation(fp=fp)
         params = {
-            "investigation_df": i_df,
+            "investigation_df_dict": i_df_dict,
             "dir_context": path.dirname(fp.name),
             "configs": config_dir,
         }
         investigation_validator = ISAInvestigationValidator(**params, **built_rules['investigation'])
-        for i, study_df in enumerate(i_df['studies']):
+        for i, study_df in enumerate(i_df_dict['studies']):
             study_filename = study_df.iloc[0]['Study File Name']
             study_validator = ISAStudyValidator(validator=investigation_validator, study_index=i,
                                                 study_filename=study_filename, study_df=study_df,
                                                 **built_rules['studies'])
             assay_tables = list()
-            assay_df = study_validator.params['investigation_df']['s_assays'][i]
+            assay_df = study_validator.params['investigation_df_dict']['s_assays'][i]
             for x, assay_filename in enumerate(assay_df['Study Assay File Name'].tolist()):
                 ISAAssayValidator(assay_tables=assay_tables, validator=study_validator, assay_index=x,
                                   assay_df=assay_df, assay_filename=assay_filename, **built_rules['assays'])
diff --git a/isatools/isatab/validate/rules/core.py b/isatools/isatab/validate/rules/core.py
index 40774499..d0fe08fd 100644
--- a/isatools/isatab/validate/rules/core.py
+++ b/isatools/isatab/validate/rules/core.py
@@ -108,14 +108,14 @@ def validate_rules(self, validator):
 
 class ISAInvestigationValidator:
     def __init__(self,
-                 investigation_df: DataFrame,
+                 investigation_df_dict: dict,
                  dir_context: str,
                  configs: str,
                  available_rules: list = INVESTIGATION_RULES_MAPPING,
                  rules_to_run: tuple = DEFAULT_INVESTIGATION_RULES):
         """ The ISA investigation validator class
 
-        :param investigation_df: the investigation dataframe
+        :param investigation_df_dict: a dictionary of DataFrames and lists of DataFrames representing the investigation file
         :param dir_context: the directory of the investigation
         :param configs: directory of the XML config files
         :param available_rules: a customizable list of all available rules for investigation objects
@@ -124,7 +124,7 @@ def __init__(self,
         self.all_rules = Rules(rules_to_run=rules_to_run, available_rules=available_rules)
         self.has_validated = False
         self.params = {
-            'investigation_df': investigation_df,
+            'investigation_df_dict': investigation_df_dict,
             'dir_context': dir_context,
             'configs': configs,
             'term_source_refs': None
@@ -162,8 +162,8 @@ def __init__(self,
             self.params['study_sample_table'] = load_table(s_fp)
             self.params['study_sample_table'].filename = study_filename
 
-        protocol_names = self.params['investigation_df']['s_protocols'][study_index]['Study Protocol Name'].tolist()
-        protocol_types = self.params['investigation_df']['s_protocols'][study_index]['Study Protocol Type'].tolist()
+        protocol_names = self.params['investigation_df_dict']['s_protocols'][study_index]['Study Protocol Name'].tolist()
+        protocol_types = self.params['investigation_df_dict']['s_protocols'][study_index]['Study Protocol Type'].tolist()
         self.params['protocol_names_and_types'] = dict(zip(protocol_names, protocol_types))
 
         self.params['study_group_size_in_comment'] = None
diff --git a/isatools/isatab/validate/rules/defaults.py b/isatools/isatab/validate/rules/defaults.py
index a655cd62..eaafb849 100644
--- a/isatools/isatab/validate/rules/defaults.py
+++ b/isatools/isatab/validate/rules/defaults.py
@@ -30,30 +30,30 @@ INVESTIGATION_RULES_MAPPING = [
-    {'rule': check_table_files_read, 'params': ['investigation_df', 'dir_context'], 'identifier': '0006'},
+    {'rule': check_table_files_read, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '0006'},
 
-    {'rule': sample_not_declared, 'params': ['investigation_df', 'dir_context'], 'identifier': '1003'},
-    {'rule': check_protocol_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1007'},
-    {'rule': check_study_factor_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1008'},
-    {'rule': check_protocol_parameter_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1009'},
-    {'rule': check_protocol_names, 'params': ['investigation_df'], 'identifier': '1010'},
-    {'rule': check_protocol_parameter_names, 'params': ['investigation_df'], 'identifier': '1011'},
-    {'rule': check_study_factor_names, 'params': ['investigation_df'], 'identifier': '1012'},
+    {'rule': sample_not_declared, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1003'},
+    {'rule': check_protocol_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1007'},
+    {'rule': check_study_factor_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1008'},
+    {'rule': check_protocol_parameter_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1009'},
+    {'rule': check_protocol_names, 'params': ['investigation_df_dict'], 'identifier': '1010'},
+    {'rule': check_protocol_parameter_names, 'params': ['investigation_df_dict'], 'identifier': '1011'},
+    {'rule': check_study_factor_names, 'params': ['investigation_df_dict'], 'identifier': '1012'},
 
-    {'rule': check_date_formats, 'params': ['investigation_df'], 'identifier': '3001'},
-    {'rule': check_dois, 'params': ['investigation_df'], 'identifier': '3002'},
-    {'rule': check_pubmed_ids_format, 'params': ['investigation_df'], 'identifier': '3003'},
-    {'rule': check_ontology_sources, 'params': ['investigation_df'], 'identifier': '3008'},
+    {'rule': check_date_formats, 'params': ['investigation_df_dict'], 'identifier': '3001'},
+    {'rule': check_dois, 'params': ['investigation_df_dict'], 'identifier': '3002'},
+    {'rule': check_pubmed_ids_format, 'params': ['investigation_df_dict'], 'identifier': '3003'},
+    {'rule': check_ontology_sources, 'params': ['investigation_df_dict'], 'identifier': '3008'},
 
     {'rule': load_config, 'params': ['configs'], 'identifier': '4001'},
-    {'rule': check_measurement_technology_types, 'params': ['investigation_df', 'configs'], 'identifier': '4002'},
-    {'rule': check_investigation_against_config, 'params': ['investigation_df', 'configs'], 'identifier': '4003'},
+    {'rule': check_measurement_technology_types, 'params': ['investigation_df_dict', 'configs'], 'identifier': '4002'},
+    {'rule': check_investigation_against_config, 'params': ['investigation_df_dict', 'configs'], 'identifier': '4003'},
 
     # copies
-    {'rule': check_table_files_read, 'params': ['investigation_df', 'dir_context'], 'identifier': '0008'},
-    {'rule': check_protocol_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1019'},
-    {'rule': check_protocol_parameter_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1020'},
-    {'rule': check_study_factor_usage, 'params': ['investigation_df', 'dir_context'], 'identifier': '1021'},
+    {'rule': check_table_files_read, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '0008'},
+    {'rule': check_protocol_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1019'},
+    {'rule': check_protocol_parameter_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1020'},
+    {'rule': check_study_factor_usage, 'params': ['investigation_df_dict', 'dir_context'], 'identifier': '1021'},
 ]
 
 
 STUDY_RULES_MAPPING = [
diff --git a/isatools/isatab/validate/rules/rules_00xx.py b/isatools/isatab/validate/rules/rules_00xx.py
index 248d1447..1a52aa56 100644
--- a/isatools/isatab/validate/rules/rules_00xx.py
+++ b/isatools/isatab/validate/rules/rules_00xx.py
@@ -5,14 +5,14 @@ from isatools.isatab.defaults import log
 
 
-def check_table_files_read(i_df, dir_context):
+def check_table_files_read(i_df_dict, dir_context):
     """Used for rules 0006 and 0008
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param dir_context: Path to where the investigation file is found
     :return: None
     """
-    for i, study_df in enumerate(i_df['studies']):
+    for i, study_df in enumerate(i_df_dict['studies']):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename != '':
             try:
@@ -22,7 +22,7 @@ def check_table_files_read(i_df, dir_context):
                 spl = "Study File {} does not appear to exist".format(study_filename)
                 validator.add_error(message="Missing study tab file(s)", supplemental=spl, code=6)
                 log.error("(E) Study File {} does not appear to exist".format(study_filename))
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     with utf8_text_file_open(path.join(dir_context, assay_filename)):
diff --git a/isatools/isatab/validate/rules/rules_10xx.py b/isatools/isatab/validate/rules/rules_10xx.py
index 190cd273..6bfc1b0c 100644
--- a/isatools/isatab/validate/rules/rules_10xx.py
+++ b/isatools/isatab/validate/rules/rules_10xx.py
@@ -9,14 +9,14 @@ from isatools.isatab.utils import cell_has_value
 
 
-def check_samples_not_declared_in_study_used_in_assay(i_df, dir_context):
+def check_samples_not_declared_in_study_used_in_assay(i_df_dict, dir_context):
     """Checks if samples found in assay tables are found in the study-sample table
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param dir_context: Path to where the investigation file is found
     :return: None
     """
-    for i, study_df in enumerate(i_df['studies']):
+    for i, study_df in enumerate(i_df_dict['studies']):
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename != '':
             try:
@@ -25,7 +25,7 @@ def check_samples_not_declared_in_study_used_in_assay(i_df, dir_context):
                     study_samples = set(study_df['Sample Name'])
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     with utf8_text_file_open(path.join(dir_context, assay_filename)) as a_fp:
@@ -40,15 +40,15 @@ def check_samples_not_declared_in_study_used_in_assay(i_df, dir_context):
                 pass
 
 
-def check_study_factor_usage(i_df, dir_context):
+def check_study_factor_usage(i_df_dict, dir_context):
     """Used for rules 1008 and 1021
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param dir_context: Path to where the investigation file is found
     :return: None
     """
-    for i, study_df in enumerate(i_df['studies']):
-        study_factors_declared = set(i_df['s_factors'][i]['Study Factor Name'].tolist())
+    for i, study_df in enumerate(i_df_dict['studies']):
+        study_factors_declared = set(i_df_dict['s_factors'][i]['Study Factor Name'].tolist())
         study_filename = study_df.iloc[0]['Study File Name']
         error_spl = "Some factors used in an study file {} are not declared in the investigation file: {}"
         error_msg = "Some factors are not declared in the investigation"
@@ -66,7 +66,7 @@ def check_study_factor_usage(i_df, dir_context):
                         validator.add_error(message=error_msg, supplemental=spl, code=1008)
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
            if assay_filename != '':
                 try:
                     study_factors_used = set()
@@ -92,7 +92,7 @@ def check_study_factor_usage(i_df, dir_context):
                         study_factors_used = study_factors_used.union(set(fv))
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     with utf8_text_file_open(path.join(dir_context, assay_filename)) as a_fp:
@@ -109,15 +109,15 @@ def check_study_factor_usage(i_df, dir_context):
                 .format(list(study_factors_declared - study_factors_used)))
 
 
-def check_protocol_usage(i_df, dir_context):
+def check_protocol_usage(i_df_dict, dir_context):
     """Used for rules 1007 and 1019
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param dir_context: Path to where the investigation file is found
     :return: None
    """
-    for i, study_df in enumerate(i_df['studies']):
-        protocols_declared = set(i_df['s_protocols'][i]['Study Protocol Name'].tolist())
+    for i, study_df in enumerate(i_df_dict['studies']):
+        protocols_declared = set(i_df_dict['s_protocols'][i]['Study Protocol Name'].tolist())
         protocols_declared.add('')
         study_filename = study_df.iloc[0]['Study File Name']
         if study_filename != '':
@@ -136,7 +136,7 @@ def check_protocol_usage(i_df, dir_context):
                     log.error("(E) {}".format(spl))
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     protocol_refs_used = set()
@@ -165,7 +165,7 @@ def check_protocol_usage(i_df, dir_context):
             except FileNotFoundError:
                 pass
         for j, assay_filename in enumerate(
-                i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+                i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     with utf8_text_file_open(path.join(dir_context, assay_filename)) as a_fp:
@@ -183,16 +183,16 @@ def check_protocol_usage(i_df, dir_context):
             log.warning(warning)
 
 
-def check_protocol_parameter_usage(i_df, dir_context):
+def check_protocol_parameter_usage(i_df_dict, dir_context):
     """Used for rules 1009 and 1020
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param dir_context: Path to where the investigation file is found
     :return: None
     """
-    for i, study_df in enumerate(i_df['studies']):
+    for i, study_df in enumerate(i_df_dict['studies']):
         protocol_parameters_declared = set()
-        protocol_parameters_per_protocol = set(i_df['s_protocols'][i]['Study Protocol Parameters Name'].tolist())
+        protocol_parameters_per_protocol = set(i_df_dict['s_protocols'][i]['Study Protocol Parameters Name'].tolist())
         for protocol_parameters in protocol_parameters_per_protocol:
             parameters_list = protocol_parameters.split(';')
             protocol_parameters_declared = protocol_parameters_declared.union(set(parameters_list))
@@ -216,7 +216,7 @@ def check_protocol_parameter_usage(i_df, dir_context):
                     log.error(error)
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     protocol_parameters_used = set()
@@ -246,7 +246,7 @@ def check_protocol_parameter_usage(i_df, dir_context):
                         protocol_parameters_used = protocol_parameters_used.union(set(pv))
             except FileNotFoundError:
                 pass
-        for j, assay_filename in enumerate(i_df['s_assays'][i]['Study Assay File Name'].tolist()):
+        for j, assay_filename in enumerate(i_df_dict['s_assays'][i]['Study Assay File Name'].tolist()):
             if assay_filename != '':
                 try:
                     with utf8_text_file_open(path.join(dir_context, assay_filename)) as a_fp:
@@ -263,13 +263,13 @@ def check_protocol_parameter_usage(i_df, dir_context):
             log.warning(warning)
 
 
-def check_protocol_names(i_df):
+def check_protocol_names(i_df_dict):
     """Used for rule 1010
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
-    for study_protocols_df in i_df['s_protocols']:
+    for study_protocols_df in i_df_dict['s_protocols']:
         for i, protocol_name in enumerate(study_protocols_df['Study Protocol Name'].tolist()):
             # DataFrames labels empty cells as 'Unnamed: n'
             if protocol_name == '' or 'Unnamed: ' in protocol_name:
@@ -279,13 +279,13 @@ def check_protocol_names(i_df):
             log.warning(warning)
 
 
-def check_protocol_parameter_names(i_df):
+def check_protocol_parameter_names(i_df_dict):
     """Used for rule 1011
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
-    for study_protocols_df in i_df['s_protocols']:
+    for study_protocols_df in i_df_dict['s_protocols']:
         for i, protocol_parameters_names in enumerate(study_protocols_df['Study Protocol Parameters Name'].tolist()):
             # There's an empty cell if no protocols
             if len(protocol_parameters_names.split(sep=';')) > 1:
@@ -298,13 +298,13 @@ def check_protocol_parameter_names(i_df):
             log.warning(warning)
 
 
-def check_study_factor_names(i_df):
+def check_study_factor_names(i_df_dict):
     """Used for rule 1012
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
-    for study_factors_df in i_df['s_factors']:
+    for study_factors_df in i_df_dict['s_factors']:
         for i, factor_name in enumerate(study_factors_df['Study Factor Name'].tolist()):
             # DataFrames labels empty cells as 'Unnamed: n'
             if factor_name == '' or 'Unnamed: ' in factor_name:
diff --git a/isatools/isatab/validate/rules/rules_30xx.py b/isatools/isatab/validate/rules/rules_30xx.py
index 22e2e74a..1716b4df 100644
--- a/isatools/isatab/validate/rules/rules_30xx.py
+++ b/isatools/isatab/validate/rules/rules_30xx.py
@@ -1,33 +1,31 @@
 import iso8601
 
-from pandas import DataFrame
-
 from isatools.isatab.validate.store import validator
 from isatools.isatab.defaults import log, _RX_DOI, _RX_PMID, _RX_PMCID
 from isatools.isatab.utils import cell_has_value
 
 
-def check_filenames_present(i_df: DataFrame) -> None:
+def check_filenames_present(i_df_dict: dict) -> None:
     """ Used for rule 3005
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
-    for s_pos, study_df in enumerate(i_df['studies']):
+    for s_pos, study_df in enumerate(i_df_dict['studies']):
         if study_df.iloc[0]['Study File Name'] == '':
             validator.add_warning(message="Missing Study File Name", supplemental="STUDY.{}".format(s_pos), code=3005)
             log.warning("(W) A study filename is missing for STUDY.{}".format(s_pos))
-        for a_pos, filename in enumerate(i_df['s_assays'][s_pos]['Study Assay File Name'].tolist()):
+        for a_pos, filename in enumerate(i_df_dict['s_assays'][s_pos]['Study Assay File Name'].tolist()):
             if filename == '':
                 spl = "STUDY.{}, STUDY ASSAY.{}".format(s_pos, a_pos)
                 validator.add_warning.append(message="Missing assay file name", supplemental=spl, code=3005)
                 log.warning("(W) An assay filename is missing for STUDY ASSAY.{}".format(a_pos))
 
 
-def check_date_formats(i_df):
+def check_date_formats(i_df_dict):
     """ Used for rule 3001
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
@@ -45,13 +43,13 @@ def check_iso8601_date(date_str):
             validator.add_warning(message="Date is not ISO8601 formatted", supplemental=spl, code=3001)
             log.warning("(W) Date {} does not conform to ISO8601 format".format(date_str))
 
-    release_date_vals = i_df['investigation']['Investigation Public Release Date'].tolist()
+    release_date_vals = i_df_dict['investigation']['Investigation Public Release Date'].tolist()
     if len(release_date_vals) > 0:
         check_iso8601_date(release_date_vals[0])
-    sub_date_values = i_df['investigation']['Investigation Submission Date'].tolist()
+    sub_date_values = i_df_dict['investigation']['Investigation Submission Date'].tolist()
     if len(sub_date_values) > 0:
         check_iso8601_date(sub_date_values[0])
-    for i, study_df in enumerate(i_df['studies']):
+    for i, study_df in enumerate(i_df_dict['studies']):
         release_date_vals = study_df['Study Public Release Date'].tolist()
         if len(release_date_vals) > 0:
             check_iso8601_date(release_date_vals[0])
@@ -60,10 +58,10 @@ def check_iso8601_date(date_str):
             check_iso8601_date(sub_date_values[0])
 
 
-def check_dois(i_df):
+def check_dois(i_df_dict):
     """ Used for rule 3002
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
@@ -79,17 +77,17 @@ def check_doi(doi_str):
             validator.add_warning(message="DOI is not valid format", supplemental=spl, code=3002)
             log.warning("(W) DOI {} does not conform to DOI format".format(doi_str))
 
-    for doi in i_df['i_publications']['Investigation Publication DOI'].tolist():
+    for doi in i_df_dict['i_publications']['Investigation Publication DOI'].tolist():
         check_doi(doi)
-    for i, study_df in enumerate(i_df['s_publications']):
+    for i, study_df in enumerate(i_df_dict['s_publications']):
         for doi in study_df['Study Publication DOI'].tolist():
             check_doi(doi)
 
 
-def check_pubmed_ids_format(i_df):
+def check_pubmed_ids_format(i_df_dict):
     """ Used for rule 3003
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
@@ -105,21 +103,21 @@ def check_pubmed_id(pubmed_id_str):
             validator.add_warning(message="PubMed ID is not valid format", supplemental=spl, code=3003)
             log.warning("(W) PubMed ID {} is not valid format".format(pubmed_id_str))
 
-    for doi in i_df['i_publications']['Investigation PubMed ID'].tolist():
+    for doi in i_df_dict['i_publications']['Investigation PubMed ID'].tolist():
         check_pubmed_id(str(doi))
-    for study_pubs_df in i_df['s_publications']:
+    for study_pubs_df in i_df_dict['s_publications']:
         for doi in study_pubs_df['Study PubMed ID'].tolist():
             check_pubmed_id(str(doi))
 
 
-def check_ontology_sources(i_df):
+def check_ontology_sources(i_df_dict):
     """ Used for rule 3008
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :return: None
     """
     term_source_refs = []
-    for i, ontology_source_name in enumerate(i_df['ontology_sources']['Term Source Name'].tolist()):
+    for i, ontology_source_name in enumerate(i_df_dict['ontology_sources']['Term Source Name'].tolist()):
         if ontology_source_name == '' or 'Unnamed: ' in ontology_source_name:
             spl = "pos={}".format(i)
             warn = "(W) An Ontology Source Reference at position {} is missing Term Source Name, so can't be referenced"
diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py
index 7f39c0df..84b87ace 100644
--- a/isatools/isatab/validate/rules/rules_40xx.py
+++ b/isatools/isatab/validate/rules/rules_40xx.py
@@ -13,10 +13,10 @@
 )
 
 
-def check_investigation_against_config(i_df, configs):
+def check_investigation_against_config(i_df_dict, configs):
     """Checks investigation file against the loaded configurations
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param configs: A dictionary of ISA Configuration objects
     :return: None
     """
@@ -52,18 +52,18 @@ def check_section_against_required_fields_one_value(section, required, i=0):
 
     config_fields = configs[('[investigation]', '')].get_isatab_configuration()[0].get_field()
     required_fields = [i.header for i in config_fields if i.is_required]
-    check_section_against_required_fields_one_value(i_df['investigation'], required_fields)
-    check_section_against_required_fields_one_value(i_df['i_publications'], required_fields)
-    check_section_against_required_fields_one_value(i_df['i_contacts'], required_fields)
+    check_section_against_required_fields_one_value(i_df_dict['investigation'], required_fields)
+    check_section_against_required_fields_one_value(i_df_dict['i_publications'], required_fields)
+    check_section_against_required_fields_one_value(i_df_dict['i_contacts'], required_fields)
 
-    for x, study_df in enumerate(i_df['studies']):
-        check_section_against_required_fields_one_value(i_df['studies'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_design_descriptors'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_publications'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_factors'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_assays'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_protocols'][x], required_fields, x)
-        check_section_against_required_fields_one_value(i_df['s_contacts'][x], required_fields, x)
+    for x, study_df in enumerate(i_df_dict['studies']):
+        check_section_against_required_fields_one_value(i_df_dict['studies'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_design_descriptors'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_publications'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_factors'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_assays'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_protocols'][x], required_fields, x)
+        check_section_against_required_fields_one_value(i_df_dict['s_contacts'][x], required_fields, x)
 
 
 def load_config(config_dir):
@@ -92,16 +92,16 @@ def load_config(config_dir):
     return configs
 
 
-def check_measurement_technology_types(i_df, configs):
+def check_measurement_technology_types(i_df_dict, configs):
     """Rule 4002
 
-    :param i_df: An investigation DataFrame
+    :param i_df_dict: A dictionary of DataFrames and lists of DataFrames representing the investigation file
     :param configs: A dictionary of ISA Configuration objects
     :return: None
     """
-    for i, assay_df in enumerate(i_df['s_assays']):
-        measurement_types = assay_df['Study Assay Measurement Type'].tolist()
-        technology_types = assay_df['Study Assay Technology Type'].tolist()
+    for i, study_assays_df in enumerate(i_df_dict['s_assays']):
+        measurement_types = study_assays_df['Study Assay Measurement Type'].tolist()
+        technology_types = study_assays_df['Study Assay Technology Type'].tolist()
         if len(measurement_types) == len(technology_types):
             for x, measurement_type in enumerate(measurement_types):
                 lowered_mt = measurement_types[x].lower()
diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py
index 6c9aeda9..ecf8a745 100644
--- a/tests/isatab/validate/test_core.py
+++ b/tests/isatab/validate/test_core.py
@@ -47,7 +47,7 @@ def test_bii_s_7(self):
     def test_print_rule(self):
         raw_rule = INVESTIGATION_RULES_MAPPING[0]
         rule = Rule(**raw_rule)
-        expected_string = "rule=check_table_files_read, params=['investigation_df', 'dir_context'], identifier=0006"
+        expected_string = "rule=check_table_files_read, params=['investigation_df_dict', 'dir_context'], identifier=0006"
         self.assertEqual(str(rule), expected_string)
 
     def test_rules_error(self):
@@ -69,7 +69,7 @@ def is_investigation(investigation_df):
             *INVESTIGATION_RULES_MAPPING,
             {
                 'rule': is_investigation,
-                'params': ['investigation_df'],
+                'params': ['investigation_df_dict'],
                 'identifier': '6000'
             }
         ],
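
Reviewer note, not part of the diff: a minimal sketch of how the renamed keyword is consumed after this change. The ISAInvestigationValidator import path and signature, and the dict keys ('studies', 's_assays', 's_protocols', ...), come from the hunks above; the import location of load_investigation, the input filename, and the configs path are assumptions for illustration only.

    from os import path

    # Assumed import location: load_investigation is called but not imported in the
    # hunks shown above, so this module path is a guess.
    from isatools.isatab.load import load_investigation
    from isatools.isatab.validate.rules.core import ISAInvestigationValidator

    with open('i_investigation.txt', encoding='utf-8') as fp:  # placeholder filename
        i_df_dict = load_investigation(fp=fp)   # dict of DataFrames / lists of DataFrames
        investigation_validator = ISAInvestigationValidator(
            investigation_df_dict=i_df_dict,    # renamed keyword (was investigation_df)
            dir_context=path.dirname(fp.name),  # directory holding the study/assay tables
            configs='/path/to/isaconfig-xml',   # placeholder: directory of XML configuration files
        )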