diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc
index 5a18c32f9..08d1d03de 100644
--- a/loadgen/test_settings_internal.cc
+++ b/loadgen/test_settings_internal.cc
@@ -334,6 +334,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
              s.performance_issue_same_index);
   MLPERF_LOG(detail, "requested_performance_sample_count_override",
              s.performance_sample_count_override);
+  MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
+             s.sample_concatenate_permutation);
   // Token latencies specific values
   if (s.use_token_latencies){
     MLPERF_LOG(detail, "requested_use_token_latencies", s.use_token_latencies);
@@ -443,6 +445,8 @@ void TestSettingsInternal::LogEffectiveSettings() const {
              s.performance_issue_same_index);
   MLPERF_LOG(detail, "effective_performance_sample_count",
              s.performance_sample_count);
+  MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
+             s.sample_concatenate_permutation);
 #else
   detail("");
   detail("Effective Settings:");
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index b422cbb59..a0e996af4 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -801,6 +801,21 @@ def uses_early_stopping(self, scenario):
         return (
             scenario in ["Server", "SingleStream", "MultiStream"]
         )
+
+    def requires_equal_issue(self, model, division):
+        return (
+            division in ["closed", "network"] and
+            model in [
+                "3d-unet-99",
+                "3d-unet-99.9",
+                "gptj-99",
+                "gptj-99.9",
+                "llama2-70b-99",
+                "llama2-70b-99.9",
+                "mixtral-8x7b"
+            ]
+            and self.version in ["v4.1"]
+        )
 
 
 def get_args():
@@ -1106,6 +1121,9 @@ def check_performance_dir(
     min_query_count = mlperf_log["effective_min_query_count"]
     samples_per_query = mlperf_log["effective_samples_per_query"]
     min_duration = mlperf_log["effective_min_duration_ms"]
+    equal_issue_used_check = (mlperf_log["effective_sample_concatenate_permutation"] == "true")
+    if not config.requires_equal_issue(model, division):
+        equal_issue_used_check = True
     sut_name = mlperf_log["sut_name"]
 
     # check if there are any errors in the detailed log
@@ -1151,6 +1169,7 @@ def check_performance_dir(
     if scenario == "SingleStream" or scenario == "MultiStream":
         res /= MS_TO_NS
 
+
     # Check if the current scenario uses early stopping
     uses_early_stopping = config.uses_early_stopping(scenario)
 
@@ -1255,7 +1274,7 @@ def check_performance_dir(
             )
             is_valid = False
 
-    return is_valid, res, inferred
+    return is_valid, res, inferred, equal_issue_used_check
 
 
 def get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, log_error=False):
@@ -1946,18 +1965,21 @@ def log_result(
                     errors += 1
                     continue
                 else:
-                    if not check_measurement_dir(
+                    measurement_check, conf_equal_issue_check = check_measurement_dir(
+                        config,
                         measurement_dir,
                         name,
                         system_desc,
                         os.path.join(division, submitter),
                         model_name,
                         scenario,
+                        division,
                         has_power,
                         skip_meaningful_fields_emptiness_check,
                         skip_empty_files_check,
                         skip_check_power_measure_files,
-                    ):
+                    )
+                    if not measurement_check:
                         log.error(
                             "%s measurement_dir has issues", measurement_dir
                         )
@@ -2039,7 +2061,7 @@ def log_result(
                     continue
 
                 try:
-                    is_valid, r, is_inferred = check_performance_dir(
+                    is_valid, r, is_inferred, performance_equal_issue_check = check_performance_dir(
                         config,
                         mlperf_model,
                         perf_path,
@@ -2053,6 +2075,16 @@ def log_result(
                         log.info(
                             "%s has inferred results, qps=%s", perf_path, r
                         )
+
+                    # Check equal issue mode
+                    if not (conf_equal_issue_check or performance_equal_issue_check):
+                        log.error(
+                            "%s %s requires equal issue mode (sample_concatenate_permutation), expected=true, found=%s",
+                            perf_path,
+                            measurement_dir,
+                            conf_equal_issue_check or performance_equal_issue_check,
+                        )
+                        is_valid, r = False, None
                 except Exception as e:
                     log.error(
                         "%s caused exception in check_performance_dir: %s",
@@ -2310,12 +2342,14 @@ def check_system_desc_id_power(
 
 
 def check_measurement_dir(
+    config,
     measurement_dir,
     fname,
     system_desc,
     root,
     model,
     scenario,
+    division,
     has_power,
     skip_meaningful_fields_emptiness_check,
     skip_empty_files_check,
@@ -2406,11 +2440,63 @@ def check_measurement_dir(
             if not os.path.exists(os.path.dirname(code_dir)):
                 log.error("%s is missing code_dir %s", fname, code_dir)
                 is_valid = False
+
+        # Check equal issue mode
+        equal_issue_used = False
+        if "mlperf.conf" in files and config.requires_equal_issue(model, division):
+            with open(f"{measurement_dir}/mlperf.conf") as f:
+                lines = f.readlines()
+                conf_ref_model = model.replace("-99.9", "").replace("-99", "")
+                for line in lines:
+                    line = line.replace(" ", "").replace("\n", "")
+                    if line.startswith("#"):
+                        continue
+                    elif line == "":
+                        continue
+                    else:
+                        key, val = line.split("=")
+                        key = key.replace(" ", "")
+                        val = val.replace(" ", "")
+                        conf_model, conf_scenario, conf_key = key.split(".")
+                        if (
+                            (conf_key == "sample_concatenate_permutation") and
+                            ((conf_model == conf_ref_model) or conf_model == "*") and
+                            ((conf_scenario == scenario) or conf_scenario == "*")
+                        ):
+                            if val.isnumeric():
+                                val = int(val)
+                            equal_issue_used = (val == 1)
+                            break
+
+        if "user.conf" in files and config.requires_equal_issue(model, division):
+            with open(f"{measurement_dir}/user.conf") as f:
+                lines = f.readlines()
+                conf_ref_model = model.replace("-99.9", "").replace("-99", "")
+                for line in lines:
+                    line = line.replace(" ", "").replace("\n", "")
+                    if line.startswith("#"):
+                        continue
+                    elif line == "":
+                        continue
+                    else:
+                        key, val = line.split("=")
+                        key = key.replace(" ", "")
+                        val = val.replace(" ", "")
+                        conf_model, conf_scenario, conf_key = key.split(".")
+                        if (
+                            (conf_key == "sample_concatenate_permutation") and
+                            ((conf_model == conf_ref_model) or conf_model == "*") and
+                            ((conf_scenario == scenario) or conf_scenario == "*")
+                        ):
+                            if val.isnumeric():
+                                val = int(val)
+                            equal_issue_used = (val == 1)
+                            break
     else:
         log.error("%s is missing %s*.json", fname, system_desc)
         is_valid = False
 
-    return is_valid
+    return is_valid, equal_issue_used
 
 
 def check_compliance_perf_dir(test_dir):
@@ -2631,7 +2717,7 @@ def check_compliance_dir(
         compliance_perf_dir = os.path.join(
             compliance_dir, test, "performance", "run_1"
         )
-        compliance_perf_valid, r, is_inferred = check_performance_dir(
+        compliance_perf_valid, r, is_inferred, _ = check_performance_dir(
            config, model, compliance_perf_dir, scenario, division, system_json
        )
        if is_inferred:
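
For reference, below is a minimal sketch (not part of the patch) of how the conf-parsing loop added to check_measurement_dir decides whether equal issue mode is enabled. The helper name equal_issue_enabled and the sample user.conf entries are hypothetical; "gptj" stands in for the reference model name after the "-99"/"-99.9" suffix is stripped, and the wildcard "*" matches any model or scenario, mirroring the rules in the patch.

def equal_issue_enabled(conf_lines, ref_model, scenario):
    # Look for a <model>.<scenario>.sample_concatenate_permutation = 1 entry,
    # where <model> and <scenario> may be the wildcard "*".
    for line in conf_lines:
        line = line.replace(" ", "").replace("\n", "")
        if line == "" or line.startswith("#"):
            continue
        key, val = line.split("=")
        conf_model, conf_scenario, conf_key = key.split(".")
        if (
            conf_key == "sample_concatenate_permutation"
            and conf_model in (ref_model, "*")
            and conf_scenario in (scenario, "*")
        ):
            return val.isnumeric() and int(val) == 1
    return False

# Hypothetical user.conf contents for a gptj submission:
sample_conf = [
    "# enable equal issue mode for every scenario",
    "gptj.*.sample_concatenate_permutation = 1",
]
print(equal_issue_enabled(sample_conf, "gptj", "Offline"))  # True

Either the measurement directory's conf files or the effective_sample_concatenate_permutation entry in the performance log can satisfy the check; the submission is flagged only when neither source shows equal issue mode for a model that requires it.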