Check for equal issue mode in submission checker (#1802)
pgmpablo157321 authored Jul 24, 2024
1 parent 019f9ac commit 3fb6c3e
Showing 2 changed files with 96 additions and 6 deletions.
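In short: for v4.1, closed- and network-division results for the models listed in requires_equal_issue below must be produced with loadgen's equal issue mode (sample_concatenate_permutation) enabled. A hedged sketch of the acceptance logic this commit implements, using the names introduced in the diff (evidence from either the measurement-dir conf files or the performance detail log is accepted):

    # passes when the model is exempt, or when either source shows the mode enabled
    passes_equal_issue = (
        not config.requires_equal_issue(model, division)
        or conf_equal_issue_check
        or performance_equal_issue_check
    )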
4 changes: 4 additions & 0 deletions loadgen/test_settings_internal.cc
@@ -334,6 +334,8 @@ void LogRequestedTestSettings(const TestSettings &s) {
s.performance_issue_same_index);
MLPERF_LOG(detail, "requested_performance_sample_count_override",
s.performance_sample_count_override);
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
s.sample_concatenate_permutation);
// Token latencies specific values
if (s.use_token_latencies){
MLPERF_LOG(detail, "requested_use_token_latencies", s.use_token_latencies);
@@ -443,6 +445,8 @@ void TestSettingsInternal::LogEffectiveSettings() const {
s.performance_issue_same_index);
MLPERF_LOG(detail, "effective_performance_sample_count",
s.performance_sample_count);
MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
s.sample_concatenate_permutation);
#else
detail("");
detail("Effective Settings:");
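For reference, MLPERF_LOG writes structured entries to mlperf_log_detail.txt, so the two added calls should produce lines of roughly this shape (a sketch: fields abbreviated, values illustrative):

    :::MLLOG {"key": "requested_sample_concatenate_permutation", "value": true, ...}
    :::MLLOG {"key": "effective_sample_concatenate_permutation", "value": true, ...}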
98 changes: 92 additions & 6 deletions tools/submission/submission_checker.py
@@ -801,6 +801,21 @@ def uses_early_stopping(self, scenario):
return (
scenario in ["Server", "SingleStream", "MultiStream"]
)

def requires_equal_issue(self, model, division):
    return (
        division in ["closed", "network"]
        and model in [
            "3d-unet-99",
            "3d-unet-99.9",
            "gptj-99",
            "gptj-99.9",
            "llama2-70b-99",
            "llama2-70b-99.9",
            "mixtral-8x7b",
        ]
        and self.version in ["v4.1"]
    )
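For illustration, hypothetical evaluations of this predicate on a v4.1 config (model and division strings as the checker passes them):

    config.requires_equal_issue("gptj-99", "closed")    # True: listed model, closed division
    config.requires_equal_issue("gptj-99", "open")      # False: open division is exempt
    config.requires_equal_issue("resnet", "closed")     # False: model not in the list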


def get_args():
@@ -1106,6 +1121,9 @@ def check_performance_dir(
min_query_count = mlperf_log["effective_min_query_count"]
samples_per_query = mlperf_log["effective_samples_per_query"]
min_duration = mlperf_log["effective_min_duration_ms"]
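# Equal issue mode must be reported as enabled in the detail log; for models
# that do not require it, the check below is forced to pass.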
equal_issue_used_check = (mlperf_log["effective_sample_concatenate_permutation"] == "true")
if not config.requires_equal_issue(model, division):
equal_issue_used_check = True
sut_name = mlperf_log["sut_name"]

# check if there are any errors in the detailed log
@@ -1151,6 +1169,7 @@

if scenario == "SingleStream" or scenario == "MultiStream":
res /= MS_TO_NS


# Check if the current scenario uses early stopping
uses_early_stopping = config.uses_early_stopping(scenario)
@@ -1255,7 +1274,7 @@
)
is_valid = False

-    return is_valid, res, inferred
+    return is_valid, res, inferred, equal_issue_used_check

def get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, log_error=False):

@@ -1946,18 +1965,21 @@ def log_result(
errors += 1
continue
else:
-if not check_measurement_dir(
+measurement_check, conf_equal_issue_check = check_measurement_dir(
config,
measurement_dir,
name,
system_desc,
os.path.join(division, submitter),
model_name,
scenario,
division,
has_power,
skip_meaningful_fields_emptiness_check,
skip_empty_files_check,
skip_check_power_measure_files,
-):
+)
+if not measurement_check:
log.error(
"%s measurement_dir has issues", measurement_dir
)
@@ -2039,7 +2061,7 @@ def log_result(
continue

try:
-is_valid, r, is_inferred = check_performance_dir(
+is_valid, r, is_inferred, performance_equal_issue_check = check_performance_dir(
config,
mlperf_model,
perf_path,
@@ -2053,6 +2075,16 @@ def log_result(
log.info(
"%s has inferred results, qps=%s", perf_path, r
)

# Check equal issue mode
if not (conf_equal_issue_check or performance_equal_issue_check):
    log.error(
        "%s %s requires equal issue mode (sample_concatenate_permutation), expected=true, found=%s",
        perf_path,
        measurement_dir,
        conf_equal_issue_check or performance_equal_issue_check,
    )
    is_valid, r = False, None
except Exception as e:
log.error(
"%s caused exception in check_performance_dir: %s",
@@ -2310,12 +2342,14 @@ def check_system_desc_id_power(


def check_measurement_dir(
config,
measurement_dir,
fname,
system_desc,
root,
model,
scenario,
division,
has_power,
skip_meaningful_fields_emptiness_check,
skip_empty_files_check,
@@ -2406,11 +2440,63 @@
if not os.path.exists(os.path.dirname(code_dir)):
log.error("%s is missing code_dir %s", fname, code_dir)
is_valid = False

# Check equal issue mode
equal_issue_used = False
if "mlperf.conf" in files and config.requires_equal_issue(model, division):
with open(f"{measurement_dir}/mlperf.conf") as f:
lines = f.readlines()
conf_ref_model = model.replace("-99.9", "").replace("-99", "")
for line in lines:
line = line.replace(" ", "").replace("\n", "")
if line.startswith("#"):
continue
elif line == "":
continue
else:
key, val = line.split("=")
key.replace(" ", "")
val.replace(" ", "")
conf_model, conf_scenario, conf_key = key.split(".")
if (
(conf_key == "sample_concatenate_permutation") and
((conf_model == conf_ref_model) or conf_model == "*") and
((conf_scenario == scenario) or conf_scenario == "*")
):
if val.isnumeric():
val = int(val)
equal_issue_used = (val == 1)
break

if "user.conf" in files and config.requires_equal_issue(model, division):
with open(f"{measurement_dir}/user.conf") as f:
lines = f.readlines()
conf_ref_model = model.replace("-99.9", "").replace("-99", "")
for line in lines:
line = line.replace(" ", "").replace("\n", "")
if line.startswith("#"):
continue
elif line == "":
continue
else:
key, val = line.split("=")
key.replace(" ", "")
val.replace(" ", "")
conf_model, conf_scenario, conf_key = key.split(".")
if (
(conf_key == "sample_concatenate_permutation") and
((conf_model == conf_ref_model) or conf_model == "*") and
((conf_scenario == scenario) or conf_scenario == "*")
):
if val.isnumeric():
val = int(val)
equal_issue_used = (val == 1)
break
else:
log.error("%s is missing %s*.json", fname, system_desc)
is_valid = False

-    return is_valid
+    return is_valid, equal_issue_used
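As a concrete illustration of the format this parser matches (a hypothetical user.conf entry; keys follow the model.scenario.key layout, and "gptj-99" is reduced to "gptj" by the conf_ref_model substitution above):

    gptj.*.sample_concatenate_permutation = 1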


def check_compliance_perf_dir(test_dir):
@@ -2631,7 +2717,7 @@ def check_compliance_dir(
compliance_perf_dir = os.path.join(
compliance_dir, test, "performance", "run_1"
)
-compliance_perf_valid, r, is_inferred = check_performance_dir(
+compliance_perf_valid, r, is_inferred, _ = check_performance_dir(
config, model, compliance_perf_dir, scenario, division, system_json
)
if is_inferred:
