Fix sdxl submitter (#46)
* Add support for sdxl to generate_tables

* Update code_axs.py to fix getting accuracy_report

* Rename generate_tables to generate_table, fix order of fetching FID_SCORE and CLIP_SCORE from sdxl accuracy experiments

* Remove debug print statement.

* Display sdxl accuracy ranges as target for sdxl
sahelib25 authored Jun 25, 2024
1 parent 7367028 commit 94e88c3
Showing 1 changed file with 54 additions and 9 deletions.
63 changes: 54 additions & 9 deletions submitter/code_axs.py
@@ -515,7 +515,7 @@ def copy_readmes_for_code(experiment_entries, division, submitter, submitted_tre
             print(f" NOT Copying: {file_to_copy_source_path} --> {code_model_program_path}", file=sys.stderr)


-def generate_tables(experiment_entries, division, submitter, power, __entry__):
+def generate_table(experiment_entries, division, submitter, power, __entry__):

     col_names = ["SUT", "Scenario", "Mode / Compliance?", "Status", "Target metric", "Actual metric", "Power", "Efficiency"]
     table_data = []
@@ -542,7 +542,8 @@ def generate_tables(experiment_entries, division, submitter, power, __entry__):
         target_qps = experiment_entry.get("loadgen_target_qps")
         target_latency = experiment_entry.get("loadgen_target_latency")
         compliance_test_name = experiment_entry.get('loadgen_compliance_test')
-        accuracy_metric = experiment_entry.get("accuracy_report")
+        if mode == "Accuracy":
+            accuracy_metric = experiment_entry.get("accuracy_report")

         # Function to extract the actual performance metric
         def get_samples_per_second(file_path):
@@ -601,6 +602,18 @@ def extract_map(accuracy_metric):
                 map_value = map_part.split('%')[0].strip()
                 return map_value
             return "mAP value not found"
+
+        def extract_accuracy_sdxl(accuracy_metric):
+            if accuracy_metric is not None and "\'FID_SCORE\'" in accuracy_metric and "\'CLIP_SCORE\'" in accuracy_metric:
+                fid_score_part = accuracy_metric.split('\'FID_SCORE\':')[1]
+                fid_score_value = fid_score_part.split(',')[0].strip()
+
+                clip_score_part = accuracy_metric.split('\'CLIP_SCORE\':')[1]
+                clip_score_value = clip_score_part.split('}')[0].strip()
+
+                return float(fid_score_value), float(clip_score_value)
+            return "Scores not found."
+

         if power and "power_loadgen_output" in experiment_entry["tags"]:
             target_entry = get_testing_entry(experiment_entry)
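The new extract_accuracy_sdxl helper is plain string surgery over the report, so it assumes a dict-shaped string containing both keys. A minimal usage sketch, assuming the helper above is in scope (the report value below is made up for illustration):

    sample_report = "{'FID_SCORE': 23.4212, 'CLIP_SCORE': 31.7421}"  # hypothetical report string
    fid, clip = extract_accuracy_sdxl(sample_report)
    # fid -> 23.4212, clip -> 31.7421
    # A report missing either key returns the string "Scores not found." instead of a tuple.

Note the mixed return type (tuple on success, str on failure); the isinstance check added later in this commit guards against exactly that.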
@@ -615,17 +628,26 @@ def extract_map(accuracy_metric):
                 "resnet50": round(76.46 * 0.99, 3),
                 "retinanet": round(37.55 * 0.99, 3),
                 "bert-99": round(90.874 * 0.99, 3),
-                "bert-99.9": round(90.874 * 0.999, 3)
+                "bert-99.9": round(90.874 * 0.999, 3),
+                "stable-diffusion-xl": ("FID_SCORE", 23.01085758, "CLIP_SCORE", 31.68631873)
             }

             # Actual accuracy for workloads
             actual_accuracy = {
                 "resnet50": extract_accuracy_ic(accuracy_metric),
                 "retinanet": extract_map(accuracy_metric),
                 "bert-99": extract_accuracy_bert(accuracy_metric),
-                "bert-99.9": extract_accuracy_bert(accuracy_metric)
+                "bert-99.9": extract_accuracy_bert(accuracy_metric),
+                "stable-diffusion-xl": extract_accuracy_sdxl(accuracy_metric)
             }

+            # Accuracy upper limit
+            accuracy_upper_limit = {
+                "stable-diffusion-xl": ("FID_SCORE", 23.95007626, "CLIP_SCORE", 31.81331801)
+            }
+
+            target_acc = target_accuracy[model_name]
+            actual_acc = actual_accuracy[model_name]

         if "power_loadgen_output" in experiment_entry["tags"] and power:
             power_experiment_entry = experiment_entry
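A note on the shape of these entries: the SDXL values are flat 4-tuples interleaving labels and numbers, so the numeric bounds sit at indices 1 and 3, which is how the comparison code below reads them. A small sketch of that access pattern:

    target_acc = ("FID_SCORE", 23.01085758, "CLIP_SCORE", 31.68631873)
    target_fid_score = target_acc[1]   # 23.01085758
    target_clip_score = target_acc[3]  # 31.68631873

A dict keyed by metric name would arguably be less index-fragile; the flat tuple is simply the layout this commit uses.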
@@ -644,11 +666,34 @@ def extract_map(accuracy_metric):
                 status = get_result_status(mlperf_log_path)

             else:
-                if (target_accuracy[model_name]) <= float(actual_accuracy[model_name]):
-                    status = "VALID"
-                actual_metric = actual_accuracy[model_name]
-                target = target_accuracy[model_name]
-                energy_eff = "N/A"
+                if model_name == "stable-diffusion-xl":
+                    target_fid_score = target_acc[1]
+                    target_clip_score = target_acc[3]
+                    upper_fid_score = accuracy_upper_limit[model_name][1]
+                    upper_clip_score = accuracy_upper_limit[model_name][3]
+                    # Extract actual values
+                    if isinstance(actual_acc, tuple) and len(actual_acc) == 2:
+                        actual_fid_score, actual_clip_score = actual_acc
+                    else:
+                        raise ValueError("Invalid format for actual accuracy values")
+
+                    # Compare values within the range
+                    if target_fid_score <= actual_fid_score <= upper_fid_score and target_clip_score <= actual_clip_score <= upper_clip_score:
+                        status = "VALID"
+                    else:
+                        status = "INVALID"
+
+                    actual_metric = f"FID_SCORE: {actual_fid_score}\nCLIP_SCORE: {actual_clip_score}"
+                    target = f"FID_SCORE range: [{target_fid_score}, {upper_fid_score}]\nCLIP_SCORE range: [{target_clip_score}, {upper_clip_score}]"
+
+                else:
+                    if (float(actual_acc) >= target_acc):
+                        status = "VALID"
+                    else:
+                        status = "INVALID"
+                    actual_metric = actual_acc
+                    target = target_acc
+                energy_eff = "N/A"

         if scenario in ["Offline", "Server"] and mode.lower() == "performance" and not power:
             target = target_qps
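Putting the two tables together: an SDXL accuracy run is VALID only when both measured scores land inside their closed intervals. A worked example with hypothetical measured values:

    # Bounds taken from target_accuracy and accuracy_upper_limit above
    fid, clip = 23.42, 31.75  # hypothetical measured scores
    valid = (23.01085758 <= fid <= 23.95007626) and (31.68631873 <= clip <= 31.81331801)
    # valid -> True, so status would be "VALID"

For every other model the check stays one-sided: the measured value only has to meet or exceed the 99% (or 99.9%) target.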
