diff --git a/scripts/benchmarking/artv3_fullscript.py b/scripts/benchmarking/artv3_fullscript.py new file mode 100644 index 000000000..fda378ce9 --- /dev/null +++ b/scripts/benchmarking/artv3_fullscript.py @@ -0,0 +1,866 @@ +#!/usr/bin/env python3 +import os +import glob +import statistics +import argparse +from typing import List, Optional + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +# Try to import IPython display for notebook detection +try: + from IPython.display import display + from IPython import get_ipython + IN_NOTEBOOK = get_ipython() is not None +except Exception: + display = None + IN_NOTEBOOK = False + +# ========================= +# Global constants +# ========================= + +RAW_FLOPS_COL = "Mean gFlops (gigaFLOP per sec.)" +SMOOTH_FLOPS_COL = "Smoothed Mean flops (gigaFLOP per sec.)" +PROBLEM_SIZE_COL = "Problem size" +VARIANT_TUNING_COL = "Variant_Tuning" +KERNEL_COL = "Kernel" +BANDWIDTH_COL = "Bandwidth (GiB per sec.)" +SMOOTH_BW_COL = "Smoothed Bandwidth (GiB per sec.)" + +# ========================= +# General utilities +# ========================= + +def ensure_dir(path: str) -> None: + if path: + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text: str) -> str: + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir: str, patterns: List[str]) -> List[str]: + all_files: List[str] = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) + return all_files + +# ========================= +# Header detection / reading +# ========================= + +def _likely_header_score(line: str) -> int: + tokens = [ + "Kernel", "Variant", "Problem size", "Problem Size", "Mean flops", + "GFlop", "GFLOP", "GFLOPs", "GFLOPS" + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path: str) -> Optional[pd.DataFrame]: + """Read a CSV and heuristically detect the header row.""" + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print("Failed to read {}: {}".format(path, e)) + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): + if not line.strip(): + continue + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + if score >= 3: + break + + if header_idx is None: + print("Could not find header in {}, skipping.".format(path)) + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print("Failed to parse CSV {}: {}".format(path, e)) + return None + + df["__source_file__"] = path + return df + +# ========================= +# Column normalization +# ========================= + +def normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """Normalize various benchmark CSV headers to a standard schema.""" + candidates = { + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + PROBLEM_SIZE_COL: [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + RAW_FLOPS_COL: [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + BANDWIDTH_COL: [ + "Bandwidth (GiB per 
sec.)", + "Bandwidth (GiB/s)", + "Bandwidth (GB/s)", + "Bandwidth (GiB/sec)", + "Bandwidth (GB/sec)", + "Mean bandwidth (GiB/s)", + "Mean Bandwidth (GiB/s)", + "Mean Bandwidth (GiB per sec.)" + ], + } + + df = df.rename(columns={c: c.strip() for c in df.columns}) + new_col_map = {} + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break + + df = df.rename(columns=new_col_map) + return df + +# ========================= +# Backend / tuning classification +# ========================= + +def classify_backend_from_variant(variant) -> str: + s = "" if pd.isna(variant) else str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +def classify_tuning(row: pd.Series) -> str: + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + return "default" + +# ========================= +# Build combined table +# ========================= + +def build_combined_table( + root_dir: str = ".", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + verbose: bool = True, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + files = find_csv_files(root_dir, glob_patterns) + if not files: + if verbose: + print("No files matching patterns {} found under '{}'".format(glob_patterns, root_dir)) + return None + + if verbose: + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + required_cols = {KERNEL_COL, "Variant", PROBLEM_SIZE_COL, RAW_FLOPS_COL} + + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + missing = required_cols - set(df.columns) + if missing: + if verbose: + print("[SKIP] {} missing required columns after normalization: {}".format(path, sorted(missing))) + print(" Columns present:", list(df.columns)) + continue + dfs.append(df) + + if not dfs: + if verbose: + print("No CSV files could be parsed with required columns.") + return None + + combined_df = pd.concat(dfs, ignore_index=True) + + combined_df[KERNEL_COL] = combined_df[KERNEL_COL].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + if kernel_whitelist: + wl = [w.lower() for w in kernel_whitelist] + kernel_series = combined_df[KERNEL_COL].fillna("").astype(str).str.lower() + mask = kernel_series.apply(lambda k: any(w in k for w in wl)) + combined_df = combined_df[mask] + if combined_df.empty: + if verbose: + print("After applying kernel_whitelist, no rows remain.") + return None + + combined_df[PROBLEM_SIZE_COL] = pd.to_numeric(combined_df[PROBLEM_SIZE_COL], errors="coerce") + combined_df[RAW_FLOPS_COL] = pd.to_numeric(combined_df[RAW_FLOPS_COL], errors="coerce") + if BANDWIDTH_COL in combined_df.columns: + combined_df[BANDWIDTH_COL] = pd.to_numeric(combined_df[BANDWIDTH_COL], errors="coerce") + + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=[PROBLEM_SIZE_COL, RAW_FLOPS_COL]) + dropped = before_drop - len(combined_df) + if verbose and dropped > 0: + print("[CLEAN] Dropped {} rows with non-numeric {} or {}.".format( 
+ dropped, PROBLEM_SIZE_COL, RAW_FLOPS_COL)) + + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + combined_df[VARIANT_TUNING_COL] = ( + combined_df["Variant"].astype(str) + "-" + combined_df["Tuning"].astype(str) + ) + + return combined_df + +# ========================= +# Smoothing utilities +# ========================= + +def moving_median_smooth(y, k: int = 5): + n = len(y) + if n == 0: + return [] + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps: float = 0.1, w: int = 3): + """ + Returns the x value of the first saturation point, or None if none is found. + Saturation is defined as the first run of length >= w where y_smooth + stays within (1 - eps) of the maximum value. + """ + if not y_smooth: + return None + + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + n = len(y_smooth) + run_length = 0 + run_start_idx = None + + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return x[run_start_idx] + else: + run_length = 0 + run_start_idx = None + return None + +# ========================= +# Plotting for a single kernel (FLOPs) with FOM output as DataFrame +# ========================= + +def plot_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + plt.figure(figsize=(18, 7)) + variants = df[VARIANT_TUNING_COL].unique() + colors = plt.cm.tab10.colors + + if SMOOTH_FLOPS_COL not in df.columns: + df[SMOOTH_FLOPS_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y = subdf[RAW_FLOPS_COL].astype(float).values + if len(x) == 0: + continue + + y_smooth = moving_median_smooth(list(y), k=k) + + for xi, yi in zip(x, y_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_FLOPS_COL] = yi + + plt.plot( + x, y, "-o", + label="{} (raw)".format(variant), + markersize=8, + color=colors[idx % len(colors)], + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0, + ) + + plt.plot( + x, y_smooth, "--", + label="{} (smoothed)".format(variant), + linewidth=3, + color=colors[idx % len(colors)], + ) + + plt.title("Kernel: {}".format(kernel), fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Mean flops (gigaFLOP per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print("Saved plot to {}".format(fname)) + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + + # FOM-style output as DataFrame + fom_rows = [] + for variant in variants: + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = 
subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = "" + sat_flops_raw = "" + sat_bw = "" + + # === MODIFIED/ADDED FOR SATURATION REPORTING === + if len(x) > 0 and len(y_smooth) > 0: + sat_idx = None + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + run_length = 0 + run_start_idx = None + + for i in range(len(y_smooth)): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + sat_idx = run_start_idx + break + else: + run_length = 0 + run_start_idx = None + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' " + "variant '{}' (eps={}, w={})".format(kernel, variant, eps, w) + ) + else: + print( + "[INFO] Not enough data to compute saturation for " + "kernel '{}' variant '{}'".format(kernel, variant) + ) + # === END MODIFIED === + + fom_rows.append({ + "Kernel": f"{kernel}-{variant}", + "Sat Problem Size": sat_size if sat_size != "" else "N/A", + "Sat GFLOP/s": sat_flops_raw if sat_flops_raw != "" else "N/A", + "Sat B/W (GiB per sec.)": sat_bw if sat_bw != "" else "N/A", + }) + + fom_df = pd.DataFrame(fom_rows) + if IN_NOTEBOOK and display is not None: + display(fom_df) + else: + print(fom_df.to_string(index=False)) + +# ========================= +# Plotting for a single kernel (Bandwidth) +# ========================= + +def plot_kernel_bandwidth( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if BANDWIDTH_COL not in df.columns: + print(f"[INFO] No '{BANDWIDTH_COL}' column for kernel '{kernel}', skipping bandwidth plot.") + return + + plt.figure(figsize=(18, 7)) + variants = df[VARIANT_TUNING_COL].unique() + colors = plt.cm.tab10.colors + + if SMOOTH_BW_COL not in df.columns: + df[SMOOTH_BW_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_bw = subdf[BANDWIDTH_COL].astype(float).values + + if len(x) == 0: + continue + + y_bw_smooth = moving_median_smooth(list(y_bw), k=k) + + for xi, yi in zip(x, y_bw_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_BW_COL] = yi + + color = colors[idx % len(colors)] + + plt.plot( + x, y_bw, "-o", + label=f"{variant} (raw B/W)", + markersize=8, + color=color, + markerfacecolor=color, + markeredgewidth=0, + ) + + plt.plot( + x, y_bw_smooth, "--", + label=f"{variant} (smoothed B/W)", + linewidth=3, + color=color, + ) + + plt.title(f"Kernel: {kernel} - Bandwidth", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Bandwidth (GiB per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, 
"{}_bandwidth.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print(f"Saved bandwidth plot to {fname}") + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Smooth and plot all kernels (FLOPs and Bandwidth) +# ========================= + +def smooth_and_plot_all_kernels( + df: pd.DataFrame, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +) -> pd.DataFrame: + df = df.dropna(subset=[KERNEL_COL, PROBLEM_SIZE_COL, RAW_FLOPS_COL, VARIANT_TUNING_COL]).copy() + df[SMOOTH_FLOPS_COL] = np.nan + + if BANDWIDTH_COL in df.columns: + df[SMOOTH_BW_COL] = np.nan + + kernels = df[KERNEL_COL].unique() + print("Found {} kernels.".format(len(kernels))) + + for kernel in kernels: + tmp = df[df[KERNEL_COL] == kernel].copy() + + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + df.loc[df[KERNEL_COL] == kernel, SMOOTH_FLOPS_COL] = tmp[SMOOTH_FLOPS_COL] + + if BANDWIDTH_COL in df.columns: + plot_kernel_bandwidth(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + if SMOOTH_BW_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_BW_COL] = tmp[SMOOTH_BW_COL] + + return df + +# ========================= +# Per-kernel tables (raw and smoothed) +# ========================= + +def save_kernel_tables( + df: pd.DataFrame, + outdir: str = "kernel_tables", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, "{}_raw.csv".format(sanitize_filename(kernel))) + raw_table.to_csv(raw_csv_path) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, "{}_smoothed.csv".format(sanitize_filename(kernel))) + smooth_table.to_csv(smooth_csv_path) + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_raw_csv_path = os.path.join(outdir, "{}_bandwidth_raw.csv".format(sanitize_filename(kernel))) + bw_raw_table.to_csv(bw_raw_csv_path) + + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_csv_path = os.path.join(outdir, "{}_bandwidth_smoothed.csv".format(sanitize_filename(kernel))) + bw_smooth_table.to_csv(bw_smooth_csv_path) + + print("Saved: {}, {}, {}, {}".format( + raw_csv_path, smooth_csv_path, bw_raw_csv_path, bw_smooth_csv_path)) + else: + print("Saved: {}, {}".format(raw_csv_path, smooth_csv_path)) + +# ========================= +# Per-kernel saturation curve data (raw + smoothed in one file) +# ========================= + +def save_saturation_curve_data( + df: pd.DataFrame, + outdir: str = "saturation-curve-data", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + raw_table = 
raw_table.sort_index() + smooth_table = smooth_table.reindex( + index=raw_table.index, + columns=raw_table.columns, + ) + combined = pd.DataFrame(index=raw_table.index) + for vt in raw_table.columns: + raw_col_name = f"{vt} (raw)" + smooth_col_name = f"{vt} (smoothed)" + combined[raw_col_name] = raw_table[vt] + combined[smooth_col_name] = smooth_table[vt] + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_table = bw_smooth_table.reindex( + index=bw_raw_table.index, + columns=bw_raw_table.columns, + ) + for vt in bw_raw_table.columns: + bw_raw_col_name = f"{vt} (raw B/W)" + bw_smooth_col_name = f"{vt} (smoothed B/W)" + combined[bw_raw_col_name] = bw_raw_table[vt] + combined[bw_smooth_col_name] = bw_smooth_table[vt] + + combined = combined.reset_index().rename(columns={PROBLEM_SIZE_COL: "Problem size"}) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + combined.to_csv(out_path, index=False) + print(f"Saved saturation curve data: {out_path}") + +# ========================= +# Per-kernel FOM tables (saturation points, with units, NO Sat Smoothed B/W) +# ========================= + +def save_fom_tables( + df: pd.DataFrame, + outdir: str = "FOM", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + variant_tunings = df_kernel[VARIANT_TUNING_COL].unique() + rows = [] + for vt in variant_tunings: + subdf = df_kernel[df_kernel[VARIANT_TUNING_COL] == vt].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = None + sat_flops_raw = "" + sat_bw = "" + # === MODIFIED/ADDED FOR SATURATION REPORTING === + if len(x) > 0 and len(y_smooth) > 0: + sat_idx = None + y_max = max(y_smooth) + threshold = (1.0 - 0.1) * y_max + w = 3 + run_length = 0 + run_start_idx = None + + for i in range(len(y_smooth)): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + sat_idx = run_start_idx + break + else: + run_length = 0 + run_start_idx = None + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + print( + "[WARN] Bandwidth column missing for kernel '{}' variant '{}' " + "while computing saturation".format(kernel, vt) + ) + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' variant '{}' " + "when building FOM table".format(kernel, vt) + ) + else: + print( + "[INFO] Not enough data to compute saturation for " + "kernel '{}' variant '{}' when building FOM table".format(kernel, vt) + ) + # === END MODIFIED === + + def na_if_empty(value): + return value if value not in [None, ""] else "N/A" + + rows.append({ + "Kernel": f"{kernel}-{vt}", + "Sat Problem Size": na_if_empty(sat_size), + "Sat GFLOP/s": 
na_if_empty(sat_flops_raw), + "Sat B/W (GiB per sec.)": na_if_empty(sat_bw), + }) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + pd.DataFrame(rows).to_csv(out_path, index=False) + print(f"Saved FOM table: {out_path}") + +# ========================= +# Main pipeline: callable from notebook or CLI +# ========================= + +def run_pipeline( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + ensure_dir(output_dir) + fig_dir = os.path.join(output_dir, "figures") + ensure_dir(fig_dir) + + combined_df = build_combined_table( + root_dir=root_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + verbose=True, + ) + if combined_df is None: + print("No combined table created.") + return None + + combined_csv_path = os.path.join(output_dir, "combined_table.csv") + combined_df.to_csv(combined_csv_path, index=False) + print("[SAVE] Combined table saved to {}".format(combined_csv_path)) + + df_smoothed = smooth_and_plot_all_kernels( + combined_df, + k=smooth_window, + eps=saturation_eps, + w=saturation_w, + save_dir=fig_dir, + ) + + output_variant_tuning_path = os.path.join(output_dir, "output_with_variant_tuning.csv") + df_smoothed.to_csv(output_variant_tuning_path, index=False) + print("[SAVE] Smoothed combined table saved to {}".format(output_variant_tuning_path)) + + save_kernel_tables(df_smoothed, outdir=output_dir) + saturation_dir = os.path.join(output_dir, "saturation-curve-data") + save_saturation_curve_data(df_smoothed, outdir=saturation_dir) + fom_dir = os.path.join(output_dir, "FOM") + save_fom_tables(df_smoothed, outdir=fom_dir) + + return df_smoothed + +# ========================= +# CLI entry point +# ========================= + +def main_cli(): + parser = argparse.ArgumentParser( + description="Combine benchmark CSVs, smooth FLOPs/Bandwidth and generate plots/tables." + ) + parser.add_argument("--root-dir", default=".", help="Root directory to search for CSV files") + parser.add_argument("--output-dir", default="./output", help="Directory to write outputs") + parser.add_argument( + "--glob-pattern", + action="append", + default=None, + help="Glob pattern for CSV files (can be repeated). 
Default: **/*factor*kernel-run-data.csv", + ) + parser.add_argument( + "--kernel-whitelist", + nargs="*", + default=None, + help="List of substrings to filter Kernel names by (case insensitive).", + ) + parser.add_argument("--smooth-window", type=int, default=5, help="Moving median window size") + parser.add_argument("--saturation-eps", type=float, default=0.1, help="Epsilon for saturation threshold") + parser.add_argument("--saturation-w", type=int, default=3, help="Consecutive points needed for saturation") + + args = parser.parse_args() + + run_pipeline( + root_dir=args.root_dir, + output_dir=args.output_dir, + glob_patterns=args.glob_pattern, + kernel_whitelist=args.kernel_whitelist, + smooth_window=args.smooth_window, + saturation_eps=args.saturation_eps, + saturation_w=args.saturation_w, + ) + +# ========================= +# Notebook helper (no argparse) +# ========================= + +def main_notebook( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +): + return run_pipeline( + root_dir=root_dir, + output_dir=output_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + smooth_window=smooth_window, + saturation_eps=saturation_eps, + saturation_w=saturation_w, + ) + +if __name__ == "__main__": + if not IN_NOTEBOOK: + main_cli() diff --git a/scripts/benchmarking/full-pipeline.sh b/scripts/benchmarking/full-pipeline.sh new file mode 100755 index 000000000..bc37e394e --- /dev/null +++ b/scripts/benchmarking/full-pipeline.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# run_full_pipeline.sh [cpx|spx] /path/to/output_dir +# +# Example: +# run_full_pipeline.sh cpx /p/lustre/artv3/raja_perf_runs/2026-01-29 + +MODE="${1:-cpx}" +OUTDIR="${2:-./raja_perf_results}" + +mkdir -p "${OUTDIR}" + +# 1) Run tier1 mode script (CPX or SPX) +case "${MODE}" in + cpx|CPX) + echo "Running CPX tier1 script..." + ./scripts/benchmarking/run_tier1_cpx-mode.sh "${OUTDIR}" + ;; + spx|SPX) + echo "Running SPX tier1 script..." + ./scripts/benchmarking/run_tier1_spx-mode.sh "${OUTDIR}" + ;; + *) + echo "ERROR: MODE must be 'cpx' or 'spx' (got: ${MODE})" >&2 + exit 1 + ;; +esac + +# 2) Run study_run_kernel_tunings.py +echo "Running study_run_kernel_tunings.py..." +python3 ./scripts/benchmarking/study_run_kernel_tunings.py \ + --output-dir "${OUTDIR}" + + +# 4) Run study_saturation.py and create FoMs +echo "Running study_saturation.py..." +python3 ./scripts/benchmarking/study_saturation.py \ + --output-dir "${OUTDIR}" + +# At this point we expect a combined_table.csv in ${OUTDIR} +COMBINED="${OUTDIR}/combined_table.csv" +if [[ ! -f "${COMBINED}" ]]; then + echo "ERROR: combined_table.csv not found in ${COMBINED}." >&2 + echo " Check that study_* scripts are configured to write it." >&2 + exit 1 +fi + +echo "Done. 
Final FOM files are in: ${OUTDIR}" \ No newline at end of file diff --git a/scripts/benchmarking/rhornung67_fullscript.py b/scripts/benchmarking/rhornung67_fullscript.py new file mode 100644 index 000000000..6d664d8ba --- /dev/null +++ b/scripts/benchmarking/rhornung67_fullscript.py @@ -0,0 +1,975 @@ +#!/usr/bin/env python3 +import os +import glob +import statistics +import argparse +from typing import List, Optional + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +# Try to import IPython display for notebook detection +try: + from IPython.display import display + from IPython import get_ipython + IN_NOTEBOOK = get_ipython() is not None +except Exception: + display = None + IN_NOTEBOOK = False + +# ========================= +# Global constants +# ========================= + +RAW_FLOPS_COL = "Mean gFlops (gigaFLOP per sec.)" +SMOOTH_FLOPS_COL = "Smoothed Mean flops (gigaFLOP per sec.)" +PROBLEM_SIZE_COL = "Problem size" +VARIANT_TUNING_COL = "Variant_Tuning" +KERNEL_COL = "Kernel" +BANDWIDTH_COL = "Bandwidth (GiB per sec.)" +SMOOTH_BW_COL = "Smoothed Bandwidth (GiB per sec.)" +SAT_IDX_COL = "Saturation Index" + +# ========================= +# General utilities +# ========================= + +def ensure_dir(path: str) -> None: + if path: + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text: str) -> str: + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir: str, patterns: List[str]) -> List[str]: + all_files: List[str] = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) + return all_files + +# ========================= +# Header detection / reading +# ========================= + +def _likely_header_score(line: str) -> int: + tokens = [ + "Kernel", "Variant", "Problem size", "Problem Size", "Mean flops", + "GFlop", "GFLOP", "GFLOPs", "GFLOPS" + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path: str) -> Optional[pd.DataFrame]: + """Read a CSV and heuristically detect the header row.""" + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print("Failed to read {}: {}".format(path, e)) + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): + if not line.strip(): + continue + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + if score >= 3: + break + + if header_idx is None: + print("Could not find header in {}, skipping.".format(path)) + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print("Failed to parse CSV {}: {}".format(path, e)) + return None + + df["__source_file__"] = path + return df + +# ========================= +# Column normalization +# ========================= + +def normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """Normalize various benchmark CSV headers to a standard schema.""" + candidates = { + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + PROBLEM_SIZE_COL: [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + RAW_FLOPS_COL: [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", 
"GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + BANDWIDTH_COL: [ + "Bandwidth (GiB per sec.)", + "Bandwidth (GiB/s)", + "Bandwidth (GB/s)", + "Bandwidth (GiB/sec)", + "Bandwidth (GB/sec)", + "Mean bandwidth (GiB/s)", + "Mean Bandwidth (GiB/s)", + "Mean Bandwidth (GiB per sec.)" + ], + } + + df = df.rename(columns={c: c.strip() for c in df.columns}) + new_col_map = {} + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break + + df = df.rename(columns=new_col_map) + return df + +# ========================= +# Backend / tuning classification +# ========================= + +def classify_backend_from_variant(variant) -> str: + s = "" if pd.isna(variant) else str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +def classify_tuning(row: pd.Series) -> str: + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + return "default" + +# ========================= +# Build combined table +# ========================= + +def build_combined_table( + root_dir: str = ".", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + verbose: bool = True, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + files = find_csv_files(root_dir, glob_patterns) + if not files: + if verbose: + print("No files matching patterns {} found under '{}'".format(glob_patterns, root_dir)) + return None + + if verbose: + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + required_cols = {KERNEL_COL, "Variant", PROBLEM_SIZE_COL, RAW_FLOPS_COL} + + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + missing = required_cols - set(df.columns) + if missing: + if verbose: + print("[SKIP] {} missing required columns after normalization: {}".format(path, sorted(missing))) + print(" Columns present:", list(df.columns)) + continue + dfs.append(df) + + if not dfs: + if verbose: + print("No CSV files could be parsed with required columns.") + return None + + combined_df = pd.concat(dfs, ignore_index=True) + + combined_df[KERNEL_COL] = combined_df[KERNEL_COL].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + if kernel_whitelist: + wl = [w.lower() for w in kernel_whitelist] + kernel_series = combined_df[KERNEL_COL].fillna("").astype(str).str.lower() + mask = kernel_series.apply(lambda k: any(w in k for w in wl)) + combined_df = combined_df[mask] + if combined_df.empty: + if verbose: + print("After applying kernel_whitelist, no rows remain.") + return None + + combined_df[PROBLEM_SIZE_COL] = pd.to_numeric(combined_df[PROBLEM_SIZE_COL], errors="coerce") + combined_df[RAW_FLOPS_COL] = pd.to_numeric(combined_df[RAW_FLOPS_COL], errors="coerce") + if BANDWIDTH_COL in combined_df.columns: + combined_df[BANDWIDTH_COL] = pd.to_numeric(combined_df[BANDWIDTH_COL], errors="coerce") + + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=[PROBLEM_SIZE_COL, RAW_FLOPS_COL]) + dropped = before_drop - 
len(combined_df) + if verbose and dropped > 0: + print("[CLEAN] Dropped {} rows with non-numeric {} or {}.".format( + dropped, PROBLEM_SIZE_COL, RAW_FLOPS_COL)) + + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + combined_df[VARIANT_TUNING_COL] = ( + combined_df["Variant"].astype(str) + "-" + combined_df["Tuning"].astype(str) + ) + + return combined_df + +# ========================= +# Smoothing and saturation point utilities +# ========================= + +def moving_median_smooth(y, k: int = 5): + n = len(y) + if n == 0: + return [] + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps: float = 0.1, w: int = 3): + """ + Returns the index of the x value at the first saturation point, + or None if none is found. + First, the method sets y_max = max(y_smooth) and then tries to + find the first run of y_smooth values of length >= w where + y_smooth >= (1 - eps) * y_max. If such a run is found, the + function returns the index of the first y_smooth value in the run. + Second, if the first attempt does not find such a run, it defines + y_end = y_smooth[n - 1], where n is the length of y_smooth. Then, + it searches for the first run of y_smooth values of length >= w + where abs( (y_smooth[i] - y_end) / y_end ) <= eps. If such a run is + found, the function returns the index of the first y_smooth value + in the run. + """ + + if len(x) < w or len(y_smooth) < w: + return None + + n = len(y_smooth) + +# ========================= +# First attempt to find index of saturation point +# ========================= + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + run_length = 0 + run_start_idx = None + + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return run_start_idx + else: + run_length = 0 + run_start_idx = None + +# ========================= +# Second attempt to find index of saturation point +# ========================= + + y_end = y_smooth[n - 1] + threshold = eps + run_length = 0 + run_start_idx = None + + for i in range(n): + if abs( (y_smooth[i] - y_end) / y_end ) <= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return run_start_idx + else: + run_length = 0 + run_start_idx = None + + return None + +# ========================= +# Find the saturation point for single kernel +# ========================= + +def find_saturation_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + variants = df[VARIANT_TUNING_COL].unique() + + # generate smoothed data + if SMOOTH_FLOPS_COL not in df.columns: + df[SMOOTH_FLOPS_COL] = np.nan + + if SMOOTH_BW_COL not in df.columns: + df[SMOOTH_BW_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + + if len(x) == 0: + continue + + if RAW_FLOPS_COL in subdf.columns: + y = subdf[RAW_FLOPS_COL].astype(float).values + + y_smooth = moving_median_smooth(list(y), k=k) + + for xi, yi in zip(x, y_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, 
SMOOTH_FLOPS_COL] = yi + + if BANDWIDTH_COL in subdf.columns: + y_bw = subdf[BANDWIDTH_COL].astype(float).values + + y_bw_smooth = moving_median_smooth(list(y_bw), k=k) + + for xi, yi in zip(x, y_bw_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_BW_COL] = yi + + # FOM-style output as DataFrame + fom_rows = [] + for variant in variants: + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = "" + sat_flops = "" + sat_bw = "" + + sat_idx = find_saturation_point(x, y_smooth, eps, w) + + df.loc[df[VARIANT_TUNING_COL] == variant, SAT_IDX_COL] = sat_idx + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops = raw_at_sat.iloc[0] + + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + if not bw_at_sat.empty and pd.notna(bw_at_sat.iloc[0]): + sat_bw = bw_at_sat.iloc[0] + + else: + print( + "[INFO] No saturation point found for kernel '{}' " + "variant '{}' (eps={}, w={})".format(kernel, variant, eps, w) + ) + + fom_rows.append({ + "Kernel": f"{kernel}-{variant}", + "Sat Problem Size": sat_size if sat_size != "" else "N/A", + "Sat GFLOP/s": sat_flops if sat_flops != "" else "N/A", + "Sat B/W (GiB per sec.)": sat_bw if sat_bw != "" else "N/A", + }) + + fom_df = pd.DataFrame(fom_rows) + if IN_NOTEBOOK and display is not None: + display(fom_df) + else: + print(fom_df.to_string(index=False)) + +# ========================= +# Plotting for a single kernel (FLOPs) with FOM output as DataFrame +# ========================= + +def plot_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if RAW_FLOPS_COL not in df.columns: + print(f"[INFO] No '{RAW_FLOPS_COL}' column for kernel '{kernel}', skipping flops plot.") + return + + variants = df[VARIANT_TUNING_COL].unique() + + plt.figure(figsize=(18, 7)) + colors = plt.cm.tab10.colors + + ymin = 1e99 + ymax = -1e99 + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y = subdf[RAW_FLOPS_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + + if len(x) == 0: + continue + + y_min = min(y) + if (y_min < ymin): + ymin = y_min + y_smooth_min = min(y_smooth) + if (y_smooth_min < ymin): + ymin = y_smooth_min + + y_max = max(y) + if (y_max > ymax): + ymax = y_max + y_smooth_max = max(y_smooth) + if (y_smooth_max > ymax): + ymax = y_smooth_max + + plt.plot( + x, y, "-", + label="{} (raw flops)".format(variant), + color=colors[idx % len(colors)], + marker="o", + markersize=8, + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0 + ) + + plt.plot( + x, y_smooth, "--", + label="{} (smoothed flops)".format(variant), + linewidth=3, + color=colors[idx % len(colors)], + marker="+", + markersize=10 + ) + + if SAT_IDX_COL in subdf.columns: + sat_idx = subdf[SAT_IDX_COL].astype(int).values[0] + + sat_x = x[sat_idx] + sat_y = y[sat_idx] + + plt.plot( + [sat_x], [sat_y], "-", + label="{} saturation ({}, {})".format(variant, sat_x, sat_y), + linewidth=0, + color=colors[idx % len(colors)], + marker="*", 
+ markersize=30 + ) + + if ymin > 0: + ymin = 0 + yrange = ymax-ymin + yoverhang = yrange*0.1 + + plt.title("Kernel: {}".format(kernel), fontsize=22) + plt.xlabel("Problem size (bytes)", fontsize=18) + plt.ylabel("Mean flops (GFLOP per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.ylim((ymin-yoverhang, ymax+yoverhang)) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=14, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}_flops.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print("Saved plot to {}".format(fname)) + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Plotting for a single kernel (Bandwidth) +# ========================= + +def plot_kernel_bandwidth( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if BANDWIDTH_COL not in df.columns: + print(f"[INFO] No '{BANDWIDTH_COL}' column for kernel '{kernel}', skipping bandwidth plot.") + return + + variants = df[VARIANT_TUNING_COL].unique() + + plt.figure(figsize=(18, 7)) + colors = plt.cm.tab10.colors + + ymin = 1e99 + ymax = -1e99 + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_bw = subdf[BANDWIDTH_COL].astype(float).values + y_bw_smooth = subdf[SMOOTH_BW_COL].astype(float).values + + if len(x) == 0: + continue + + y_bw_min = min(y_bw) + if (y_bw_min < ymin): + ymin = y_bw_min + y_bw_smooth_min = min(y_bw_smooth) + if (y_bw_smooth_min < ymin): + ymin = y_bw_smooth_min + + y_bw_max = max(y_bw) + if (y_bw_max > ymax): + ymax = y_bw_max + y_bw_smooth_max = max(y_bw_smooth) + if (y_bw_smooth_max > ymax): + ymax = y_bw_smooth_max + + plt.plot( + x, y_bw, "-", + label=f"{variant} (raw B/W)", + color=colors[idx % len(colors)], + marker="o", + markersize=8, + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0, + ) + + plt.plot( + x, y_bw_smooth, "--", + label=f"{variant} (smoothed B/W)", + linewidth=3, + color=colors[idx % len(colors)], + marker="+", + markersize=10 + ) + + if SAT_IDX_COL in subdf.columns: + sat_idx = subdf[SAT_IDX_COL].astype(int).values[0] + + sat_x = x[sat_idx] + sat_y = y_bw[sat_idx] + + plt.plot( + [sat_x], [sat_y], "-", + label="{} saturation ({}, {})".format(variant, sat_x, sat_y), + linewidth=0, + color=colors[idx % len(colors)], + marker="*", + markersize=30 + ) + + if ymin > 0: + ymin = 0 + yrange = ymax-ymin + yoverhang = yrange*0.1 + + plt.title(f"Kernel: {kernel} - Bandwidth", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Bandwidth (GiB per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.ylim((ymin-yoverhang, ymax+yoverhang)) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=14, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}_bandwidth.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print(f"Saved bandwidth plot to {fname}") + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Smooth and plot all kernels (FLOPs and Bandwidth) +# 
========================= + +def smooth_and_plot_all_kernels( + df: pd.DataFrame, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +) -> pd.DataFrame: + df = df.dropna(subset=[KERNEL_COL, PROBLEM_SIZE_COL, RAW_FLOPS_COL, VARIANT_TUNING_COL]).copy() + df[SMOOTH_FLOPS_COL] = np.nan + + if BANDWIDTH_COL in df.columns: + df[SMOOTH_BW_COL] = np.nan + + kernels = df[KERNEL_COL].unique() + print("Found {} kernels.".format(len(kernels))) + + for kernel in kernels: + tmp = df[df[KERNEL_COL] == kernel].copy() + + find_saturation_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + if SMOOTH_FLOPS_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_FLOPS_COL] = tmp[SMOOTH_FLOPS_COL] + if SMOOTH_BW_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_BW_COL] = tmp[SMOOTH_BW_COL] + + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + + if BANDWIDTH_COL in df.columns: + plot_kernel_bandwidth(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + + return df + +# ========================= +# Per-kernel tables (raw and smoothed) +# ========================= + +def save_kernel_tables( + df: pd.DataFrame, + outdir: str = "kernel_tables", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, "{}_flops_raw.csv".format(sanitize_filename(kernel))) + raw_table.to_csv(raw_csv_path) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, "{}_flops_smoothed.csv".format(sanitize_filename(kernel))) + smooth_table.to_csv(smooth_csv_path) + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_raw_csv_path = os.path.join(outdir, "{}_bandwidth_raw.csv".format(sanitize_filename(kernel))) + bw_raw_table.to_csv(bw_raw_csv_path) + + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_csv_path = os.path.join(outdir, "{}_bandwidth_smoothed.csv".format(sanitize_filename(kernel))) + bw_smooth_table.to_csv(bw_smooth_csv_path) + + print("Saved: {}, {}, {}, {}".format( + raw_csv_path, smooth_csv_path, bw_raw_csv_path, bw_smooth_csv_path)) + else: + print("Saved: {}, {}".format(raw_csv_path, smooth_csv_path)) + +# ========================= +# Per-kernel saturation curve data (raw + smoothed in one file) +# ========================= + +def save_saturation_curve_data( + df: pd.DataFrame, + outdir: str = "saturation-curve-data", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + smooth_table = smooth_table.reindex( + index=raw_table.index, + columns=raw_table.columns, + ) + combined = 
pd.DataFrame(index=raw_table.index) + for vt in raw_table.columns: + raw_col_name = f"{vt} (raw)" + smooth_col_name = f"{vt} (smoothed)" + combined[raw_col_name] = raw_table[vt] + combined[smooth_col_name] = smooth_table[vt] + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_table = bw_smooth_table.reindex( + index=bw_raw_table.index, + columns=bw_raw_table.columns, + ) + for vt in bw_raw_table.columns: + bw_raw_col_name = f"{vt} (raw B/W)" + bw_smooth_col_name = f"{vt} (smoothed B/W)" + combined[bw_raw_col_name] = bw_raw_table[vt] + combined[bw_smooth_col_name] = bw_smooth_table[vt] + + combined = combined.reset_index().rename(columns={PROBLEM_SIZE_COL: "Problem size"}) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + combined.to_csv(out_path, index=False) + print(f"Saved saturation curve data: {out_path}") + +# ========================= +# Per-kernel FOM tables (saturation points, with units, NO Sat Smoothed B/W) +# ========================= + +def save_fom_tables( + df: pd.DataFrame, + eps: float = 0.1, + w: int = 3, + outdir: str = "FOM" +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + variant_tunings = df_kernel[VARIANT_TUNING_COL].unique() + rows = [] + for vt in variant_tunings: + subdf = df_kernel[df_kernel[VARIANT_TUNING_COL] == vt].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = None + sat_flops_raw = "" + sat_bw = "" + + sat_idx = find_saturation_point(x, y_smooth, eps, w) + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + print( + "[WARN] Bandwidth column missing for kernel '{}' variant '{}' " + "while computing saturation".format(kernel, vt) + ) + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' variant '{}' " + "when building FOM table".format(kernel, vt) + ) + + def na_if_empty(value): + return value if value not in [None, ""] else "N/A" + + rows.append({ + "Kernel": f"{kernel}-{vt}", + "Sat Problem Size": na_if_empty(sat_size), + "Sat GFLOP/s": na_if_empty(sat_flops_raw), + "Sat B/W (GiB per sec.)": na_if_empty(sat_bw), + }) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + pd.DataFrame(rows).to_csv(out_path, index=False) + print(f"Saved FOM table: {out_path}") + +# ========================= +# Main pipeline: callable from notebook or CLI +# ========================= + +def run_pipeline( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + 
glob_patterns = ["**/*factor*kernel-run-data.csv"] + + ensure_dir(output_dir) + fig_dir = os.path.join(output_dir, "figures") + ensure_dir(fig_dir) + + combined_df = build_combined_table( + root_dir=root_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + verbose=True, + ) + if combined_df is None: + print("No combined table created.") + return None + + combined_csv_path = os.path.join(output_dir, "combined_table.csv") + combined_df.to_csv(combined_csv_path, index=False) + print("[SAVE] Combined table saved to {}".format(combined_csv_path)) + + df_smoothed = smooth_and_plot_all_kernels( + combined_df, + k=smooth_window, + eps=saturation_eps, + w=saturation_w, + save_dir=fig_dir, + ) + + output_variant_tuning_path = os.path.join(output_dir, "output_with_variant_tuning.csv") + df_smoothed.to_csv(output_variant_tuning_path, index=False) + print("[SAVE] Smoothed combined table saved to {}".format(output_variant_tuning_path)) + + save_kernel_tables(df_smoothed, outdir=output_dir) + saturation_dir = os.path.join(output_dir, "saturation-curve-data") + save_saturation_curve_data(df_smoothed, outdir=saturation_dir) + fom_dir = os.path.join(output_dir, "FOM") + save_fom_tables(df_smoothed, saturation_eps, saturation_w, outdir=fom_dir) + + return df_smoothed + +# ========================= +# CLI entry point +# ========================= + +def main_cli(): + parser = argparse.ArgumentParser( + description="Combine benchmark CSVs, smooth FLOPs/Bandwidth and generate plots/tables." + ) + parser.add_argument("--root-dir", default=".", help="Root directory to search for CSV files (default: '.')") + parser.add_argument("--output-dir", default="./output", help="Directory to write outputs (default: './output')") + parser.add_argument( + "--glob-pattern", + action="append", + default=None, + help="Glob pattern for CSV files (can be repeated). 
Default: **/*factor*kernel-run-data.csv", + ) + parser.add_argument( + "--kernel-whitelist", + nargs="*", + default=None, + help="List of substrings to filter Kernel names by (case insensitive, default: none).", + ) + parser.add_argument("--smooth-window", type=int, default=5, help="Moving median window size (default: '5')") + parser.add_argument("--saturation-eps", type=float, default=0.1, help="Epsilon for saturation threshold (default: '0.1')") + parser.add_argument("--saturation-w", type=int, default=3, help="Consecutive points needed for saturation (default: '3')") + + args = parser.parse_args() + + run_pipeline( + root_dir=args.root_dir, + output_dir=args.output_dir, + glob_patterns=args.glob_pattern, + kernel_whitelist=args.kernel_whitelist, + smooth_window=args.smooth_window, + saturation_eps=args.saturation_eps, + saturation_w=args.saturation_w, + ) + +# ========================= +# Notebook helper (no argparse) +# ========================= + +def main_notebook( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +): + return run_pipeline( + root_dir=root_dir, + output_dir=output_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + smooth_window=smooth_window, + saturation_eps=saturation_eps, + saturation_w=saturation_w, + ) + +if __name__ == "__main__": + if not IN_NOTEBOOK: + main_cli() diff --git a/scripts/benchmarking/run_tier1_cpx-mode.sh b/scripts/benchmarking/run_tier1_cpx-mode.sh new file mode 100755 index 000000000..9fb0f3db7 --- /dev/null +++ b/scripts/benchmarking/run_tier1_cpx-mode.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +## Run all 'Priority 1' benchmark kernels for GPU, non-lambda variants only +## on 4 MPI ranks and dump the results in the specified directory. + +flux alloc -xN1 --amd-gpumode=CPX -t 45 bash -c ' + +OUTDIR=RPBenchmarkTestMPI_tier1-CPX + +# Reference memory footprint for SPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 = 268435456 bytes +#BASEMEM=268435456 + +# Reference memory footprint for CPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 / 6 = 44739242 bytes +BASEMEM=44739242 + +# Collection of problem size factors to run +FACTORS=(1 2 4 8 10 12 16 20) + +# List of kernels to run +KERNELS=("DIFFUSION3DPA" + "EDGE3D" + "ENERGY" + "INTSC_HEXRECT" + "MASS3DEA" + "MASS3DPA_ATOMIC" + "MASSVEC3DPA" + "MATVEC_3D_STENCIL" + "NODAL_ACCUMULATION_3D" + "VOL3D") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + for factor in "${FACTORS[@]}"; do + mem=$((factor * BASEMEM)) + echo " Running with memory = $mem" + flux run -xN1 -n4 ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 1 \ + --npasses-combiners Average Minimum Maximum \ + --outdir ${OUTDIR} \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --memory-allocated "$mem" \ + --warmup-perfrun-same \ + -ev Seq Lambda + done +done +' + diff --git a/scripts/benchmarking/run_tier1_spx-mode.sh b/scripts/benchmarking/run_tier1_spx-mode.sh new file mode 100755 index 000000000..e16f06534 --- /dev/null +++ b/scripts/benchmarking/run_tier1_spx-mode.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +## Run all 'Priority 1' benchmark kernels for GPU, non-lambda variants only +## on 4 MPI ranks and dump the results in the specified directory. 
+ +flux alloc -xN1 -t 45 bash -c ' + +OUTDIR=RPBenchmarkTestMPI_tier1-SPX + +# Reference memory footprint for SPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 = 268435456 bytes +BASEMEM=268435456 + +# Reference memory footprint for CPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 / 6 = 44739242 bytes +#BASEMEM=44739242 + +# Collection of problem size factors to run +FACTORS=(1 2 4 8 10 12 16 20) + +# List of kernels to run +KERNELS=("DIFFUSION3DPA" + "EDGE3D" + "ENERGY" + "INTSC_HEXRECT" + "MASS3DEA" + "MASS3DPA_ATOMIC" + "MASSVEC3DPA" + "MATVEC_3D_STENCIL" + "NODAL_ACCUMULATION_3D" + "VOL3D") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + for factor in "${FACTORS[@]}"; do + mem=$((factor * BASEMEM)) + echo " Running with memory = $mem" + flux run -xN1 -n4 ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 1 \ + --npasses-combiners Average Minimum Maximum \ + --outdir ${OUTDIR} \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --memory-allocated "$mem" \ + --warmup-perfrun-same \ + -ev Seq Lambda + done +done +' + diff --git a/scripts/benchmarking/study_run_kernel_tunings.py b/scripts/benchmarking/study_run_kernel_tunings.py new file mode 100755 index 000000000..73a0d0d03 --- /dev/null +++ b/scripts/benchmarking/study_run_kernel_tunings.py @@ -0,0 +1,537 @@ +import os +import glob +import numpy as np +import pandas as pd +import argparse + +# ============= Configuration ============= + +ROOT_DIR = "." # change if needed + +# Use "factor" instead of "mref" in file patterns +GLOB_PATTERNS = [ + "**/*factor*kernel-run-data.csv", # broad match +] + +# Optional filter to only keep specific kernels by substring match (case-insensitive) +# Leave empty to include all kernels discovered. +KERNEL_WHITELIST = [ + # "MASS3DPA", +] + +# Derivative reporting configuration +DERIV_USE_RELATIVE = True +DERIV_EPS_REL = 0.03 # relative threshold on |dy/dx| normalized by (y_range/x_range) +DERIV_EPS_ABS = 1e-4 # absolute threshold on |dy/dx|, only used if DERIV_USE_RELATIVE=False +DERIV_MIN_CONSEC = 3 # minimum consecutive points below threshold to consider a plateau run +DERIV_SMOOTH_WINDOW = 3 # moving average window for smoothing y before derivative +DERIV_MIN_FRAC_OF_MAX_Y = 0.9 # only search after reaching this fraction of max(y) +DERIV_REPORT_MAX_POINTS = 8 # limit how many points to print per series +DERIV_REPORT_ABS = True # print |dy/dx| if True, else print signed dy/dx + + +FIG_DPI = 300 +SHOW_PLOTS = True # show interactive windows while also saving PNGs + +# Use a non-interactive backend only when not showing plots +if not SHOW_PLOTS: + import matplotlib + matplotlib.use("Agg") + +import matplotlib.pyplot as plt + + +# ============= Helper functions ============= + +def ensure_dir(path): + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text): + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir, patterns): + """Recursively find CSV files matching any of the given patterns.""" + all_files = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) # Remove duplicates and sort + return all_files + +def _likely_header_score(line): + """ + Score a potential header line based on presence of common column tokens. + Higher is more likely to be the header. 
+ """ + tokens = [ + "Kernel", + "Variant", + "Problem size", + "Problem Size", + "Mean flops", + "GFlop", + "GFLOP", + "GFLOPs", + "GFLOPS", + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path): + """ + Read one CSV, trying to detect the header row by locating a line + that contains key column names. Returns a DataFrame or None. + """ + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print(f"Failed to read {path}: {e}") + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): # only inspect the first 50 lines + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + + if header_idx is None: + print(f"Could not find header in {path}, skipping.") + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print(f"Failed to parse CSV {path}: {e}") + return None + + df["__source_file__"] = path + return df + +def normalize_columns(df): + """ + Normalize common column names to a standard set if possible. + """ + candidates = { + # Standard name : possible variants + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + "Problem size": [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + "Mean flops (gigaFLOP per sec.)": [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + } + + new_col_map = {} + # strip whitespace from existing columns first + df = df.rename(columns={c: c.strip() for c in df.columns}) + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break # first match wins + + df = df.rename(columns=new_col_map) + return df + +def _moving_average(y, window): + if window is None or window <= 1 or len(y) < 3: + return y + window = max(2, int(window)) + kernel = np.ones(window, dtype=float) / float(window) + return np.convolve(y, kernel, mode="same") + +def _find_first_run(mask, min_len): + """Return the start index and run length of the first run of True with length >= min_len.""" + run = 0 + for i, v in enumerate(mask): + if v: + run += 1 + if run >= min_len: + start = i - run + 1 + j = i + 1 + while j < len(mask) and mask[j]: + j += 1 + return start, j - start + else: + run = 0 + return None, 0 + +def classify_backend_from_variant(variant): + """ + Heuristic classification of backend based on the Variant string. + Captures common cases even if names do not end with specific suffixes. + """ + s = str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +# NEW: classify tuning; adjust logic to match your actual naming scheme +def classify_tuning(row): + """ + Return a tuning label for a row. + You can customize this to use any available columns. 
+ Examples: + - A dedicated 'Tuning' column + - Parsing 'Variant' into backend + tuning + - Using problem-size or factor strings + For now, use: + - If a 'Tuning' column exists, use that + - Else, if '__source_file__' used different settings, use its basename + - Else, return 'default' + """ + # If the CSV already has a Tuning column, use it + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + + # Otherwise, derive from source file name as a proxy for tuning + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + + return "default" + +def report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning_label, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, +): + """ + Prints lines "Problem size=, dy/dx=" for points with small enough derivative. + Uses either a relative threshold or an absolute slope threshold. + Focuses on the near-peak region to avoid early flat areas. + """ + x = np.asarray(x, dtype=float) + y = np.asarray(y, dtype=float) + + # Aggregate duplicate x values by averaging y + if len(x) != len(np.unique(x)): + tmp = pd.DataFrame({"x": x, "y": y}).groupby("x", as_index=False)["y"].mean() + x = tmp["x"].values + y = tmp["y"].values + + # Sort by x + order = np.argsort(x) + x = x[order] + y = y[order] + + if len(x) < max(3, min_consecutive): + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: not enough points for derivative analysis" + ) + return + + # Optional smoothing + y_sm = _moving_average(y, smooth_window) + + x_range = float(x.max() - x.min()) + if x_range == 0.0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: zero x-range, cannot compute derivative" + ) + return + + deriv = np.gradient(y_sm, x) # dy/dx, same length as x + + # Restrict to near-peak region if requested + search_mask = np.ones_like(deriv, dtype=bool) + y_range = float(y_sm.max() - y_sm.min()) + if min_frac_of_max_y is not None and 0.0 < min_frac_of_max_y < 1.0 and y_range > 0: + thresh_y = y_sm.max() * float(min_frac_of_max_y) + search_mask = y_sm >= thresh_y + + if use_relative: + # Normalize slope by typical scale y_range/x_range for a dimensionless measure + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / norm_factor + near_zero_mask = (deriv_norm <= float(eps_rel)) & search_mask + else: + near_zero_mask = (np.abs(deriv) <= float(eps_abs)) & search_mask + + # Prefer the first sustained run of small derivatives + start_idx, run_len = _find_first_run(near_zero_mask, int(min_consecutive)) + + if start_idx is not None: + run_indices = np.arange(start_idx, start_idx + run_len) + # Downsample to at most max_points for readability + if len(run_indices) > max_points: + picks_rel = np.linspace(0, len(run_indices) - 1, num=max_points) + pick = run_indices[np.round(picks_rel).astype(int)] + else: + pick = run_indices + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: sustained near-zero derivative region found, points={run_len}" + ) + else: + # Fallback: choose up to max_points with smallest slope in the search region + candidates = np.where(search_mask)[0] + if candidates.size 
== 0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no valid search region for derivative analysis" + ) + return + + if use_relative: + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / (norm_factor if norm_factor > 0 else 1.0) + order_c = np.argsort(deriv_norm[candidates]) + else: + order_c = np.argsort(np.abs(deriv[candidates])) + pick = candidates[order_c[:max_points]] + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no sustained plateau, showing {len(pick)} smallest-slope points" + ) + + # Ensure sorted by x before printing + pick = np.array(sorted(pick.tolist())) + + # Print lines in the requested format + for idx in pick: + dy = deriv[idx] + dy_out = abs(dy) if report_abs else dy + print(f" Problem size={x[idx]:.6g}, dy/dx={dy_out:.6g}") + +def plot_backend(df_backend, backend_label, fig_dir, show_plots, fig_dpi): + """ + For a given backend: + - One figure per Kernel with solid line and markers per (Variant, Tuning). + - Prints derivative-based small-slope points per (Variant, Tuning). + - Saves each figure as PNG. + - Shows figures interactively if requested. + """ + if df_backend.empty: + print(f"\nNo data for backend {backend_label}.") + return + + kernels = sorted(df_backend["Kernel"].dropna().unique()) + + # Unique (Variant, Tuning) combos for color/marker assignments + vt_pairs = sorted( + df_backend[["Variant", "Tuning"]] + .dropna() + .drop_duplicates() + .itertuples(index=False, name=None) + ) + + cmap = plt.cm.tab20 + num_pairs = max(1, len(vt_pairs)) + colors = [cmap(i % cmap.N) for i in range(num_pairs)] + markers = ["o", "s", "^", "D", "v", ">", "<", "P", "X"] # repeat if needed + + style_map = {} + for idx, (variant, tuning) in enumerate(vt_pairs): + color = colors[idx % len(colors)] + marker = markers[idx % len(markers)] + style_map[(variant, tuning)] = (color, marker) + + for kernel in kernels: + df_k = df_backend[df_backend["Kernel"] == kernel] + if df_k.empty: + continue + + fig = plt.figure(figsize=(10, 6)) + + # group by Variant and Tuning to get separate curves + for (variant, tuning), g in df_k.groupby(["Variant", "Tuning"]): + g_sorted = g.sort_values("Problem size") + x = g_sorted["Problem size"].values + y = g_sorted["Mean flops (gigaFLOP per sec.)"].values + + color, marker = style_map.get((variant, tuning), ("black", "o")) + + label = f"{variant} | {tuning}" + + plt.plot( + x, + y, + marker=marker, + linestyle="-", + color=color, + label=label, + ) + + # Derivative-based report of small-slope points + report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, + ) + + plt.xlabel("Problem size") + plt.ylabel("Mean flops (gigaFLOP per sec.)") + + # TITLE MODIFIED: explicitly list backend + plt.title(f"Kernel: {kernel} | Backend: {backend_label}") + + plt.grid(True) + plt.tight_layout() + plt.legend(fontsize="small", bbox_to_anchor=(1.05, 1), loc="upper left") + + # Save figure as PNG + kernel_safe = sanitize_filename(kernel) + backend_safe = sanitize_filename(backend_label) + fname = f"{backend_safe}_Kernel-{kernel_safe}.png" + fig_path = os.path.join(fig_dir, fname) + plt.savefig(fig_path, dpi=fig_dpi, 
bbox_inches="tight") + print(f"[SAVE] Figure saved to: {fig_path}") + + if show_plots: + plt.show() + else: + plt.close(fig) + +# ============= Main logic ============= + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--output-dir", required=True, help="Output directory") + args = parser.parse_args() + # Output and plotting configuration + OUTPUT_DIR = args.output_dir + FIG_DIR = os.path.join(OUTPUT_DIR, "figures") + COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") + ensure_dir(OUTPUT_DIR) + ensure_dir(FIG_DIR) + + files = find_csv_files(ROOT_DIR, GLOB_PATTERNS) + if not files: + print(f"No files matching patterns {GLOB_PATTERNS} found under '{ROOT_DIR}'") + return + + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + + # Verify required columns exist post-normalization, else report and skip + required_any = {"Kernel", "Variant", "Problem size", "Mean flops (gigaFLOP per sec.)"} + if not required_any.issubset(set(df.columns)): + print(f"[SKIP] {path} missing required columns after normalization.") + print(" Columns present:", list(df.columns)) + continue + + dfs.append(df) + + if not dfs: + print("No CSV files could be parsed with required columns.") + return + + combined_df = pd.concat(dfs, ignore_index=True) + + # Basic cleaning + combined_df["Kernel"] = combined_df["Kernel"].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + # Optional kernel filter + if KERNEL_WHITELIST: + wl = [w.lower() for w in KERNEL_WHITELIST] + combined_df = combined_df[ + combined_df["Kernel"].str.lower().apply(lambda k: any(w in k for w in wl)) + ] + if combined_df.empty: + print("After applying KERNEL_WHITELIST, no rows remain.") + return + + # Convert numeric columns + combined_df["Problem size"] = pd.to_numeric(combined_df["Problem size"], errors="coerce") + combined_df["Mean flops (gigaFLOP per sec.)"] = pd.to_numeric( + combined_df["Mean flops (gigaFLOP per sec.)"], errors="coerce" + ) + + # Drop rows without x or y + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=["Problem size", "Mean flops (gigaFLOP per sec.)"]) + dropped = before_drop - len(combined_df) + if dropped > 0: + print(f"[CLEAN] Dropped {dropped} rows with non-numeric Problem size or Mean flops.") + + # Backend classification + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + + # NEW: derive Tuning column + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + # Save concatenated table to CSV, now including Backend and Tuning + ensure_dir(os.path.dirname(COMBINED_CSV_PATH)) + combined_df.to_csv(COMBINED_CSV_PATH, index=False) + print(f"[SAVE] Combined table saved to: {COMBINED_CSV_PATH}") + + # Quick summary to help verify MASS3DPA is present + print("\nKernels discovered:") + print(sorted(combined_df["Kernel"].unique())) + + print("\nCounts by Kernel and Backend:") + summary = ( + combined_df.groupby(["Kernel", "Backend"]) + .size() + .reset_index(name="rows") + .sort_values(["Kernel", "Backend"]) + ) + for _, row in summary.iterrows(): + print(f" Kernel={row['Kernel']}, Backend={row['Backend']}: rows={row['rows']}") + + # Plot aggregated "All" view so kernels appear even if backend classification is Unknown + print("\n[Plot] Generating 'All' plots per kernel...") + plot_backend(combined_df, "All", FIG_DIR, SHOW_PLOTS, FIG_DPI) + + # Plot 
per requested backends, only if data exists + for b in ["CUDA", "HIP", "Seq", "OpenMP"]: + df_b = combined_df[combined_df["Backend"] == b] + if df_b.empty: + print(f"[Plot] Skipping backend {b}, no rows.") + continue + print(f"\n[Plot] Generating '{b}' plots per kernel...") + plot_backend(df_b, b, FIG_DIR, SHOW_PLOTS, FIG_DPI) + +if __name__ == "__main__": + main() diff --git a/scripts/benchmarking/study_saturation.py b/scripts/benchmarking/study_saturation.py new file mode 100644 index 000000000..a7dce682a --- /dev/null +++ b/scripts/benchmarking/study_saturation.py @@ -0,0 +1,191 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import statistics +import os +import argparse + + +SMOOTH_FLOPS_COL_NAME="Smoothed Mean flops (gigaFLOP per sec.)" +RAW_FLOPS_COL = 'Mean flops (gigaFLOP per sec.)' +SMOOTH_FLOPS_COL = 'Smoothed Mean flops (gigaFLOP per sec.)' +PROBLEM_SIZE_COL = 'Problem size' +VARIANT_TUNING_COL = 'Variant_Tuning' +KERNEL_COL = 'Kernel' + +def moving_median_smooth(y, k=5): + """ + Apply a moving median filter with window size k. + Returns a list of smoothed values. + """ + n = len(y) + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps=0.1, w=3): + """ + Find the smallest x_i where y_smooth stays above (1-eps)*y_max for w consecutive points. + Returns the corresponding x value or None if not found. + """ + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + n = len(y_smooth) + run_length = 0 + run_start_idx = None + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return x[run_start_idx] + else: + run_length = 0 + run_start_idx = None + return None + +def plot_kernel(df, kernel, k=5, eps=0.1, w=3, save_dir=None): + """ + Plot throughput curves for each variant of a kernel. + Raw data: solid dots. Smoothed: dashed lines. + Wide, PowerPoint-friendly plot. 
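+    Also writes the smoothed values back into the SMOOTH_FLOPS_COL_NAME column
+    of the DataFrame passed in, and prints a per-variant saturation report.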
+ """ + plt.figure(figsize=(18, 7)) # Wider plot + variants = df['Variant_Tuning'].unique() + report = [] + colors = plt.cm.tab10.colors + for idx, variant in enumerate(variants): + subdf = df[df['Variant_Tuning'] == variant].copy() + subdf = subdf.sort_values('Problem size') + x = subdf['Problem size'].astype(float).values + y = subdf['Mean flops (gigaFLOP per sec.)'].astype(float).values + y_smooth = moving_median_smooth(list(y), k=k) + for xi, yi in zip(x, y_smooth): + df.loc[(df['Variant_Tuning'] == variant) & (df['Problem size'] == xi), SMOOTH_FLOPS_COL_NAME] = yi + # Raw data points: solid dots + plt.plot( + x, y, '-o', + label=f"{variant} (raw)", + markersize=8, + color=colors[idx % len(colors)], + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0 + ) + # Smoothed curve: dashed line + plt.plot( + x, y_smooth, '--', + label=f"{variant} (smoothed)", + linewidth=3, + color=colors[idx % len(colors)] + ) + y_max = max(y_smooth) + sat_x = find_saturation_point(x, y_smooth, eps=eps, w=w) + report.append({ + "Variant_Tuning": variant, + "y_max": y_max, + "saturation_x": sat_x + }) + + plt.title(f"Kernel: {kernel}", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Mean flops (gigaFLOP per sec.)", fontsize=18) + plt.grid(True, which='both', linestyle='--', linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir: + import os + os.makedirs(save_dir, exist_ok=True) + fname = f"{save_dir}/{kernel}.png" + plt.savefig(fname, dpi=200) + print(f"Saved plot to {fname}") + plt.show() + + print(f"Kernel: {kernel}") + print(f"eps: {eps}, w: {w}") + print("Variant_Tuning | Global max of smoothed curve (y_max) | Saturation point (x)") + for r in report: + sat_str = f"{r['saturation_x']:.2f}" if r['saturation_x'] is not None else "None" + print(f"{r['Variant_Tuning']:16} | {r['y_max']:.6g} | {sat_str}") + +def smooth_and_plot_all_kernels(df, k=5, eps=0.1, w=3, save_dir=None): + """ + Load data and plot all kernels. 
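+    Returns the input DataFrame with the smoothed-FLOPs column
+    (SMOOTH_FLOPS_COL_NAME) filled in for every kernel.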
+ """ + # Drop rows missing key columns + df = df.dropna(subset=['Kernel', 'Problem size', 'Mean flops (gigaFLOP per sec.)', 'Variant_Tuning']) + df[SMOOTH_FLOPS_COL_NAME] = np.nan + kernels = df['Kernel'].unique() + print(f"Found {len(kernels)} kernels.") + for kernel in kernels: + tmp = df[df['Kernel'] == kernel] + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + df.loc[df['Kernel'] == kernel, SMOOTH_FLOPS_COL_NAME] = tmp[SMOOTH_FLOPS_COL_NAME] + + return df + +def save_kernel_tables(df, outdir='kernel_tables'): + os.makedirs(outdir, exist_ok=True) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + + # Pivot for raw FLOP/s + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, f"{kernel}_raw.csv") + raw_table.to_csv(raw_csv_path) + + # Pivot for smoothed FLOP/s + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, f"{kernel}_smoothed.csv") + smooth_table.to_csv(smooth_csv_path) + + print(f"Saved: {raw_csv_path}, {smooth_csv_path}") + + +parser = argparse.ArgumentParser() +parser.add_argument("--output-dir", required=True, help="Output directory") +args = parser.parse_args() + +OUTPUT_DIR = args.output_dir +FOM_DIR = os.path.join(OUTPUT_DIR ) +FIG_DIR = os.path.join(OUTPUT_DIR, "figures") +COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") +OUTPUT_VARIANT_TUNING = os.path.join(OUTPUT_DIR, "output_with_variant_tuning.csv") + +# Assume 'df' is your DataFrame loaded from the text file +# For example: +df = pd.read_csv(COMBINED_CSV_PATH, delimiter=',') # Adjust delimiter as needed + +# Create the new column +df['Variant_Tuning'] = df['Variant'] + '-' + df['Tuning'] + +# Example: Show the first few rows +# print(df[['Variant', 'Tuning', 'Variant_Tuning']].head()) + +# Calculate smoothed values, and plot them alongside raw values. +df = smooth_and_plot_all_kernels(df, k=5, eps=0.1, w=3, save_dir=FIG_DIR) +df.to_csv(OUTPUT_VARIANT_TUNING, index=False) + +save_kernel_tables(df,outdir=OUTPUT_DIR) + +# Usage: +# save_kernel_tables(df, outdir='my_output_dir') diff --git a/scripts/benchmarking/workflow-todo.txt b/scripts/benchmarking/workflow-todo.txt new file mode 100644 index 000000000..e8458e9a3 --- /dev/null +++ b/scripts/benchmarking/workflow-todo.txt @@ -0,0 +1,111 @@ +Desired Workflow and TODO List for Generating Baselines: +Also a Description of What We Did and an Example for Vendors +================================================================= + +Desired Workflow +---------------- + +1) Execute script to run kernels over pre-defined problem sizes + (a la throughput study) and generate data for execution timings, + FLOP rates, mem B/W, etc. + +2) Execute script to process data and generate result files and plots: + a) File containing CSV table of relevant run data for all kernels run + b) Per-kernel file containing a CSV table of raw and smoothed throughput + curve data points (FLOP/s as a function of problem size) + c) FOM file containing a CSV table with a row for each kernel, variant, and + tuning run. 
Columns:
+      -- Kernel name (kernel-variant-tuning)
+      -- Saturation Problem Size (computed from smoothed throughput curve)
+      -- Saturation FLOP/s (raw FLOP/s at saturation problem size)
+      -- Saturation Mem B/W (mem B/W at saturation problem size)
+
+
+
+Tasks to do
+-----------
+
+0a) We don't want a single script to run AND process data.
+0b) We want one data processing script that generates all the files described
+    in 2 above.
+
+1) Run scripts
+   - These are easy. Via command-line args, we can generate per-kernel and
+     aggregated files containing all necessary data for the analyses we want.
+
+   - One script per architecture?
+     - Each script will take command-line args for the kernel subset to run
+       (tier1 or tier2) and the run mode, if relevant (e.g., on ATS-4: SPX mode
+       and CPX mode).
+
+2) Data processing script
+
+   -- Script should take two args:
+      - arg1: input data directory (where data files from the run live)
+      - arg2: output directory (where to put processed data files)
+
+   2a) The script 'study_run_kernel_tunings.py' can generate what we want for
+       2a above (combined_table.csv file).
+   2b) The script 'study_saturation.py' generates something close to what we
+       want for 2b above. It makes two files for each kernel: one with a CSV
+       table of raw throughput data points and one for smoothed curve data.
+
+       Script needs to be modified to put data for both curves in one file.
+
+       For example, KernelName-saturation-curve-data.csv contains
+
+       Problem Size, Variant1-Tuning1 (raw), Variant1-Tuning1 (smoothed), ...
+       , , , ...
+       , , , ...
+       , , , ...
+       .....
+
+   2c) We need a script to generate a FOM file that contains a CSV table like
+       this (a minimal sketch is included at the end of this note):
+
+       Kernel               , Sat Problem Size, Sat FLOP/s, Sat B/W
+       Name-Variant1-Tuning1,                 ,           ,
+       Name-Variant2-Tuning1,                 ,           ,
+       ......
+       Name-VariantN-TuningM,                 ,           ,
+       .....
+       Etc. for each kernel, variant, and tuning run.
+
+       Notes:
+       - Saturation problem size is determined from smoothed data
+         (see algorithm below)
+       - Saturation FLOP/s and B/W are raw data values at the saturation
+         problem size
+
+
+Smoothed Throughput Curves
+--------------------------
+A method to approximate the saturation point of noisy or non-monotonic
+throughput curves.
+
+Assume we have a set of x and y values:
+
+x0, x1, x2, ..., xN (e.g., problem size)
+y0, y1, y2, ..., yN (e.g., FLOP / sec)
+
+Step 1: Smooth curve via moving median method (robust to outliers;
+        a few extreme values do not affect the result much)
+
+Choose window size k (odd integer such as 3, 5, or 7)
+Define m = (k - 1) / 2
+
+Define the smoothed curve with the same x values as above, but with
+y values y_smooth[i] defined as:
+For i in 0..N:
+  window indices = { max(0, i-m), ..., min(N, i+m) }
+  window values  = y[window indices]
+  y_smooth[i]    = median(window values)
+
+Step 2: Find global max of smoothed curve
+
+y_max = max(y_smooth[i]), i = 0..N
+
+Step 3: Approximate saturation point
+
+The saturation point is the smallest x_i where y_smooth[i] >= (1 - eps) * y_max
+holds for w consecutive i values; e.g., eps = 0.1 and w = 3
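+
+
+Example sketch of the saturation algorithm
+------------------------------------------
+The snippet below is a minimal, self-contained Python sketch of Steps 1-3
+above, in the spirit of the moving_median_smooth / find_saturation_point
+helpers in study_saturation.py. The function name and the toy x/y values are
+made up purely to illustrate the mechanics; they are not benchmark data.
+
+  import statistics
+
+  def smooth_and_find_saturation(x, y, k=5, eps=0.1, w=3):
+      # Step 1: moving median with window size k (m points on each side)
+      m = (k - 1) // 2
+      n = len(y)
+      y_smooth = [statistics.median(y[max(0, i - m):min(n - 1, i + m) + 1])
+                  for i in range(n)]
+      # Step 2: global max of the smoothed curve
+      y_max = max(y_smooth)
+      # Step 3: smallest x where y_smooth stays >= (1 - eps) * y_max
+      #         for w consecutive points
+      threshold = (1.0 - eps) * y_max
+      run_start, run_len = None, 0
+      for i, ys in enumerate(y_smooth):
+          if ys >= threshold:
+              run_start = i if run_len == 0 else run_start
+              run_len += 1
+              if run_len >= w:
+                  return y_smooth, y_max, x[run_start]
+          else:
+              run_start, run_len = None, 0
+      return y_smooth, y_max, None   # curve never saturates by this criterion
+
+  # Toy series: throughput rises, then flattens with a little noise
+  x = [1, 2, 4, 8, 10, 12, 16, 20]
+  y = [10.0, 30.0, 55.0, 70.0, 68.0, 72.0, 71.0, 70.5]
+  _, y_max, sat_x = smooth_and_find_saturation(x, y, k=3, eps=0.1, w=3)
+  print(y_max, sat_x)   # saturation is reported at x = 8 for this toy data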
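+
+
+Example sketch of FOM table generation (2c)
+-------------------------------------------
+A rough sketch of how the FOM rows described in 2c could be assembled from a
+processed table, reusing the smooth_and_find_saturation helper sketched above
+(the build_fom_table name is just for this example). The column names
+('Kernel', 'Variant_Tuning', 'Problem size', 'Mean flops (gigaFLOP per sec.)',
+'Bandwidth (GiB per sec.)') are assumed to match the processing scripts; the
+bandwidth column may be absent in some run CSVs, so it is treated as optional.
+
+  import pandas as pd
+
+  SIZE_COL  = "Problem size"
+  FLOPS_COL = "Mean flops (gigaFLOP per sec.)"
+  BW_COL    = "Bandwidth (GiB per sec.)"   # assumed name; optional
+
+  def build_fom_table(df, k=5, eps=0.1, w=3):
+      rows = []
+      for (kernel, vt), g in df.groupby(["Kernel", "Variant_Tuning"]):
+          g = g.sort_values(SIZE_COL)
+          x = g[SIZE_COL].astype(float).tolist()
+          y = g[FLOPS_COL].astype(float).tolist()
+          _, _, sat_x = smooth_and_find_saturation(x, y, k=k, eps=eps, w=w)
+          sat_flops = sat_bw = None
+          if sat_x is not None:
+              at_sat = g[g[SIZE_COL].astype(float) == sat_x].iloc[0]
+              sat_flops = at_sat[FLOPS_COL]                    # raw FLOP/s at saturation size
+              sat_bw = at_sat[BW_COL] if BW_COL in g.columns else None
+          rows.append({"Kernel": f"{kernel}-{vt}",
+                       "Sat Problem Size": sat_x,
+                       "Sat FLOP/s": sat_flops,
+                       "Sat B/W": sat_bw})
+      return pd.DataFrame(rows)
+
+  # e.g.: build_fom_table(pd.read_csv("output_with_variant_tuning.csv")).to_csv("fom.csv", index=False)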