diff --git a/scripts/benchmarking/artv3_fullscript.py b/scripts/benchmarking/artv3_fullscript.py new file mode 100644 index 000000000..fda378ce9 --- /dev/null +++ b/scripts/benchmarking/artv3_fullscript.py @@ -0,0 +1,866 @@ +#!/usr/bin/env python3 +import os +import glob +import statistics +import argparse +from typing import List, Optional + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +# Try to import IPython display for notebook detection +try: + from IPython.display import display + from IPython import get_ipython + IN_NOTEBOOK = get_ipython() is not None +except Exception: + display = None + IN_NOTEBOOK = False + +# ========================= +# Global constants +# ========================= + +RAW_FLOPS_COL = "Mean gFlops (gigaFLOP per sec.)" +SMOOTH_FLOPS_COL = "Smoothed Mean flops (gigaFLOP per sec.)" +PROBLEM_SIZE_COL = "Problem size" +VARIANT_TUNING_COL = "Variant_Tuning" +KERNEL_COL = "Kernel" +BANDWIDTH_COL = "Bandwidth (GiB per sec.)" +SMOOTH_BW_COL = "Smoothed Bandwidth (GiB per sec.)" + +# ========================= +# General utilities +# ========================= + +def ensure_dir(path: str) -> None: + if path: + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text: str) -> str: + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir: str, patterns: List[str]) -> List[str]: + all_files: List[str] = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) + return all_files + +# ========================= +# Header detection / reading +# ========================= + +def _likely_header_score(line: str) -> int: + tokens = [ + "Kernel", "Variant", "Problem size", "Problem Size", "Mean flops", + "GFlop", "GFLOP", "GFLOPs", "GFLOPS" + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path: str) -> Optional[pd.DataFrame]: + """Read a CSV and heuristically detect the header row.""" + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print("Failed to read {}: {}".format(path, e)) + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): + if not line.strip(): + continue + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + if score >= 3: + break + + if header_idx is None: + print("Could not find header in {}, skipping.".format(path)) + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print("Failed to parse CSV {}: {}".format(path, e)) + return None + + df["__source_file__"] = path + return df + +# ========================= +# Column normalization +# ========================= + +def normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """Normalize various benchmark CSV headers to a standard schema.""" + candidates = { + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + PROBLEM_SIZE_COL: [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + RAW_FLOPS_COL: [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + BANDWIDTH_COL: [ + "Bandwidth (GiB per 
sec.)", + "Bandwidth (GiB/s)", + "Bandwidth (GB/s)", + "Bandwidth (GiB/sec)", + "Bandwidth (GB/sec)", + "Mean bandwidth (GiB/s)", + "Mean Bandwidth (GiB/s)", + "Mean Bandwidth (GiB per sec.)" + ], + } + + df = df.rename(columns={c: c.strip() for c in df.columns}) + new_col_map = {} + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break + + df = df.rename(columns=new_col_map) + return df + +# ========================= +# Backend / tuning classification +# ========================= + +def classify_backend_from_variant(variant) -> str: + s = "" if pd.isna(variant) else str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +def classify_tuning(row: pd.Series) -> str: + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + return "default" + +# ========================= +# Build combined table +# ========================= + +def build_combined_table( + root_dir: str = ".", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + verbose: bool = True, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + files = find_csv_files(root_dir, glob_patterns) + if not files: + if verbose: + print("No files matching patterns {} found under '{}'".format(glob_patterns, root_dir)) + return None + + if verbose: + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + required_cols = {KERNEL_COL, "Variant", PROBLEM_SIZE_COL, RAW_FLOPS_COL} + + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + missing = required_cols - set(df.columns) + if missing: + if verbose: + print("[SKIP] {} missing required columns after normalization: {}".format(path, sorted(missing))) + print(" Columns present:", list(df.columns)) + continue + dfs.append(df) + + if not dfs: + if verbose: + print("No CSV files could be parsed with required columns.") + return None + + combined_df = pd.concat(dfs, ignore_index=True) + + combined_df[KERNEL_COL] = combined_df[KERNEL_COL].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + if kernel_whitelist: + wl = [w.lower() for w in kernel_whitelist] + kernel_series = combined_df[KERNEL_COL].fillna("").astype(str).str.lower() + mask = kernel_series.apply(lambda k: any(w in k for w in wl)) + combined_df = combined_df[mask] + if combined_df.empty: + if verbose: + print("After applying kernel_whitelist, no rows remain.") + return None + + combined_df[PROBLEM_SIZE_COL] = pd.to_numeric(combined_df[PROBLEM_SIZE_COL], errors="coerce") + combined_df[RAW_FLOPS_COL] = pd.to_numeric(combined_df[RAW_FLOPS_COL], errors="coerce") + if BANDWIDTH_COL in combined_df.columns: + combined_df[BANDWIDTH_COL] = pd.to_numeric(combined_df[BANDWIDTH_COL], errors="coerce") + + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=[PROBLEM_SIZE_COL, RAW_FLOPS_COL]) + dropped = before_drop - len(combined_df) + if verbose and dropped > 0: + print("[CLEAN] Dropped {} rows with non-numeric {} or {}.".format( 
+ dropped, PROBLEM_SIZE_COL, RAW_FLOPS_COL)) + + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + combined_df[VARIANT_TUNING_COL] = ( + combined_df["Variant"].astype(str) + "-" + combined_df["Tuning"].astype(str) + ) + + return combined_df + +# ========================= +# Smoothing utilities +# ========================= + +def moving_median_smooth(y, k: int = 5): + n = len(y) + if n == 0: + return [] + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps: float = 0.1, w: int = 3): + """ + Returns the x value of the first saturation point, or None if none is found. + Saturation is defined as the first run of length >= w where y_smooth + stays within (1 - eps) of the maximum value. + """ + if not y_smooth: + return None + + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + n = len(y_smooth) + run_length = 0 + run_start_idx = None + + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return x[run_start_idx] + else: + run_length = 0 + run_start_idx = None + return None + +# ========================= +# Plotting for a single kernel (FLOPs) with FOM output as DataFrame +# ========================= + +def plot_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + plt.figure(figsize=(18, 7)) + variants = df[VARIANT_TUNING_COL].unique() + colors = plt.cm.tab10.colors + + if SMOOTH_FLOPS_COL not in df.columns: + df[SMOOTH_FLOPS_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y = subdf[RAW_FLOPS_COL].astype(float).values + if len(x) == 0: + continue + + y_smooth = moving_median_smooth(list(y), k=k) + + for xi, yi in zip(x, y_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_FLOPS_COL] = yi + + plt.plot( + x, y, "-o", + label="{} (raw)".format(variant), + markersize=8, + color=colors[idx % len(colors)], + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0, + ) + + plt.plot( + x, y_smooth, "--", + label="{} (smoothed)".format(variant), + linewidth=3, + color=colors[idx % len(colors)], + ) + + plt.title("Kernel: {}".format(kernel), fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Mean flops (gigaFLOP per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print("Saved plot to {}".format(fname)) + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + + # FOM-style output as DataFrame + fom_rows = [] + for variant in variants: + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = 
subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = "" + sat_flops_raw = "" + sat_bw = "" + + # === MODIFIED/ADDED FOR SATURATION REPORTING === + if len(x) > 0 and len(y_smooth) > 0: + sat_idx = None + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + run_length = 0 + run_start_idx = None + + for i in range(len(y_smooth)): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + sat_idx = run_start_idx + break + else: + run_length = 0 + run_start_idx = None + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' " + "variant '{}' (eps={}, w={})".format(kernel, variant, eps, w) + ) + else: + print( + "[INFO] Not enough data to compute saturation for " + "kernel '{}' variant '{}'".format(kernel, variant) + ) + # === END MODIFIED === + + fom_rows.append({ + "Kernel": f"{kernel}-{variant}", + "Sat Problem Size": sat_size if sat_size != "" else "N/A", + "Sat GFLOP/s": sat_flops_raw if sat_flops_raw != "" else "N/A", + "Sat B/W (GiB per sec.)": sat_bw if sat_bw != "" else "N/A", + }) + + fom_df = pd.DataFrame(fom_rows) + if IN_NOTEBOOK and display is not None: + display(fom_df) + else: + print(fom_df.to_string(index=False)) + +# ========================= +# Plotting for a single kernel (Bandwidth) +# ========================= + +def plot_kernel_bandwidth( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if BANDWIDTH_COL not in df.columns: + print(f"[INFO] No '{BANDWIDTH_COL}' column for kernel '{kernel}', skipping bandwidth plot.") + return + + plt.figure(figsize=(18, 7)) + variants = df[VARIANT_TUNING_COL].unique() + colors = plt.cm.tab10.colors + + if SMOOTH_BW_COL not in df.columns: + df[SMOOTH_BW_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_bw = subdf[BANDWIDTH_COL].astype(float).values + + if len(x) == 0: + continue + + y_bw_smooth = moving_median_smooth(list(y_bw), k=k) + + for xi, yi in zip(x, y_bw_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_BW_COL] = yi + + color = colors[idx % len(colors)] + + plt.plot( + x, y_bw, "-o", + label=f"{variant} (raw B/W)", + markersize=8, + color=color, + markerfacecolor=color, + markeredgewidth=0, + ) + + plt.plot( + x, y_bw_smooth, "--", + label=f"{variant} (smoothed B/W)", + linewidth=3, + color=color, + ) + + plt.title(f"Kernel: {kernel} - Bandwidth", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Bandwidth (GiB per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, 
"{}_bandwidth.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print(f"Saved bandwidth plot to {fname}") + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Smooth and plot all kernels (FLOPs and Bandwidth) +# ========================= + +def smooth_and_plot_all_kernels( + df: pd.DataFrame, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +) -> pd.DataFrame: + df = df.dropna(subset=[KERNEL_COL, PROBLEM_SIZE_COL, RAW_FLOPS_COL, VARIANT_TUNING_COL]).copy() + df[SMOOTH_FLOPS_COL] = np.nan + + if BANDWIDTH_COL in df.columns: + df[SMOOTH_BW_COL] = np.nan + + kernels = df[KERNEL_COL].unique() + print("Found {} kernels.".format(len(kernels))) + + for kernel in kernels: + tmp = df[df[KERNEL_COL] == kernel].copy() + + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + df.loc[df[KERNEL_COL] == kernel, SMOOTH_FLOPS_COL] = tmp[SMOOTH_FLOPS_COL] + + if BANDWIDTH_COL in df.columns: + plot_kernel_bandwidth(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + if SMOOTH_BW_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_BW_COL] = tmp[SMOOTH_BW_COL] + + return df + +# ========================= +# Per-kernel tables (raw and smoothed) +# ========================= + +def save_kernel_tables( + df: pd.DataFrame, + outdir: str = "kernel_tables", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, "{}_raw.csv".format(sanitize_filename(kernel))) + raw_table.to_csv(raw_csv_path) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, "{}_smoothed.csv".format(sanitize_filename(kernel))) + smooth_table.to_csv(smooth_csv_path) + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_raw_csv_path = os.path.join(outdir, "{}_bandwidth_raw.csv".format(sanitize_filename(kernel))) + bw_raw_table.to_csv(bw_raw_csv_path) + + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_csv_path = os.path.join(outdir, "{}_bandwidth_smoothed.csv".format(sanitize_filename(kernel))) + bw_smooth_table.to_csv(bw_smooth_csv_path) + + print("Saved: {}, {}, {}, {}".format( + raw_csv_path, smooth_csv_path, bw_raw_csv_path, bw_smooth_csv_path)) + else: + print("Saved: {}, {}".format(raw_csv_path, smooth_csv_path)) + +# ========================= +# Per-kernel saturation curve data (raw + smoothed in one file) +# ========================= + +def save_saturation_curve_data( + df: pd.DataFrame, + outdir: str = "saturation-curve-data", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + raw_table = 
raw_table.sort_index() + smooth_table = smooth_table.reindex( + index=raw_table.index, + columns=raw_table.columns, + ) + combined = pd.DataFrame(index=raw_table.index) + for vt in raw_table.columns: + raw_col_name = f"{vt} (raw)" + smooth_col_name = f"{vt} (smoothed)" + combined[raw_col_name] = raw_table[vt] + combined[smooth_col_name] = smooth_table[vt] + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_table = bw_smooth_table.reindex( + index=bw_raw_table.index, + columns=bw_raw_table.columns, + ) + for vt in bw_raw_table.columns: + bw_raw_col_name = f"{vt} (raw B/W)" + bw_smooth_col_name = f"{vt} (smoothed B/W)" + combined[bw_raw_col_name] = bw_raw_table[vt] + combined[bw_smooth_col_name] = bw_smooth_table[vt] + + combined = combined.reset_index().rename(columns={PROBLEM_SIZE_COL: "Problem size"}) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + combined.to_csv(out_path, index=False) + print(f"Saved saturation curve data: {out_path}") + +# ========================= +# Per-kernel FOM tables (saturation points, with units, NO Sat Smoothed B/W) +# ========================= + +def save_fom_tables( + df: pd.DataFrame, + outdir: str = "FOM", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + variant_tunings = df_kernel[VARIANT_TUNING_COL].unique() + rows = [] + for vt in variant_tunings: + subdf = df_kernel[df_kernel[VARIANT_TUNING_COL] == vt].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = None + sat_flops_raw = "" + sat_bw = "" + # === MODIFIED/ADDED FOR SATURATION REPORTING === + if len(x) > 0 and len(y_smooth) > 0: + sat_idx = None + y_max = max(y_smooth) + threshold = (1.0 - 0.1) * y_max + w = 3 + run_length = 0 + run_start_idx = None + + for i in range(len(y_smooth)): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + sat_idx = run_start_idx + break + else: + run_length = 0 + run_start_idx = None + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + print( + "[WARN] Bandwidth column missing for kernel '{}' variant '{}' " + "while computing saturation".format(kernel, vt) + ) + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' variant '{}' " + "when building FOM table".format(kernel, vt) + ) + else: + print( + "[INFO] Not enough data to compute saturation for " + "kernel '{}' variant '{}' when building FOM table".format(kernel, vt) + ) + # === END MODIFIED === + + def na_if_empty(value): + return value if value not in [None, ""] else "N/A" + + rows.append({ + "Kernel": f"{kernel}-{vt}", + "Sat Problem Size": na_if_empty(sat_size), + "Sat GFLOP/s": 
na_if_empty(sat_flops_raw), + "Sat B/W (GiB per sec.)": na_if_empty(sat_bw), + }) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + pd.DataFrame(rows).to_csv(out_path, index=False) + print(f"Saved FOM table: {out_path}") + +# ========================= +# Main pipeline: callable from notebook or CLI +# ========================= + +def run_pipeline( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + ensure_dir(output_dir) + fig_dir = os.path.join(output_dir, "figures") + ensure_dir(fig_dir) + + combined_df = build_combined_table( + root_dir=root_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + verbose=True, + ) + if combined_df is None: + print("No combined table created.") + return None + + combined_csv_path = os.path.join(output_dir, "combined_table.csv") + combined_df.to_csv(combined_csv_path, index=False) + print("[SAVE] Combined table saved to {}".format(combined_csv_path)) + + df_smoothed = smooth_and_plot_all_kernels( + combined_df, + k=smooth_window, + eps=saturation_eps, + w=saturation_w, + save_dir=fig_dir, + ) + + output_variant_tuning_path = os.path.join(output_dir, "output_with_variant_tuning.csv") + df_smoothed.to_csv(output_variant_tuning_path, index=False) + print("[SAVE] Smoothed combined table saved to {}".format(output_variant_tuning_path)) + + save_kernel_tables(df_smoothed, outdir=output_dir) + saturation_dir = os.path.join(output_dir, "saturation-curve-data") + save_saturation_curve_data(df_smoothed, outdir=saturation_dir) + fom_dir = os.path.join(output_dir, "FOM") + save_fom_tables(df_smoothed, outdir=fom_dir) + + return df_smoothed + +# ========================= +# CLI entry point +# ========================= + +def main_cli(): + parser = argparse.ArgumentParser( + description="Combine benchmark CSVs, smooth FLOPs/Bandwidth and generate plots/tables." + ) + parser.add_argument("--root-dir", default=".", help="Root directory to search for CSV files") + parser.add_argument("--output-dir", default="./output", help="Directory to write outputs") + parser.add_argument( + "--glob-pattern", + action="append", + default=None, + help="Glob pattern for CSV files (can be repeated). 
Default: **/*factor*kernel-run-data.csv", + ) + parser.add_argument( + "--kernel-whitelist", + nargs="*", + default=None, + help="List of substrings to filter Kernel names by (case insensitive).", + ) + parser.add_argument("--smooth-window", type=int, default=5, help="Moving median window size") + parser.add_argument("--saturation-eps", type=float, default=0.1, help="Epsilon for saturation threshold") + parser.add_argument("--saturation-w", type=int, default=3, help="Consecutive points needed for saturation") + + args = parser.parse_args() + + run_pipeline( + root_dir=args.root_dir, + output_dir=args.output_dir, + glob_patterns=args.glob_pattern, + kernel_whitelist=args.kernel_whitelist, + smooth_window=args.smooth_window, + saturation_eps=args.saturation_eps, + saturation_w=args.saturation_w, + ) + +# ========================= +# Notebook helper (no argparse) +# ========================= + +def main_notebook( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +): + return run_pipeline( + root_dir=root_dir, + output_dir=output_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + smooth_window=smooth_window, + saturation_eps=saturation_eps, + saturation_w=saturation_w, + ) + +if __name__ == "__main__": + if not IN_NOTEBOOK: + main_cli() diff --git a/scripts/benchmarking/full-pipeline.sh b/scripts/benchmarking/full-pipeline.sh new file mode 100755 index 000000000..bc37e394e --- /dev/null +++ b/scripts/benchmarking/full-pipeline.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# run_full_pipeline.sh [cpx|spx] /path/to/output_dir +# +# Example: +# run_full_pipeline.sh cpx /p/lustre/artv3/raja_perf_runs/2026-01-29 + +MODE="${1:-cpx}" +OUTDIR="${2:-./raja_perf_results}" + +mkdir -p "${OUTDIR}" + +# 1) Run tier1 mode script (CPX or SPX) +case "${MODE}" in + cpx|CPX) + echo "Running CPX tier1 script..." + ./scripts/benchmarking/run_tier1_cpx-mode.sh "${OUTDIR}" + ;; + spx|SPX) + echo "Running SPX tier1 script..." + ./scripts/benchmarking/run_tier1_spx-mode.sh "${OUTDIR}" + ;; + *) + echo "ERROR: MODE must be 'cpx' or 'spx' (got: ${MODE})" >&2 + exit 1 + ;; +esac + +# 2) Run study_run_kernel_tunings.py +echo "Running study_run_kernel_tunings.py..." +python3 ./scripts/benchmarking/study_run_kernel_tunings.py \ + --output-dir "${OUTDIR}" + + +# 4) Run study_saturation.py and create FoMs +echo "Running study_saturation.py..." +python3 ./scripts/benchmarking/study_saturation.py \ + --output-dir "${OUTDIR}" + +# At this point we expect a combined_table.csv in ${OUTDIR} +COMBINED="${OUTDIR}/combined_table.csv" +if [[ ! -f "${COMBINED}" ]]; then + echo "ERROR: combined_table.csv not found in ${COMBINED}." >&2 + echo " Check that study_* scripts are configured to write it." >&2 + exit 1 +fi + +echo "Done. 
Final FOM files are in: ${OUTDIR}" \ No newline at end of file diff --git a/scripts/benchmarking/rhornung67_fullscript.py b/scripts/benchmarking/rhornung67_fullscript.py new file mode 100644 index 000000000..6d664d8ba --- /dev/null +++ b/scripts/benchmarking/rhornung67_fullscript.py @@ -0,0 +1,975 @@ +#!/usr/bin/env python3 +import os +import glob +import statistics +import argparse +from typing import List, Optional + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +# Try to import IPython display for notebook detection +try: + from IPython.display import display + from IPython import get_ipython + IN_NOTEBOOK = get_ipython() is not None +except Exception: + display = None + IN_NOTEBOOK = False + +# ========================= +# Global constants +# ========================= + +RAW_FLOPS_COL = "Mean gFlops (gigaFLOP per sec.)" +SMOOTH_FLOPS_COL = "Smoothed Mean flops (gigaFLOP per sec.)" +PROBLEM_SIZE_COL = "Problem size" +VARIANT_TUNING_COL = "Variant_Tuning" +KERNEL_COL = "Kernel" +BANDWIDTH_COL = "Bandwidth (GiB per sec.)" +SMOOTH_BW_COL = "Smoothed Bandwidth (GiB per sec.)" +SAT_IDX_COL = "Saturation Index" + +# ========================= +# General utilities +# ========================= + +def ensure_dir(path: str) -> None: + if path: + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text: str) -> str: + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir: str, patterns: List[str]) -> List[str]: + all_files: List[str] = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) + return all_files + +# ========================= +# Header detection / reading +# ========================= + +def _likely_header_score(line: str) -> int: + tokens = [ + "Kernel", "Variant", "Problem size", "Problem Size", "Mean flops", + "GFlop", "GFLOP", "GFLOPs", "GFLOPS" + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path: str) -> Optional[pd.DataFrame]: + """Read a CSV and heuristically detect the header row.""" + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print("Failed to read {}: {}".format(path, e)) + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): + if not line.strip(): + continue + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + if score >= 3: + break + + if header_idx is None: + print("Could not find header in {}, skipping.".format(path)) + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print("Failed to parse CSV {}: {}".format(path, e)) + return None + + df["__source_file__"] = path + return df + +# ========================= +# Column normalization +# ========================= + +def normalize_columns(df: pd.DataFrame) -> pd.DataFrame: + """Normalize various benchmark CSV headers to a standard schema.""" + candidates = { + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + PROBLEM_SIZE_COL: [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + RAW_FLOPS_COL: [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", 
"GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + BANDWIDTH_COL: [ + "Bandwidth (GiB per sec.)", + "Bandwidth (GiB/s)", + "Bandwidth (GB/s)", + "Bandwidth (GiB/sec)", + "Bandwidth (GB/sec)", + "Mean bandwidth (GiB/s)", + "Mean Bandwidth (GiB/s)", + "Mean Bandwidth (GiB per sec.)" + ], + } + + df = df.rename(columns={c: c.strip() for c in df.columns}) + new_col_map = {} + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break + + df = df.rename(columns=new_col_map) + return df + +# ========================= +# Backend / tuning classification +# ========================= + +def classify_backend_from_variant(variant) -> str: + s = "" if pd.isna(variant) else str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +def classify_tuning(row: pd.Series) -> str: + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + return "default" + +# ========================= +# Build combined table +# ========================= + +def build_combined_table( + root_dir: str = ".", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + verbose: bool = True, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + glob_patterns = ["**/*factor*kernel-run-data.csv"] + + files = find_csv_files(root_dir, glob_patterns) + if not files: + if verbose: + print("No files matching patterns {} found under '{}'".format(glob_patterns, root_dir)) + return None + + if verbose: + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + required_cols = {KERNEL_COL, "Variant", PROBLEM_SIZE_COL, RAW_FLOPS_COL} + + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + missing = required_cols - set(df.columns) + if missing: + if verbose: + print("[SKIP] {} missing required columns after normalization: {}".format(path, sorted(missing))) + print(" Columns present:", list(df.columns)) + continue + dfs.append(df) + + if not dfs: + if verbose: + print("No CSV files could be parsed with required columns.") + return None + + combined_df = pd.concat(dfs, ignore_index=True) + + combined_df[KERNEL_COL] = combined_df[KERNEL_COL].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + if kernel_whitelist: + wl = [w.lower() for w in kernel_whitelist] + kernel_series = combined_df[KERNEL_COL].fillna("").astype(str).str.lower() + mask = kernel_series.apply(lambda k: any(w in k for w in wl)) + combined_df = combined_df[mask] + if combined_df.empty: + if verbose: + print("After applying kernel_whitelist, no rows remain.") + return None + + combined_df[PROBLEM_SIZE_COL] = pd.to_numeric(combined_df[PROBLEM_SIZE_COL], errors="coerce") + combined_df[RAW_FLOPS_COL] = pd.to_numeric(combined_df[RAW_FLOPS_COL], errors="coerce") + if BANDWIDTH_COL in combined_df.columns: + combined_df[BANDWIDTH_COL] = pd.to_numeric(combined_df[BANDWIDTH_COL], errors="coerce") + + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=[PROBLEM_SIZE_COL, RAW_FLOPS_COL]) + dropped = before_drop - 
len(combined_df) + if verbose and dropped > 0: + print("[CLEAN] Dropped {} rows with non-numeric {} or {}.".format( + dropped, PROBLEM_SIZE_COL, RAW_FLOPS_COL)) + + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + combined_df[VARIANT_TUNING_COL] = ( + combined_df["Variant"].astype(str) + "-" + combined_df["Tuning"].astype(str) + ) + + return combined_df + +# ========================= +# Smoothing and saturation point utilities +# ========================= + +def moving_median_smooth(y, k: int = 5): + n = len(y) + if n == 0: + return [] + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps: float = 0.1, w: int = 3): + """ + Returns the index of the x value at the first saturation point, + or None if none is found. + First, the method sets y_max = max(y_smooth) and then tries to + find the first run of y_smooth values of length >= w where + y_smooth >= (1 - eps) * y_max. If such a run is found, the + function returns the index of the first y_smooth value in the run. + Second, if the first attempt does not find such a run, it defines + y_end = y_smooth[n - 1], where n is the length of y_smooth. Then, + it searches for the first run of y_smooth values of length >= w + where abs( (y_smooth[i] - y_end) / y_end ) <= eps. If such a run is + found, the function returns the index of the first y_smooth value + in the run. + """ + + if len(x) < w or len(y_smooth) < w: + return None + + n = len(y_smooth) + +# ========================= +# First attempt to find index of saturation point +# ========================= + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + run_length = 0 + run_start_idx = None + + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return run_start_idx + else: + run_length = 0 + run_start_idx = None + +# ========================= +# Second attempt to find index of saturation point +# ========================= + + y_end = y_smooth[n - 1] + threshold = eps + run_length = 0 + run_start_idx = None + + for i in range(n): + if abs( (y_smooth[i] - y_end) / y_end ) <= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return run_start_idx + else: + run_length = 0 + run_start_idx = None + + return None + +# ========================= +# Find the saturation point for single kernel +# ========================= + +def find_saturation_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + variants = df[VARIANT_TUNING_COL].unique() + + # generate smoothed data + if SMOOTH_FLOPS_COL not in df.columns: + df[SMOOTH_FLOPS_COL] = np.nan + + if SMOOTH_BW_COL not in df.columns: + df[SMOOTH_BW_COL] = np.nan + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + + if len(x) == 0: + continue + + if RAW_FLOPS_COL in subdf.columns: + y = subdf[RAW_FLOPS_COL].astype(float).values + + y_smooth = moving_median_smooth(list(y), k=k) + + for xi, yi in zip(x, y_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, 
SMOOTH_FLOPS_COL] = yi + + if BANDWIDTH_COL in subdf.columns: + y_bw = subdf[BANDWIDTH_COL].astype(float).values + + y_bw_smooth = moving_median_smooth(list(y_bw), k=k) + + for xi, yi in zip(x, y_bw_smooth): + mask = ( + (df[VARIANT_TUNING_COL] == variant) + & (df[PROBLEM_SIZE_COL] == xi) + ) + df.loc[mask, SMOOTH_BW_COL] = yi + + # FOM-style output as DataFrame + fom_rows = [] + for variant in variants: + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = "" + sat_flops = "" + sat_bw = "" + + sat_idx = find_saturation_point(x, y_smooth, eps, w) + + df.loc[df[VARIANT_TUNING_COL] == variant, SAT_IDX_COL] = sat_idx + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops = raw_at_sat.iloc[0] + + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + if not bw_at_sat.empty and pd.notna(bw_at_sat.iloc[0]): + sat_bw = bw_at_sat.iloc[0] + + else: + print( + "[INFO] No saturation point found for kernel '{}' " + "variant '{}' (eps={}, w={})".format(kernel, variant, eps, w) + ) + + fom_rows.append({ + "Kernel": f"{kernel}-{variant}", + "Sat Problem Size": sat_size if sat_size != "" else "N/A", + "Sat GFLOP/s": sat_flops if sat_flops != "" else "N/A", + "Sat B/W (GiB per sec.)": sat_bw if sat_bw != "" else "N/A", + }) + + fom_df = pd.DataFrame(fom_rows) + if IN_NOTEBOOK and display is not None: + display(fom_df) + else: + print(fom_df.to_string(index=False)) + +# ========================= +# Plotting for a single kernel (FLOPs) with FOM output as DataFrame +# ========================= + +def plot_kernel( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if RAW_FLOPS_COL not in df.columns: + print(f"[INFO] No '{RAW_FLOPS_COL}' column for kernel '{kernel}', skipping flops plot.") + return + + variants = df[VARIANT_TUNING_COL].unique() + + plt.figure(figsize=(18, 7)) + colors = plt.cm.tab10.colors + + ymin = 1e99 + ymax = -1e99 + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y = subdf[RAW_FLOPS_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + + if len(x) == 0: + continue + + y_min = min(y) + if (y_min < ymin): + ymin = y_min + y_smooth_min = min(y_smooth) + if (y_smooth_min < ymin): + ymin = y_smooth_min + + y_max = max(y) + if (y_max > ymax): + ymax = y_max + y_smooth_max = max(y_smooth) + if (y_smooth_max > ymax): + ymax = y_smooth_max + + plt.plot( + x, y, "-", + label="{} (raw flops)".format(variant), + color=colors[idx % len(colors)], + marker="o", + markersize=8, + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0 + ) + + plt.plot( + x, y_smooth, "--", + label="{} (smoothed flops)".format(variant), + linewidth=3, + color=colors[idx % len(colors)], + marker="+", + markersize=10 + ) + + if SAT_IDX_COL in subdf.columns: + sat_idx = subdf[SAT_IDX_COL].astype(int).values[0] + + sat_x = x[sat_idx] + sat_y = y[sat_idx] + + plt.plot( + [sat_x], [sat_y], "-", + label="{} saturation ({}, {})".format(variant, sat_x, sat_y), + linewidth=0, + color=colors[idx % len(colors)], + marker="*", 
+ markersize=30 + ) + + if ymin > 0: + ymin = 0 + yrange = ymax-ymin + yoverhang = yrange*0.1 + + plt.title("Kernel: {}".format(kernel), fontsize=22) + plt.xlabel("Problem size (bytes)", fontsize=18) + plt.ylabel("Mean flops (GFLOP per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.ylim((ymin-yoverhang, ymax+yoverhang)) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=14, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}_flops.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print("Saved plot to {}".format(fname)) + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Plotting for a single kernel (Bandwidth) +# ========================= + +def plot_kernel_bandwidth( + df: pd.DataFrame, + kernel: str, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +): + if BANDWIDTH_COL not in df.columns: + print(f"[INFO] No '{BANDWIDTH_COL}' column for kernel '{kernel}', skipping bandwidth plot.") + return + + variants = df[VARIANT_TUNING_COL].unique() + + plt.figure(figsize=(18, 7)) + colors = plt.cm.tab10.colors + + ymin = 1e99 + ymax = -1e99 + + for idx, variant in enumerate(variants): + subdf = df[df[VARIANT_TUNING_COL] == variant].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_bw = subdf[BANDWIDTH_COL].astype(float).values + y_bw_smooth = subdf[SMOOTH_BW_COL].astype(float).values + + if len(x) == 0: + continue + + y_bw_min = min(y_bw) + if (y_bw_min < ymin): + ymin = y_bw_min + y_bw_smooth_min = min(y_bw_smooth) + if (y_bw_smooth_min < ymin): + ymin = y_bw_smooth_min + + y_bw_max = max(y_bw) + if (y_bw_max > ymax): + ymax = y_bw_max + y_bw_smooth_max = max(y_bw_smooth) + if (y_bw_smooth_max > ymax): + ymax = y_bw_smooth_max + + plt.plot( + x, y_bw, "-", + label=f"{variant} (raw B/W)", + color=colors[idx % len(colors)], + marker="o", + markersize=8, + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0, + ) + + plt.plot( + x, y_bw_smooth, "--", + label=f"{variant} (smoothed B/W)", + linewidth=3, + color=colors[idx % len(colors)], + marker="+", + markersize=10 + ) + + if SAT_IDX_COL in subdf.columns: + sat_idx = subdf[SAT_IDX_COL].astype(int).values[0] + + sat_x = x[sat_idx] + sat_y = y_bw[sat_idx] + + plt.plot( + [sat_x], [sat_y], "-", + label="{} saturation ({}, {})".format(variant, sat_x, sat_y), + linewidth=0, + color=colors[idx % len(colors)], + marker="*", + markersize=30 + ) + + if ymin > 0: + ymin = 0 + yrange = ymax-ymin + yoverhang = yrange*0.1 + + plt.title(f"Kernel: {kernel} - Bandwidth", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Bandwidth (GiB per sec.)", fontsize=18) + plt.grid(True, which="both", linestyle="--", linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.ylim((ymin-yoverhang, ymax+yoverhang)) + plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1), fontsize=14, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir is not None: + ensure_dir(save_dir) + fname = os.path.join(save_dir, "{}_bandwidth.png".format(sanitize_filename(kernel))) + plt.savefig(fname, dpi=200) + print(f"Saved bandwidth plot to {fname}") + if IN_NOTEBOOK: + plt.show() + else: + plt.close() + +# ========================= +# Smooth and plot all kernels (FLOPs and Bandwidth) +# 
========================= + +def smooth_and_plot_all_kernels( + df: pd.DataFrame, + k: int = 5, + eps: float = 0.1, + w: int = 3, + save_dir: Optional[str] = None, +) -> pd.DataFrame: + df = df.dropna(subset=[KERNEL_COL, PROBLEM_SIZE_COL, RAW_FLOPS_COL, VARIANT_TUNING_COL]).copy() + df[SMOOTH_FLOPS_COL] = np.nan + + if BANDWIDTH_COL in df.columns: + df[SMOOTH_BW_COL] = np.nan + + kernels = df[KERNEL_COL].unique() + print("Found {} kernels.".format(len(kernels))) + + for kernel in kernels: + tmp = df[df[KERNEL_COL] == kernel].copy() + + find_saturation_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + if SMOOTH_FLOPS_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_FLOPS_COL] = tmp[SMOOTH_FLOPS_COL] + if SMOOTH_BW_COL in tmp.columns: + df.loc[df[KERNEL_COL] == kernel, SMOOTH_BW_COL] = tmp[SMOOTH_BW_COL] + + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + + if BANDWIDTH_COL in df.columns: + plot_kernel_bandwidth(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + + return df + +# ========================= +# Per-kernel tables (raw and smoothed) +# ========================= + +def save_kernel_tables( + df: pd.DataFrame, + outdir: str = "kernel_tables", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, "{}_flops_raw.csv".format(sanitize_filename(kernel))) + raw_table.to_csv(raw_csv_path) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, "{}_flops_smoothed.csv".format(sanitize_filename(kernel))) + smooth_table.to_csv(smooth_csv_path) + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_raw_csv_path = os.path.join(outdir, "{}_bandwidth_raw.csv".format(sanitize_filename(kernel))) + bw_raw_table.to_csv(bw_raw_csv_path) + + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_csv_path = os.path.join(outdir, "{}_bandwidth_smoothed.csv".format(sanitize_filename(kernel))) + bw_smooth_table.to_csv(bw_smooth_csv_path) + + print("Saved: {}, {}, {}, {}".format( + raw_csv_path, smooth_csv_path, bw_raw_csv_path, bw_smooth_csv_path)) + else: + print("Saved: {}, {}".format(raw_csv_path, smooth_csv_path)) + +# ========================= +# Per-kernel saturation curve data (raw + smoothed in one file) +# ========================= + +def save_saturation_curve_data( + df: pd.DataFrame, + outdir: str = "saturation-curve-data", +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL, + ) + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL, + ) + raw_table = raw_table.sort_index() + smooth_table = smooth_table.reindex( + index=raw_table.index, + columns=raw_table.columns, + ) + combined = 
pd.DataFrame(index=raw_table.index) + for vt in raw_table.columns: + raw_col_name = f"{vt} (raw)" + smooth_col_name = f"{vt} (smoothed)" + combined[raw_col_name] = raw_table[vt] + combined[smooth_col_name] = smooth_table[vt] + + if BANDWIDTH_COL in df_kernel.columns and SMOOTH_BW_COL in df_kernel.columns: + bw_raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=BANDWIDTH_COL, + ).sort_index() + bw_smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_BW_COL, + ).sort_index() + bw_smooth_table = bw_smooth_table.reindex( + index=bw_raw_table.index, + columns=bw_raw_table.columns, + ) + for vt in bw_raw_table.columns: + bw_raw_col_name = f"{vt} (raw B/W)" + bw_smooth_col_name = f"{vt} (smoothed B/W)" + combined[bw_raw_col_name] = bw_raw_table[vt] + combined[bw_smooth_col_name] = bw_smooth_table[vt] + + combined = combined.reset_index().rename(columns={PROBLEM_SIZE_COL: "Problem size"}) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + combined.to_csv(out_path, index=False) + print(f"Saved saturation curve data: {out_path}") + +# ========================= +# Per-kernel FOM tables (saturation points, with units, NO Sat Smoothed B/W) +# ========================= + +def save_fom_tables( + df: pd.DataFrame, + eps: float = 0.1, + w: int = 3, + outdir: str = "FOM" +) -> None: + ensure_dir(outdir) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel].copy() + variant_tunings = df_kernel[VARIANT_TUNING_COL].unique() + rows = [] + for vt in variant_tunings: + subdf = df_kernel[df_kernel[VARIANT_TUNING_COL] == vt].copy() + subdf = subdf.sort_values(PROBLEM_SIZE_COL) + x = subdf[PROBLEM_SIZE_COL].astype(float).values + y_smooth = subdf[SMOOTH_FLOPS_COL].astype(float).values + sat_size = None + sat_flops_raw = "" + sat_bw = "" + + sat_idx = find_saturation_point(x, y_smooth, eps, w) + + if sat_idx is not None: + sat_size = x[sat_idx] + mask = (subdf[PROBLEM_SIZE_COL] == sat_size) + raw_at_sat = subdf.loc[mask, RAW_FLOPS_COL] + if not raw_at_sat.empty and pd.notna(raw_at_sat.iloc[0]): + sat_flops_raw = raw_at_sat.iloc[0] + if BANDWIDTH_COL in subdf.columns: + bw_at_sat = subdf.loc[mask, BANDWIDTH_COL] + bw_non_nan = bw_at_sat.dropna() + if not bw_non_nan.empty: + sat_bw = bw_non_nan.iloc[0] + else: + sat_bw = "" + else: + print( + "[WARN] Bandwidth column missing for kernel '{}' variant '{}' " + "while computing saturation".format(kernel, vt) + ) + sat_bw = "" + else: + print( + "[INFO] No saturation point found for kernel '{}' variant '{}' " + "when building FOM table".format(kernel, vt) + ) + + def na_if_empty(value): + return value if value not in [None, ""] else "N/A" + + rows.append({ + "Kernel": f"{kernel}-{vt}", + "Sat Problem Size": na_if_empty(sat_size), + "Sat GFLOP/s": na_if_empty(sat_flops_raw), + "Sat B/W (GiB per sec.)": na_if_empty(sat_bw), + }) + out_path = os.path.join(outdir, f"{sanitize_filename(kernel)}.csv") + pd.DataFrame(rows).to_csv(out_path, index=False) + print(f"Saved FOM table: {out_path}") + +# ========================= +# Main pipeline: callable from notebook or CLI +# ========================= + +def run_pipeline( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +) -> Optional[pd.DataFrame]: + if glob_patterns is None: + 
glob_patterns = ["**/*factor*kernel-run-data.csv"] + + ensure_dir(output_dir) + fig_dir = os.path.join(output_dir, "figures") + ensure_dir(fig_dir) + + combined_df = build_combined_table( + root_dir=root_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + verbose=True, + ) + if combined_df is None: + print("No combined table created.") + return None + + combined_csv_path = os.path.join(output_dir, "combined_table.csv") + combined_df.to_csv(combined_csv_path, index=False) + print("[SAVE] Combined table saved to {}".format(combined_csv_path)) + + df_smoothed = smooth_and_plot_all_kernels( + combined_df, + k=smooth_window, + eps=saturation_eps, + w=saturation_w, + save_dir=fig_dir, + ) + + output_variant_tuning_path = os.path.join(output_dir, "output_with_variant_tuning.csv") + df_smoothed.to_csv(output_variant_tuning_path, index=False) + print("[SAVE] Smoothed combined table saved to {}".format(output_variant_tuning_path)) + + save_kernel_tables(df_smoothed, outdir=output_dir) + saturation_dir = os.path.join(output_dir, "saturation-curve-data") + save_saturation_curve_data(df_smoothed, outdir=saturation_dir) + fom_dir = os.path.join(output_dir, "FOM") + save_fom_tables(df_smoothed, saturation_eps, saturation_w, outdir=fom_dir) + + return df_smoothed + +# ========================= +# CLI entry point +# ========================= + +def main_cli(): + parser = argparse.ArgumentParser( + description="Combine benchmark CSVs, smooth FLOPs/Bandwidth and generate plots/tables." + ) + parser.add_argument("--root-dir", default=".", help="Root directory to search for CSV files (default: '.')") + parser.add_argument("--output-dir", default="./output", help="Directory to write outputs (default: './output')") + parser.add_argument( + "--glob-pattern", + action="append", + default=None, + help="Glob pattern for CSV files (can be repeated). 
Default: **/*factor*kernel-run-data.csv", + ) + parser.add_argument( + "--kernel-whitelist", + nargs="*", + default=None, + help="List of substrings to filter Kernel names by (case insensitive, default: none).", + ) + parser.add_argument("--smooth-window", type=int, default=5, help="Moving median window size (default: '5')") + parser.add_argument("--saturation-eps", type=float, default=0.1, help="Epsilon for saturation threshold (default: '0.1')") + parser.add_argument("--saturation-w", type=int, default=3, help="Consecutive points needed for saturation (default: '3')") + + args = parser.parse_args() + + run_pipeline( + root_dir=args.root_dir, + output_dir=args.output_dir, + glob_patterns=args.glob_pattern, + kernel_whitelist=args.kernel_whitelist, + smooth_window=args.smooth_window, + saturation_eps=args.saturation_eps, + saturation_w=args.saturation_w, + ) + +# ========================= +# Notebook helper (no argparse) +# ========================= + +def main_notebook( + root_dir: str = ".", + output_dir: str = "./output", + glob_patterns: Optional[List[str]] = None, + kernel_whitelist: Optional[List[str]] = None, + smooth_window: int = 5, + saturation_eps: float = 0.1, + saturation_w: int = 3, +): + return run_pipeline( + root_dir=root_dir, + output_dir=output_dir, + glob_patterns=glob_patterns, + kernel_whitelist=kernel_whitelist, + smooth_window=smooth_window, + saturation_eps=saturation_eps, + saturation_w=saturation_w, + ) + +if __name__ == "__main__": + if not IN_NOTEBOOK: + main_cli() diff --git a/scripts/benchmarking/run_tier1_cpx-mode.sh b/scripts/benchmarking/run_tier1_cpx-mode.sh new file mode 100755 index 000000000..9fb0f3db7 --- /dev/null +++ b/scripts/benchmarking/run_tier1_cpx-mode.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +## Run all 'Priority 1' benchmark kernels for GPU, non-lambda variants only +## on 4 MPI ranks and dump the results in the specified directory. + +flux alloc -xN1 --amd-gpumode=CPX -t 45 bash -c ' + +OUTDIR=RPBenchmarkTestMPI_tier1-CPX + +# Reference memory footprint for SPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 = 268435456 bytes +#BASEMEM=268435456 + +# Reference memory footprint for CPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 / 6 = 44739242 bytes +BASEMEM=44739242 + +# Collection of problem size factors to run +FACTORS=(1 2 4 8 10 12 16 20) + +# List of kernels to run +KERNELS=("DIFFUSION3DPA" + "EDGE3D" + "ENERGY" + "INTSC_HEXRECT" + "MASS3DEA" + "MASS3DPA_ATOMIC" + "MASSVEC3DPA" + "MATVEC_3D_STENCIL" + "NODAL_ACCUMULATION_3D" + "VOL3D") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + for factor in "${FACTORS[@]}"; do + mem=$((factor * BASEMEM)) + echo " Running with memory = $mem" + flux run -xN1 -n4 ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 1 \ + --npasses-combiners Average Minimum Maximum \ + --outdir ${OUTDIR} \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --memory-allocated "$mem" \ + --warmup-perfrun-same \ + -ev Seq Lambda + done +done +' + diff --git a/scripts/benchmarking/run_tier1_spx-mode.sh b/scripts/benchmarking/run_tier1_spx-mode.sh new file mode 100755 index 000000000..e16f06534 --- /dev/null +++ b/scripts/benchmarking/run_tier1_spx-mode.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +## Run all 'Priority 1' benchmark kernels for GPU, non-lambda variants only +## on 4 MPI ranks and dump the results in the specified directory. 
+ +flux alloc -xN1 -t 45 bash -c ' + +OUTDIR=RPBenchmarkTestMPI_tier1-SPX + +# Reference memory footprint for SPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 = 268435456 bytes +BASEMEM=268435456 + +# Reference memory footprint for CPX mode (MI300A) +# 256MB MALL on MI300A --> 256 * 1024 * 1024 / 6 = 44739242 bytes +#BASEMEM=44739242 + +# Collection of problem size factors to run +FACTORS=(1 2 4 8 10 12 16 20) + +# List of kernels to run +KERNELS=("DIFFUSION3DPA" + "EDGE3D" + "ENERGY" + "INTSC_HEXRECT" + "MASS3DEA" + "MASS3DPA_ATOMIC" + "MASSVEC3DPA" + "MATVEC_3D_STENCIL" + "NODAL_ACCUMULATION_3D" + "VOL3D") + +for KERNEL_NAME in "${KERNELS[@]}"; do + echo "Running kernel: $KERNEL_NAME" + for factor in "${FACTORS[@]}"; do + mem=$((factor * BASEMEM)) + echo " Running with memory = $mem" + flux run -xN1 -n4 ./bin/raja-perf.exe \ + -k "$KERNEL_NAME" \ + --npasses 1 \ + --npasses-combiners Average Minimum Maximum \ + --outdir ${OUTDIR} \ + --outfile "${KERNEL_NAME}_factor_${factor}" \ + --memory-allocated "$mem" \ + --warmup-perfrun-same \ + -ev Seq Lambda + done +done +' + diff --git a/scripts/benchmarking/study_run_kernel_tunings.py b/scripts/benchmarking/study_run_kernel_tunings.py new file mode 100755 index 000000000..73a0d0d03 --- /dev/null +++ b/scripts/benchmarking/study_run_kernel_tunings.py @@ -0,0 +1,537 @@ +import os +import glob +import numpy as np +import pandas as pd +import argparse + +# ============= Configuration ============= + +ROOT_DIR = "." # change if needed + +# Use "factor" instead of "mref" in file patterns +GLOB_PATTERNS = [ + "**/*factor*kernel-run-data.csv", # broad match +] + +# Optional filter to only keep specific kernels by substring match (case-insensitive) +# Leave empty to include all kernels discovered. +KERNEL_WHITELIST = [ + # "MASS3DPA", +] + +# Derivative reporting configuration +DERIV_USE_RELATIVE = True +DERIV_EPS_REL = 0.03 # relative threshold on |dy/dx| normalized by (y_range/x_range) +DERIV_EPS_ABS = 1e-4 # absolute threshold on |dy/dx|, only used if DERIV_USE_RELATIVE=False +DERIV_MIN_CONSEC = 3 # minimum consecutive points below threshold to consider a plateau run +DERIV_SMOOTH_WINDOW = 3 # moving average window for smoothing y before derivative +DERIV_MIN_FRAC_OF_MAX_Y = 0.9 # only search after reaching this fraction of max(y) +DERIV_REPORT_MAX_POINTS = 8 # limit how many points to print per series +DERIV_REPORT_ABS = True # print |dy/dx| if True, else print signed dy/dx + + +FIG_DPI = 300 +SHOW_PLOTS = True # show interactive windows while also saving PNGs + +# Use a non-interactive backend only when not showing plots +if not SHOW_PLOTS: + import matplotlib + matplotlib.use("Agg") + +import matplotlib.pyplot as plt + + +# ============= Helper functions ============= + +def ensure_dir(path): + os.makedirs(path, exist_ok=True) + +def sanitize_filename(text): + return "".join(c if c.isalnum() or c in "-_." else "_" for c in str(text)) + +def find_csv_files(root_dir, patterns): + """Recursively find CSV files matching any of the given patterns.""" + all_files = [] + for pattern in patterns: + search_pattern = os.path.join(root_dir, pattern) + files = glob.glob(search_pattern, recursive=True) + all_files.extend(files) + all_files = sorted(set(all_files)) # Remove duplicates and sort + return all_files + +def _likely_header_score(line): + """ + Score a potential header line based on presence of common column tokens. + Higher is more likely to be the header. 
+ """ + tokens = [ + "Kernel", + "Variant", + "Problem size", + "Problem Size", + "Mean flops", + "GFlop", + "GFLOP", + "GFLOPs", + "GFLOPS", + ] + score = 0 + for t in tokens: + if t in line: + score += 1 + return score + +def read_single_csv(path): + """ + Read one CSV, trying to detect the header row by locating a line + that contains key column names. Returns a DataFrame or None. + """ + try: + with open(path, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception as e: + print(f"Failed to read {path}: {e}") + return None + + header_idx = None + best_score = -1 + for i, line in enumerate(lines[:50]): # only inspect the first 50 lines + score = _likely_header_score(line) + if score > best_score: + best_score = score + header_idx = i + + if header_idx is None: + print(f"Could not find header in {path}, skipping.") + return None + + try: + df = pd.read_csv(path, header=header_idx) + except Exception as e: + print(f"Failed to parse CSV {path}: {e}") + return None + + df["__source_file__"] = path + return df + +def normalize_columns(df): + """ + Normalize common column names to a standard set if possible. + """ + candidates = { + # Standard name : possible variants + "Kernel": ["Kernel", "Kernel name", "Benchmark", "Test"], + "Variant": ["Variant", "Implementation", "Policy", "Config", "Backend", "Suite"], + "Problem size": [ + "Problem size", "Problem Size", "Size", "N", "DOF", "Elements", + "ProblemSize", "Problem-size" + ], + "Mean flops (gigaFLOP per sec.)": [ + "Mean flops (gigaFLOP per sec.)", + "Mean flops (GFlop/s)", + "Mean Flops (GFlop/s)", + "GFLOP/s", "GFLOPs/s", "GFLOPS", "GFlops/s", "GFlop/s", "GF/s", + "Mean GFLOP/s", "Mean GFLOPs/s" + ], + } + + new_col_map = {} + # strip whitespace from existing columns first + df = df.rename(columns={c: c.strip() for c in df.columns}) + + for standard_name, names in candidates.items(): + for c in names: + if c in df.columns: + new_col_map[c] = standard_name + break # first match wins + + df = df.rename(columns=new_col_map) + return df + +def _moving_average(y, window): + if window is None or window <= 1 or len(y) < 3: + return y + window = max(2, int(window)) + kernel = np.ones(window, dtype=float) / float(window) + return np.convolve(y, kernel, mode="same") + +def _find_first_run(mask, min_len): + """Return the start index and run length of the first run of True with length >= min_len.""" + run = 0 + for i, v in enumerate(mask): + if v: + run += 1 + if run >= min_len: + start = i - run + 1 + j = i + 1 + while j < len(mask) and mask[j]: + j += 1 + return start, j - start + else: + run = 0 + return None, 0 + +def classify_backend_from_variant(variant): + """ + Heuristic classification of backend based on the Variant string. + Captures common cases even if names do not end with specific suffixes. + """ + s = str(variant).strip() + low = s.lower() + if "hip" in low: + return "HIP" + if "cuda" in low: + return "CUDA" + if "openmp" in low or low.endswith("_omp") or " omp" in low or low.startswith("omp"): + return "OpenMP" + if "seq" in low or "serial" in low or "baseline" in low or "sequential" in low: + return "Seq" + return "Unknown" + +# NEW: classify tuning; adjust logic to match your actual naming scheme +def classify_tuning(row): + """ + Return a tuning label for a row. + You can customize this to use any available columns. 
+ Examples: + - A dedicated 'Tuning' column + - Parsing 'Variant' into backend + tuning + - Using problem-size or factor strings + For now, use: + - If a 'Tuning' column exists, use that + - Else, if '__source_file__' used different settings, use its basename + - Else, return 'default' + """ + # If the CSV already has a Tuning column, use it + if "Tuning" in row and pd.notna(row["Tuning"]): + return str(row["Tuning"]).strip() + + # Otherwise, derive from source file name as a proxy for tuning + src = row.get("__source_file__", "") + if isinstance(src, str) and src: + return os.path.basename(src) + + return "default" + +def report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning_label, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, +): + """ + Prints lines "Problem size=, dy/dx=" for points with small enough derivative. + Uses either a relative threshold or an absolute slope threshold. + Focuses on the near-peak region to avoid early flat areas. + """ + x = np.asarray(x, dtype=float) + y = np.asarray(y, dtype=float) + + # Aggregate duplicate x values by averaging y + if len(x) != len(np.unique(x)): + tmp = pd.DataFrame({"x": x, "y": y}).groupby("x", as_index=False)["y"].mean() + x = tmp["x"].values + y = tmp["y"].values + + # Sort by x + order = np.argsort(x) + x = x[order] + y = y[order] + + if len(x) < max(3, min_consecutive): + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: not enough points for derivative analysis" + ) + return + + # Optional smoothing + y_sm = _moving_average(y, smooth_window) + + x_range = float(x.max() - x.min()) + if x_range == 0.0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: zero x-range, cannot compute derivative" + ) + return + + deriv = np.gradient(y_sm, x) # dy/dx, same length as x + + # Restrict to near-peak region if requested + search_mask = np.ones_like(deriv, dtype=bool) + y_range = float(y_sm.max() - y_sm.min()) + if min_frac_of_max_y is not None and 0.0 < min_frac_of_max_y < 1.0 and y_range > 0: + thresh_y = y_sm.max() * float(min_frac_of_max_y) + search_mask = y_sm >= thresh_y + + if use_relative: + # Normalize slope by typical scale y_range/x_range for a dimensionless measure + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / norm_factor + near_zero_mask = (deriv_norm <= float(eps_rel)) & search_mask + else: + near_zero_mask = (np.abs(deriv) <= float(eps_abs)) & search_mask + + # Prefer the first sustained run of small derivatives + start_idx, run_len = _find_first_run(near_zero_mask, int(min_consecutive)) + + if start_idx is not None: + run_indices = np.arange(start_idx, start_idx + run_len) + # Downsample to at most max_points for readability + if len(run_indices) > max_points: + picks_rel = np.linspace(0, len(run_indices) - 1, num=max_points) + pick = run_indices[np.round(picks_rel).astype(int)] + else: + pick = run_indices + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: sustained near-zero derivative region found, points={run_len}" + ) + else: + # Fallback: choose up to max_points with smallest slope in the search region + candidates = np.where(search_mask)[0] + if candidates.size 
== 0: + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no valid search region for derivative analysis" + ) + return + + if use_relative: + norm_factor = (y_range / x_range) if y_range > 0 else 1.0 + deriv_norm = np.abs(deriv) / (norm_factor if norm_factor > 0 else 1.0) + order_c = np.argsort(deriv_norm[candidates]) + else: + order_c = np.argsort(np.abs(deriv[candidates])) + pick = candidates[order_c[:max_points]] + print( + f"[DERIV] Backend={backend_label}, Kernel={kernel}, Variant={variant}, Tuning={tuning_label}: no sustained plateau, showing {len(pick)} smallest-slope points" + ) + + # Ensure sorted by x before printing + pick = np.array(sorted(pick.tolist())) + + # Print lines in the requested format + for idx in pick: + dy = deriv[idx] + dy_out = abs(dy) if report_abs else dy + print(f" Problem size={x[idx]:.6g}, dy/dx={dy_out:.6g}") + +def plot_backend(df_backend, backend_label, fig_dir, show_plots, fig_dpi): + """ + For a given backend: + - One figure per Kernel with solid line and markers per (Variant, Tuning). + - Prints derivative-based small-slope points per (Variant, Tuning). + - Saves each figure as PNG. + - Shows figures interactively if requested. + """ + if df_backend.empty: + print(f"\nNo data for backend {backend_label}.") + return + + kernels = sorted(df_backend["Kernel"].dropna().unique()) + + # Unique (Variant, Tuning) combos for color/marker assignments + vt_pairs = sorted( + df_backend[["Variant", "Tuning"]] + .dropna() + .drop_duplicates() + .itertuples(index=False, name=None) + ) + + cmap = plt.cm.tab20 + num_pairs = max(1, len(vt_pairs)) + colors = [cmap(i % cmap.N) for i in range(num_pairs)] + markers = ["o", "s", "^", "D", "v", ">", "<", "P", "X"] # repeat if needed + + style_map = {} + for idx, (variant, tuning) in enumerate(vt_pairs): + color = colors[idx % len(colors)] + marker = markers[idx % len(markers)] + style_map[(variant, tuning)] = (color, marker) + + for kernel in kernels: + df_k = df_backend[df_backend["Kernel"] == kernel] + if df_k.empty: + continue + + fig = plt.figure(figsize=(10, 6)) + + # group by Variant and Tuning to get separate curves + for (variant, tuning), g in df_k.groupby(["Variant", "Tuning"]): + g_sorted = g.sort_values("Problem size") + x = g_sorted["Problem size"].values + y = g_sorted["Mean flops (gigaFLOP per sec.)"].values + + color, marker = style_map.get((variant, tuning), ("black", "o")) + + label = f"{variant} | {tuning}" + + plt.plot( + x, + y, + marker=marker, + linestyle="-", + color=color, + label=label, + ) + + # Derivative-based report of small-slope points + report_near_zero_derivative_points( + x, + y, + backend_label, + kernel, + variant, + tuning, + use_relative=DERIV_USE_RELATIVE, + eps_rel=DERIV_EPS_REL, + eps_abs=DERIV_EPS_ABS, + min_consecutive=DERIV_MIN_CONSEC, + smooth_window=DERIV_SMOOTH_WINDOW, + min_frac_of_max_y=DERIV_MIN_FRAC_OF_MAX_Y, + max_points=DERIV_REPORT_MAX_POINTS, + report_abs=DERIV_REPORT_ABS, + ) + + plt.xlabel("Problem size") + plt.ylabel("Mean flops (gigaFLOP per sec.)") + + # TITLE MODIFIED: explicitly list backend + plt.title(f"Kernel: {kernel} | Backend: {backend_label}") + + plt.grid(True) + plt.tight_layout() + plt.legend(fontsize="small", bbox_to_anchor=(1.05, 1), loc="upper left") + + # Save figure as PNG + kernel_safe = sanitize_filename(kernel) + backend_safe = sanitize_filename(backend_label) + fname = f"{backend_safe}_Kernel-{kernel_safe}.png" + fig_path = os.path.join(fig_dir, fname) + plt.savefig(fig_path, dpi=fig_dpi, 
bbox_inches="tight") + print(f"[SAVE] Figure saved to: {fig_path}") + + if show_plots: + plt.show() + else: + plt.close(fig) + +# ============= Main logic ============= + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--output-dir", required=True, help="Output directory") + args = parser.parse_args() + # Output and plotting configuration + OUTPUT_DIR = args.output_dir + FIG_DIR = os.path.join(OUTPUT_DIR, "figures") + COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") + ensure_dir(OUTPUT_DIR) + ensure_dir(FIG_DIR) + + files = find_csv_files(ROOT_DIR, GLOB_PATTERNS) + if not files: + print(f"No files matching patterns {GLOB_PATTERNS} found under '{ROOT_DIR}'") + return + + print("Found CSV files:") + for f in files: + print(" ", f) + + dfs = [] + for path in files: + df = read_single_csv(path) + if df is None: + continue + df = normalize_columns(df) + + # Verify required columns exist post-normalization, else report and skip + required_any = {"Kernel", "Variant", "Problem size", "Mean flops (gigaFLOP per sec.)"} + if not required_any.issubset(set(df.columns)): + print(f"[SKIP] {path} missing required columns after normalization.") + print(" Columns present:", list(df.columns)) + continue + + dfs.append(df) + + if not dfs: + print("No CSV files could be parsed with required columns.") + return + + combined_df = pd.concat(dfs, ignore_index=True) + + # Basic cleaning + combined_df["Kernel"] = combined_df["Kernel"].astype(str).str.strip() + combined_df["Variant"] = combined_df["Variant"].astype(str).str.strip() + + # Optional kernel filter + if KERNEL_WHITELIST: + wl = [w.lower() for w in KERNEL_WHITELIST] + combined_df = combined_df[ + combined_df["Kernel"].str.lower().apply(lambda k: any(w in k for w in wl)) + ] + if combined_df.empty: + print("After applying KERNEL_WHITELIST, no rows remain.") + return + + # Convert numeric columns + combined_df["Problem size"] = pd.to_numeric(combined_df["Problem size"], errors="coerce") + combined_df["Mean flops (gigaFLOP per sec.)"] = pd.to_numeric( + combined_df["Mean flops (gigaFLOP per sec.)"], errors="coerce" + ) + + # Drop rows without x or y + before_drop = len(combined_df) + combined_df = combined_df.dropna(subset=["Problem size", "Mean flops (gigaFLOP per sec.)"]) + dropped = before_drop - len(combined_df) + if dropped > 0: + print(f"[CLEAN] Dropped {dropped} rows with non-numeric Problem size or Mean flops.") + + # Backend classification + combined_df["Backend"] = combined_df["Variant"].apply(classify_backend_from_variant) + + # NEW: derive Tuning column + combined_df["Tuning"] = combined_df.apply(classify_tuning, axis=1) + + # Save concatenated table to CSV, now including Backend and Tuning + ensure_dir(os.path.dirname(COMBINED_CSV_PATH)) + combined_df.to_csv(COMBINED_CSV_PATH, index=False) + print(f"[SAVE] Combined table saved to: {COMBINED_CSV_PATH}") + + # Quick summary to help verify MASS3DPA is present + print("\nKernels discovered:") + print(sorted(combined_df["Kernel"].unique())) + + print("\nCounts by Kernel and Backend:") + summary = ( + combined_df.groupby(["Kernel", "Backend"]) + .size() + .reset_index(name="rows") + .sort_values(["Kernel", "Backend"]) + ) + for _, row in summary.iterrows(): + print(f" Kernel={row['Kernel']}, Backend={row['Backend']}: rows={row['rows']}") + + # Plot aggregated "All" view so kernels appear even if backend classification is Unknown + print("\n[Plot] Generating 'All' plots per kernel...") + plot_backend(combined_df, "All", FIG_DIR, SHOW_PLOTS, FIG_DPI) + + # Plot 
per requested backends, only if data exists + for b in ["CUDA", "HIP", "Seq", "OpenMP"]: + df_b = combined_df[combined_df["Backend"] == b] + if df_b.empty: + print(f"[Plot] Skipping backend {b}, no rows.") + continue + print(f"\n[Plot] Generating '{b}' plots per kernel...") + plot_backend(df_b, b, FIG_DIR, SHOW_PLOTS, FIG_DPI) + +if __name__ == "__main__": + main() diff --git a/scripts/benchmarking/study_saturation.py b/scripts/benchmarking/study_saturation.py new file mode 100644 index 000000000..a7dce682a --- /dev/null +++ b/scripts/benchmarking/study_saturation.py @@ -0,0 +1,191 @@ +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import statistics +import os +import argparse + + +SMOOTH_FLOPS_COL_NAME="Smoothed Mean flops (gigaFLOP per sec.)" +RAW_FLOPS_COL = 'Mean flops (gigaFLOP per sec.)' +SMOOTH_FLOPS_COL = 'Smoothed Mean flops (gigaFLOP per sec.)' +PROBLEM_SIZE_COL = 'Problem size' +VARIANT_TUNING_COL = 'Variant_Tuning' +KERNEL_COL = 'Kernel' + +def moving_median_smooth(y, k=5): + """ + Apply a moving median filter with window size k. + Returns a list of smoothed values. + """ + n = len(y) + m = (k - 1) // 2 + y_smooth = [] + for i in range(n): + start = max(0, i - m) + end = min(n - 1, i + m) + window = y[start:end + 1] + y_smooth.append(statistics.median(window)) + return y_smooth + +def find_saturation_point(x, y_smooth, eps=0.1, w=3): + """ + Find the smallest x_i where y_smooth stays above (1-eps)*y_max for w consecutive points. + Returns the corresponding x value or None if not found. + """ + y_max = max(y_smooth) + threshold = (1.0 - eps) * y_max + n = len(y_smooth) + run_length = 0 + run_start_idx = None + for i in range(n): + if y_smooth[i] >= threshold: + if run_length == 0: + run_start_idx = i + run_length += 1 + if run_length >= w: + return x[run_start_idx] + else: + run_length = 0 + run_start_idx = None + return None + +def plot_kernel(df, kernel, k=5, eps=0.1, w=3, save_dir=None): + """ + Plot throughput curves for each variant of a kernel. + Raw data: solid dots. Smoothed: dashed lines. + Wide, PowerPoint-friendly plot. 
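+    Also writes the smoothed values back into the SMOOTH_FLOPS_COL_NAME column
+    of the DataFrame passed in, and prints a per-variant saturation report.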
+ """ + plt.figure(figsize=(18, 7)) # Wider plot + variants = df['Variant_Tuning'].unique() + report = [] + colors = plt.cm.tab10.colors + for idx, variant in enumerate(variants): + subdf = df[df['Variant_Tuning'] == variant].copy() + subdf = subdf.sort_values('Problem size') + x = subdf['Problem size'].astype(float).values + y = subdf['Mean flops (gigaFLOP per sec.)'].astype(float).values + y_smooth = moving_median_smooth(list(y), k=k) + for xi, yi in zip(x, y_smooth): + df.loc[(df['Variant_Tuning'] == variant) & (df['Problem size'] == xi), SMOOTH_FLOPS_COL_NAME] = yi + # Raw data points: solid dots + plt.plot( + x, y, '-o', + label=f"{variant} (raw)", + markersize=8, + color=colors[idx % len(colors)], + markerfacecolor=colors[idx % len(colors)], + markeredgewidth=0 + ) + # Smoothed curve: dashed line + plt.plot( + x, y_smooth, '--', + label=f"{variant} (smoothed)", + linewidth=3, + color=colors[idx % len(colors)] + ) + y_max = max(y_smooth) + sat_x = find_saturation_point(x, y_smooth, eps=eps, w=w) + report.append({ + "Variant_Tuning": variant, + "y_max": y_max, + "saturation_x": sat_x + }) + + plt.title(f"Kernel: {kernel}", fontsize=22) + plt.xlabel("Problem size", fontsize=18) + plt.ylabel("Mean flops (gigaFLOP per sec.)", fontsize=18) + plt.grid(True, which='both', linestyle='--', linewidth=1) + plt.xticks(fontsize=16) + plt.yticks(fontsize=16) + plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), fontsize=16, frameon=False) + plt.tight_layout(rect=[0, 0, 0.75, 1]) + + if save_dir: + import os + os.makedirs(save_dir, exist_ok=True) + fname = f"{save_dir}/{kernel}.png" + plt.savefig(fname, dpi=200) + print(f"Saved plot to {fname}") + plt.show() + + print(f"Kernel: {kernel}") + print(f"eps: {eps}, w: {w}") + print("Variant_Tuning | Global max of smoothed curve (y_max) | Saturation point (x)") + for r in report: + sat_str = f"{r['saturation_x']:.2f}" if r['saturation_x'] is not None else "None" + print(f"{r['Variant_Tuning']:16} | {r['y_max']:.6g} | {sat_str}") + +def smooth_and_plot_all_kernels(df, k=5, eps=0.1, w=3, save_dir=None): + """ + Load data and plot all kernels. 
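+    Returns the input DataFrame with the smoothed-FLOPs column
+    (SMOOTH_FLOPS_COL_NAME) filled in for every kernel.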
+ """ + # Drop rows missing key columns + df = df.dropna(subset=['Kernel', 'Problem size', 'Mean flops (gigaFLOP per sec.)', 'Variant_Tuning']) + df[SMOOTH_FLOPS_COL_NAME] = np.nan + kernels = df['Kernel'].unique() + print(f"Found {len(kernels)} kernels.") + for kernel in kernels: + tmp = df[df['Kernel'] == kernel] + plot_kernel(tmp, kernel, k=k, eps=eps, w=w, save_dir=save_dir) + df.loc[df['Kernel'] == kernel, SMOOTH_FLOPS_COL_NAME] = tmp[SMOOTH_FLOPS_COL_NAME] + + return df + +def save_kernel_tables(df, outdir='kernel_tables'): + os.makedirs(outdir, exist_ok=True) + kernels = df[KERNEL_COL].unique() + for kernel in kernels: + df_kernel = df[df[KERNEL_COL] == kernel] + + # Pivot for raw FLOP/s + raw_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=RAW_FLOPS_COL + ) + raw_table = raw_table.sort_index() + raw_csv_path = os.path.join(outdir, f"{kernel}_raw.csv") + raw_table.to_csv(raw_csv_path) + + # Pivot for smoothed FLOP/s + smooth_table = df_kernel.pivot_table( + index=PROBLEM_SIZE_COL, + columns=VARIANT_TUNING_COL, + values=SMOOTH_FLOPS_COL + ) + smooth_table = smooth_table.sort_index() + smooth_csv_path = os.path.join(outdir, f"{kernel}_smoothed.csv") + smooth_table.to_csv(smooth_csv_path) + + print(f"Saved: {raw_csv_path}, {smooth_csv_path}") + + +parser = argparse.ArgumentParser() +parser.add_argument("--output-dir", required=True, help="Output directory") +args = parser.parse_args() + +OUTPUT_DIR = args.output_dir +FOM_DIR = os.path.join(OUTPUT_DIR ) +FIG_DIR = os.path.join(OUTPUT_DIR, "figures") +COMBINED_CSV_PATH = os.path.join(OUTPUT_DIR, "combined_table.csv") +OUTPUT_VARIANT_TUNING = os.path.join(OUTPUT_DIR, "output_with_variant_tuning.csv") + +# Assume 'df' is your DataFrame loaded from the text file +# For example: +df = pd.read_csv(COMBINED_CSV_PATH, delimiter=',') # Adjust delimiter as needed + +# Create the new column +df['Variant_Tuning'] = df['Variant'] + '-' + df['Tuning'] + +# Example: Show the first few rows +# print(df[['Variant', 'Tuning', 'Variant_Tuning']].head()) + +# Calculate smoothed values, and plot them alongside raw values. +df = smooth_and_plot_all_kernels(df, k=5, eps=0.1, w=3, save_dir=FIG_DIR) +df.to_csv(OUTPUT_VARIANT_TUNING, index=False) + +save_kernel_tables(df,outdir=OUTPUT_DIR) + +# Usage: +# save_kernel_tables(df, outdir='my_output_dir') diff --git a/scripts/benchmarking/workflow-todo.txt b/scripts/benchmarking/workflow-todo.txt new file mode 100644 index 000000000..e8458e9a3 --- /dev/null +++ b/scripts/benchmarking/workflow-todo.txt @@ -0,0 +1,111 @@ +Desired Workflow and TODO List for Generating Baselines: +Also a Description of What We Did and an Example for Vendors +================================================================= + +Desired Workflow +---------------- + +1) Execute script to run kernels over pre-defined problem sizes + (a la throughput study) and generate data for execution timings, + FLOP rates, mem B/W, etc. + +2) Execute script to process data and generate result files and plots: + a) File containing CSV table of relevant run data for all kernels run + b) Per-kernel file containing a CSV table of raw and smoothed throughput + curve data points (FLOP/s as a function of problem size) + c) FOM file containing a CSV table with a row for each kernel, variant, and + tuning run. 
Columns:
+      -- Kernel name (kernel-variant-tuning)
+      -- Saturation Problem Size (computed from smoothed throughput curve)
+      -- Saturation FLOP/s (raw FLOP/s at saturation problem size)
+      -- Saturation Mem B/W (mem B/W at saturation problem size)
+
+
+
+Tasks to do
+-----------
+
+0a) We don't want a single script to run AND process data.
+0b) We want one data processing script that generates all the files described
+    in 2 above.
+
+1) Run scripts
+   - These are easy. Via command-line args, we can generate per-kernel and
+     aggregated files containing all necessary data for the analyses we want.
+
+   - One script per architecture?
+     - Each script will take command-line args for the kernel subset to run
+       (tier1 or tier2) and the run mode, if relevant (e.g., on ATS-4: SPX mode
+       and CPX mode).
+
+2) Data processing script
+
+   -- Script should take two args:
+      - arg1: input data directory (where data files from the run live)
+      - arg2: output directory (where to put processed data files)
+
+   2a) The script 'study_run_kernel_tunings.py' can generate what we want for
+       2a above (combined_table.csv file).
+   2b) The script 'study_saturation.py' generates something close to what we
+       want for 2b above. It makes two files for each kernel: one with a CSV
+       table of raw throughput data points and one for smoothed curve data.
+
+       Script needs to be modified to put data for both curves in one file.
+
+       For example, KernelName-saturation-curve-data.csv contains
+
+       Problem Size, Variant1-Tuning1 (raw), Variant1-Tuning1 (smoothed), ...
+       , , , ...
+       , , , ...
+       , , , ...
+       .....
+
+   2c) We need a script to generate a FOM file that contains a CSV table like
+       this (a minimal sketch is included at the end of this note):
+
+       Kernel               , Sat Problem Size, Sat FLOP/s, Sat B/W
+       Name-Variant1-Tuning1,                 ,           ,
+       Name-Variant2-Tuning1,                 ,           ,
+       ......
+       Name-VariantN-TuningM,                 ,           ,
+       .....
+       Etc. for each kernel, variant, and tuning run.
+
+       Notes:
+       - Saturation problem size is determined from smoothed data
+         (see algorithm below)
+       - Saturation FLOP/s and B/W are raw data values at the saturation
+         problem size
+
+
+Smoothed Throughput Curves
+--------------------------
+A method to approximate the saturation point of noisy or non-monotonic
+throughput curves.
+
+Assume we have a set of x and y values:
+
+x0, x1, x2, ..., xN (e.g., problem size)
+y0, y1, y2, ..., yN (e.g., FLOP / sec)
+
+Step 1: Smooth curve via moving median method (robust to outliers;
+        a few extreme values do not affect the result much)
+
+Choose window size k (odd integer such as 3, 5, or 7)
+Define m = (k - 1) / 2
+
+Define the smoothed curve with the same x values as above, but with
+y values y_smooth[i] defined as:
+For i in 0..N:
+  window indices = { max(0, i-m), ..., min(N, i+m) }
+  window values  = y[window indices]
+  y_smooth[i]    = median(window values)
+
+Step 2: Find global max of smoothed curve
+
+y_max = max(y_smooth[i]), i = 0..N
+
+Step 3: Approximate saturation point
+
+The saturation point is the smallest x_i where y_smooth[i] >= (1 - eps) * y_max
+holds for w consecutive i values; e.g., eps = 0.1 and w = 3
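+
+
+Example sketch of the saturation algorithm
+------------------------------------------
+The snippet below is a minimal, self-contained Python sketch of Steps 1-3
+above, in the spirit of the moving_median_smooth / find_saturation_point
+helpers in study_saturation.py. The function name and the toy x/y values are
+made up purely to illustrate the mechanics; they are not benchmark data.
+
+  import statistics
+
+  def smooth_and_find_saturation(x, y, k=5, eps=0.1, w=3):
+      # Step 1: moving median with window size k (m points on each side)
+      m = (k - 1) // 2
+      n = len(y)
+      y_smooth = [statistics.median(y[max(0, i - m):min(n - 1, i + m) + 1])
+                  for i in range(n)]
+      # Step 2: global max of the smoothed curve
+      y_max = max(y_smooth)
+      # Step 3: smallest x where y_smooth stays >= (1 - eps) * y_max
+      #         for w consecutive points
+      threshold = (1.0 - eps) * y_max
+      run_start, run_len = None, 0
+      for i, ys in enumerate(y_smooth):
+          if ys >= threshold:
+              run_start = i if run_len == 0 else run_start
+              run_len += 1
+              if run_len >= w:
+                  return y_smooth, y_max, x[run_start]
+          else:
+              run_start, run_len = None, 0
+      return y_smooth, y_max, None   # curve never saturates by this criterion
+
+  # Toy series: throughput rises, then flattens with a little noise
+  x = [1, 2, 4, 8, 10, 12, 16, 20]
+  y = [10.0, 30.0, 55.0, 70.0, 68.0, 72.0, 71.0, 70.5]
+  _, y_max, sat_x = smooth_and_find_saturation(x, y, k=3, eps=0.1, w=3)
+  print(y_max, sat_x)   # saturation is reported at x = 8 for this toy data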
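+
+
+Example sketch of FOM table generation (2c)
+-------------------------------------------
+A rough sketch of how the FOM rows described in 2c could be assembled from a
+processed table, reusing the smooth_and_find_saturation helper sketched above
+(the build_fom_table name is just for this example). The column names
+('Kernel', 'Variant_Tuning', 'Problem size', 'Mean flops (gigaFLOP per sec.)',
+'Bandwidth (GiB per sec.)') are assumed to match the processing scripts; the
+bandwidth column may be absent in some run CSVs, so it is treated as optional.
+
+  import pandas as pd
+
+  SIZE_COL  = "Problem size"
+  FLOPS_COL = "Mean flops (gigaFLOP per sec.)"
+  BW_COL    = "Bandwidth (GiB per sec.)"   # assumed name; optional
+
+  def build_fom_table(df, k=5, eps=0.1, w=3):
+      rows = []
+      for (kernel, vt), g in df.groupby(["Kernel", "Variant_Tuning"]):
+          g = g.sort_values(SIZE_COL)
+          x = g[SIZE_COL].astype(float).tolist()
+          y = g[FLOPS_COL].astype(float).tolist()
+          _, _, sat_x = smooth_and_find_saturation(x, y, k=k, eps=eps, w=w)
+          sat_flops = sat_bw = None
+          if sat_x is not None:
+              at_sat = g[g[SIZE_COL].astype(float) == sat_x].iloc[0]
+              sat_flops = at_sat[FLOPS_COL]                    # raw FLOP/s at saturation size
+              sat_bw = at_sat[BW_COL] if BW_COL in g.columns else None
+          rows.append({"Kernel": f"{kernel}-{vt}",
+                       "Sat Problem Size": sat_x,
+                       "Sat FLOP/s": sat_flops,
+                       "Sat B/W": sat_bw})
+      return pd.DataFrame(rows)
+
+  # e.g.: build_fom_table(pd.read_csv("output_with_variant_tuning.csv")).to_csv("fom.csv", index=False)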