diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b67f6fc7b..9a2c16247 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -56,7 +56,7 @@ jobs: - name: Run benchmark if: steps.changed-files.outputs.any_changed == 'true' || github.event_name == 'workflow_dispatch' run: | - tests/bench-aggregator.py + python3 tests/bench.py --json --quiet - name: Store benchmark results if: steps.changed-files.outputs.any_changed == 'true' || github.event_name == 'workflow_dispatch' diff --git a/src/main.c b/src/main.c index 1dcf97c85..509bd548b 100644 --- a/src/main.c +++ b/src/main.c @@ -292,7 +292,7 @@ int main(int argc, char **args) .args_offset_size = ARGS_OFFSET_SIZE, .argc = prog_argc, .argv = prog_args, - .log_level = LOG_TRACE, + .log_level = LOG_WARN, .run_flag = run_flag, .profile_output_file = prof_out_file, .cycle_per_step = CYCLE_PER_STEP, diff --git a/tests/bench-aggregator.py b/tests/bench-aggregator.py deleted file mode 100755 index c82426711..000000000 --- a/tests/bench-aggregator.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 - -import json -import subprocess - - -def run_benchmark(b): - interp = None - if "sh" in b: - interp = "bash" - elif "py" in b: - interp = "python3" - - subprocess.run(args=[interp, b], shell=False, check=True) - - -def load_benchmark(file): - f = open(file, "r") - return json.load(f) - - -# run benchmarks -benchmarks = ["tests/dhrystone.sh", "tests/coremark.py"] -for b in benchmarks: - run_benchmark(b) - -# combine benchmarks output data -benchmarks_output = ["dhrystone_output.json", "coremark_output.json"] -benchmark_data = [load_benchmark(bo) for bo in benchmarks_output] - -benchmark_output = "benchmark_output.json" -f = open(benchmark_output, "w") -f.write(json.dumps(benchmark_data, indent=4)) -f.close() diff --git a/tests/bench.py b/tests/bench.py new file mode 100755 index 000000000..c089440e7 --- /dev/null +++ b/tests/bench.py @@ -0,0 +1,601 @@ +#!/usr/bin/env python3 +""" +Unified benchmark runner for rv32emu. + +Benchmarks are registered via the @register_benchmark decorator. +Supports parallel execution while preserving user-specified output order. 
+""" + +import subprocess +import re +import statistics +import os +import sys +import json +import argparse +import threading +import time +from abc import ABC, abstractmethod +from concurrent.futures import ThreadPoolExecutor, as_completed +from subprocess import TimeoutExpired +from typing import ClassVar, Dict, List, Optional, Tuple, Type + +# Configuration +EMU_PATH = "build/rv32emu" +DEFAULT_RUNS = 5 # Balance, providing reasonable statistics +TIMEOUT_SECONDS = 600 # 10 min timeout per run (safety limit) +SLOW_THRESHOLD_SECONDS = 300 # If single run > 5 min, use only 1 run +MAX_BENCHMARK_SECONDS = 600 # 10 min max total time per benchmark + +# Benchmark registry +_BENCHMARK_REGISTRY: Dict[str, Type["Benchmark"]] = {} + + +class ProgressIndicator: + """Thread-safe progress indicator with spinner animation.""" + + SPINNER = ["◐", "◓", "◑", "◒"] # Rotating circle animation + + def __init__(self, benchmarks: List[str], n_runs: int, quiet: bool = False): + self.benchmarks = benchmarks + self.n_runs = n_runs + # Disable indicator if not a TTY to avoid log clutter + self.quiet = quiet or not sys.stdout.isatty() + self.lock = threading.Lock() + # Track status: {bench_name: status} + self.status: Dict[str, str] = {name: "pending" for name in benchmarks} + self.start_time = time.monotonic() + self.last_render = 0.0 + self._stop_event = threading.Event() + self._spinner_thread: Optional[threading.Thread] = None + + def start(self) -> None: + """Start the background spinner thread.""" + if self.quiet: + return + # Reserve terminal space to avoid overwriting history + # (1 line for elapsed + 1 line per benchmark) + sys.stdout.write("\n" * (len(self.benchmarks) + 1)) + sys.stdout.flush() + self._stop_event.clear() + self._spinner_thread = threading.Thread( + target=self._spinner_loop, daemon=True + ) + self._spinner_thread.start() + + def _spinner_loop(self) -> None: + """Background loop to update spinner every 1 second.""" + while not self._stop_event.is_set(): + with self.lock: + self._render() + self._stop_event.wait(1.0) + + def update( + self, bench_name: str, run: int, status: str = "running" + ) -> None: + """Update status for a benchmark.""" + with self.lock: + self.status[bench_name] = status + + def _render(self) -> None: + """Render status for all benchmarks.""" + if self.quiet: + return + elapsed = time.monotonic() - self.start_time + spinner_idx = int(elapsed) % len(self.SPINNER) + spinner = self.SPINNER[spinner_idx] + + lines = [f"\033[2K Elapsed: {elapsed:.1f}s\n"] + + for name in self.benchmarks: + status = self.status[name] + if status == "pending": + indicator = "⏳" + state = "" + elif status == "done": + indicator = "✓" + state = "" + elif status == "failed": + indicator = "✗" + state = " (failed)" + else: # running + indicator = spinner + state = " (running)" + + lines.append(f"\033[2K {indicator} {name}{state}\n") + + # Move cursor up to overwrite + sys.stdout.write(f"\033[{len(lines)}A") + sys.stdout.write("".join(lines)) + sys.stdout.flush() + + def finish(self) -> None: + """Stop spinner and show final state (preserving failed status).""" + self._stop_event.set() + if self._spinner_thread: + self._spinner_thread.join(timeout=1.0) + if self.quiet: + return + with self.lock: + # Only mark pending/running as done, preserve failed status + for name in self.benchmarks: + if self.status[name] not in ("done", "failed"): + self.status[name] = "done" + self._render() + # Move past display + print("\n" * (len(self.benchmarks) + 1)) + + +def register_benchmark(name: str): + 
"""Decorator to register a benchmark class.""" + + def decorator(cls: Type["Benchmark"]) -> Type["Benchmark"]: + _BENCHMARK_REGISTRY[name.lower()] = cls + return cls + + return decorator + + +def get_registered_benchmarks() -> Dict[str, Type["Benchmark"]]: + """Return all registered benchmarks.""" + return _BENCHMARK_REGISTRY.copy() + + +class Benchmark(ABC): + """Abstract base class for all benchmarks.""" + + name: ClassVar[str] + unit: ClassVar[str] + BIN_PATH: ClassVar[str] + + def __init__( + self, n_runs: int, progress: Optional[ProgressIndicator] = None + ): + self.n_runs = n_runs + self.progress = progress + self.logs: List[str] = [] + + def log(self, msg: str) -> None: + """Buffer log messages to avoid interleaving in parallel mode.""" + self.logs.append(msg) + + def get_logs(self) -> str: + """Return buffered logs as a single string.""" + return "\n".join(self.logs) + + @classmethod + def prepare(cls) -> None: + """Ensure dependencies are built. Run BEFORE parallel execution.""" + if hasattr(cls, "BIN_PATH") and not os.path.exists(cls.BIN_PATH): + print(f"Building {cls.name}...") + result = subprocess.run( + ["make", "artifact"], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + raise RuntimeError( + f"Failed to build {cls.name}\n" + f"stdout: {result.stdout[:500]}\nstderr: {result.stderr[:500]}" + ) + if not os.path.exists(cls.BIN_PATH): + raise RuntimeError(f"{cls.name} not found at {cls.BIN_PATH}") + + @abstractmethod + def run_single(self) -> float: + """Run a single benchmark iteration and return the result.""" + raise NotImplementedError + + def validate(self) -> float: + """Run validation before benchmark. Returns the result for reuse.""" + return self.run_single() + + def run(self) -> Tuple[float, float, List[float], int]: + """Run the full benchmark suite. Returns (mean, stdev, filtered_values, actual_runs).""" + bench_key = self.name.lower() + bench_start = time.monotonic() + + # Validation run (also serves as timing reference) + self.log(f"Validating {self.name}...") + if self.progress: + self.progress.update(bench_key, 0, "running") + run_start = time.monotonic() + first_value = self.validate() + run_elapsed = time.monotonic() - run_start + self.log(f"{self.name} validation passed ({run_elapsed:.1f}s)") + + # Adaptive run count based on single run time + actual_runs = self.n_runs + if run_elapsed > SLOW_THRESHOLD_SECONDS: + self.log( + f"Warning: {self.name} took {run_elapsed:.1f}s (>{SLOW_THRESHOLD_SECONDS}s), " + "using single run only" + ) + actual_runs = 1 + + values = [first_value] # Include validation result + for i in range(1, actual_runs): + # Check time budget before starting next run. + # Note: uses validation run time as estimate; assumes runs are similar. 
+ total_elapsed = time.monotonic() - bench_start + remaining = MAX_BENCHMARK_SECONDS - total_elapsed + if remaining < run_elapsed: + self.log( + f"Time budget: {total_elapsed:.0f}s elapsed, " + f"stopping after {len(values)} runs" + ) + break + self.log(f"Running {self.name} benchmark - Run #{i + 1}") + if self.progress: + self.progress.update(bench_key, i + 1, "running") + values.append(self.run_single()) + + if self.progress: + self.progress.update(bench_key, len(values), "done") + + avg, stdev, filtered = self.calculate_stats(values) + self.log("-" * 40) + self.log( + f"{self.name}: {avg:.3f} ± {stdev:.3f} {self.unit} " + f"({len(filtered)}/{len(values)} valid runs)" + ) + self.log("-" * 40) + + return avg, stdev, filtered, len(values) + + def calculate_stats( + self, values: List[float] + ) -> Tuple[float, float, List[float]]: + """Filter outliers using median-based 2-sigma rule. Returns (mean, stdev, filtered).""" + if not values: + return 0.0, 0.0, [] + + n = len(values) + median = statistics.median(values) + stdev_val = statistics.stdev(values) if n > 1 else 0.0 + + # Filter values within 2 standard deviations of median + filtered = [x for x in values if abs(x - median) <= 2.0 * stdev_val] + + if len(filtered) < 2: + self.log("Warning: Too many outliers filtered, using all results") + filtered = values + + final_mean = statistics.mean(filtered) + final_stdev = statistics.stdev(filtered) if len(filtered) > 1 else 0.0 + + return final_mean, final_stdev, filtered + + +@register_benchmark("dhrystone") +class DhrystoneBenchmark(Benchmark): + """Dhrystone benchmark measuring DMIPS.""" + + name = "Dhrystone" + unit = "DMIPS" + BIN_PATH = "build/riscv32/dhrystone" + + def run_single(self) -> float: + proc = subprocess.Popen( + [EMU_PATH, "-q", self.BIN_PATH], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + try: + stdout, stderr = proc.communicate(timeout=TIMEOUT_SECONDS) + except TimeoutExpired: + proc.kill() + proc.communicate() # Clean up buffers + raise RuntimeError( + f"Dhrystone timed out after {TIMEOUT_SECONDS} seconds" + ) + + if proc.returncode != 0: + raise RuntimeError( + f"Dhrystone failed (exit {proc.returncode})\n" + f"stdout: {stdout[:500]}\nstderr: {stderr[:500]}" + ) + + match = re.search(r"([0-9]+(?:\.[0-9]+)?) 
DMIPS", stdout) + if not match: + raise RuntimeError(f"Failed to parse DMIPS:\n{stdout[:500]}") + + return float(match.group(1)) + + def validate(self) -> float: + dmips = self.run_single() + if dmips <= 0: + raise RuntimeError(f"Invalid DMIPS value: {dmips}") + return dmips + + +@register_benchmark("coremark") +class CoreMarkBenchmark(Benchmark): + """CoreMark benchmark measuring iterations/sec.""" + + name = "CoreMark" + unit = "iterations/sec" + BIN_PATH = "build/riscv32/coremark" + + ITERATIONS = 30000 + + def run_single(self) -> float: + cmd = [ + EMU_PATH, + "-q", + self.BIN_PATH, + "0x0", + "0x0", + "0x66", + str(self.ITERATIONS), + "7", + "1", + "2000", + ] + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + try: + stdout, stderr = proc.communicate(timeout=TIMEOUT_SECONDS) + except TimeoutExpired: + proc.kill() + proc.communicate() # Clean up buffers + raise RuntimeError( + f"CoreMark timed out after {TIMEOUT_SECONDS} seconds" + ) + + if proc.returncode != 0: + raise RuntimeError( + f"CoreMark failed (exit {proc.returncode})\n" + f"stdout: {stdout[:500]}\nstderr: {stderr[:500]}" + ) + + match = re.search(r"Iterations/Sec\s*:\s*([0-9]+(?:\.[0-9]+)?)", stdout) + if not match: + raise RuntimeError( + f"Failed to parse Iterations/Sec:\n{stdout[:500]}" + ) + + return float(match.group(1)) + + +def run_benchmark_task( + bench_name: str, n_runs: int, progress: Optional[ProgressIndicator] = None +) -> Tuple[str, dict, List[str], Optional[Exception]]: + """Run a single benchmark. Returns (name, result, logs, error).""" + bench = None + try: + bench_cls = _BENCHMARK_REGISTRY[bench_name] + bench = bench_cls(n_runs, progress) + avg, stdev, _, actual_runs = bench.run() + result = { + "name": bench.name, + "unit": bench.unit, # Store raw unit for proper formatting + "value": round(avg, 3), + "stdev": round(stdev, 3), + "runs": actual_runs, # Actual number of runs completed + } + return bench_name, result, bench.logs, None + except Exception as e: + if progress: + progress.update(bench_name, 0, "failed") + # Preserve logs even on failure for debugging + logs = bench.logs if bench else [] + return bench_name, {}, logs, e + + +def run_benchmarks( + selected: List[str], + output_json: bool, + n_runs: int, + parallel: int = 0, + quiet: bool = False, +) -> None: + """Run selected benchmarks, optionally in parallel.""" + if not os.path.exists(EMU_PATH): + print( + f"Error: {EMU_PATH} not found. 
Please compile first", + file=sys.stderr, + ) + sys.exit(1) + + # Validate selections + registry = get_registered_benchmarks() + for name in selected: + if name not in registry: + print(f"Error: Unknown benchmark '{name}'", file=sys.stderr) + print( + f"Available: {', '.join(sorted(registry.keys()))}", + file=sys.stderr, + ) + sys.exit(1) + + # Prepare phase: build all binaries sequentially before running benchmarks + if not quiet: + print("Preparing benchmarks...") + try: + for name in selected: + registry[name].prepare() + except RuntimeError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + if not quiet: + print("Preparation complete.\n") + + # Create and start progress indicator + progress = ProgressIndicator(selected, n_runs, quiet=quiet) + progress.start() + + results: Dict[str, dict] = {} + all_logs: Dict[str, List[str]] = {} + errors: Dict[str, Exception] = {} + + start_time = time.monotonic() + + if parallel and parallel > 0 and len(selected) > 1: + workers = min(parallel, len(selected)) + if not quiet: + print( + f">>> Running {len(selected)} benchmarks in parallel ({workers} workers) <<<" + ) + with ThreadPoolExecutor(max_workers=workers) as executor: + futures = { + executor.submit( + run_benchmark_task, name, n_runs, progress + ): name + for name in selected + } + for future in as_completed(futures): + name, result, logs, error = future.result() + all_logs[name] = logs + if error: + errors[name] = error + else: + results[name] = result + + progress.finish() + + # Print logs in user-specified order after all complete (only if not quiet) + if not quiet: + for name in selected: + if name in all_logs and all_logs[name]: + print(f"\n[{name}]") + for line in all_logs[name]: + print(f" {line}") + else: + for name in selected: + name, result, logs, error = run_benchmark_task( + name, n_runs, progress + ) + all_logs[name] = logs + if error: + errors[name] = error + else: + results[name] = result + + progress.finish() + + # Print logs after spinner finishes to avoid garbled output + if not quiet: + for name in selected: + if name in all_logs and all_logs[name]: + print(f"\n[{name}]") + for line in all_logs[name]: + print(f" {line}") + + elapsed = time.monotonic() - start_time + + # Report errors + for name, error in errors.items(): + print(f"\nError in {name}: {error}", file=sys.stderr) + + if errors: + sys.exit(1) + + # Output results in user-specified order + print("\n" + "=" * 50) + print("Benchmark results") + print("=" * 50) + ordered_results = [] + for name in selected: + if name in results: + r = results[name] + ordered_results.append( + { + "name": r["name"], + "unit": r["unit"], + "value": r["value"], + "runs": r["runs"], + } + ) + print( + f" {r['name']}: {r['value']} ± {r['stdev']} {r['unit']} ({r['runs']} runs)" + ) + print("=" * 50) + print(f" Total time: {elapsed:.1f}s") + + if output_json: + combined_file = "benchmark_output.json" + with open(combined_file, "w") as f: + json.dump(ordered_results, f, indent=4) + if not quiet: + print(f"Saved: {combined_file}") + + +def parse_benchmarks(args: List[str]) -> List[str]: + """Parse benchmark arguments, preserving order.""" + if not args: + # Default: all registered benchmarks in registration order + return list(_BENCHMARK_REGISTRY.keys()) + + # Handle comma-separated and space-separated inputs + result = [] + for arg in args: + for part in arg.split(","): + name = part.strip().lower() + if name and name not in result: # Preserve order, no duplicates + result.append(name) + return result + + +def main(): + parser = 
argparse.ArgumentParser( + description="Run benchmarks for rv32emu", + epilog=f"Available benchmarks: {', '.join(sorted(_BENCHMARK_REGISTRY.keys()))}", + ) + parser.add_argument( + "--json", + action="store_true", + help="Output results to JSON files", + ) + parser.add_argument( + "--parallel", + type=int, + metavar="N", + help="Run benchmarks in parallel with N workers (default: sequential)", + ) + parser.add_argument( + "--quiet", + action="store_true", + help="Quiet mode for CI (no progress indicator)", + ) + parser.add_argument( + "--runs", + type=int, + default=DEFAULT_RUNS, + help=f"Number of runs per benchmark (default: {DEFAULT_RUNS})", + ) + parser.add_argument( + "benchmarks", + nargs="*", + metavar="BENCH", + help="Benchmarks to run (comma or space-separated)", + ) + + args = parser.parse_args() + + # Validate --runs + if args.runs < 1: + parser.error("--runs must be at least 1") + + selected = parse_benchmarks(args.benchmarks) + if not selected: + print("Error: No benchmarks specified", file=sys.stderr) + sys.exit(1) + + run_benchmarks( + selected, + args.json, + args.runs, + parallel=args.parallel or 0, + quiet=args.quiet, + ) + + +if __name__ == "__main__": + main() diff --git a/tests/coremark.py b/tests/coremark.py deleted file mode 100755 index 6c75a8d8d..000000000 --- a/tests/coremark.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 -import subprocess -import re -import numpy -import os -import json - -iter = 1 -coremark_param = "0x0 0x0 0x66 30000 7 1 2000" -res = [] -file_exist = os.path.exists("build/rv32emu") -if not file_exist: - print("Please compile before running test") - exit(1) -print("Start Test CoreMark benchmark") -comp_proc = subprocess.check_output( - "build/rv32emu build/riscv32/coremark {}".format(coremark_param), shell=True -).decode("utf-8") -if not comp_proc or comp_proc.find("Error") != -1: - print("Test Error") - exit(1) -else: - print("Test Pass") - -for i in range(iter): - print("Running CoreMark benchmark - Run #{}".format(i + 1)) - comp_proc = subprocess.check_output( - "build/rv32emu build/riscv32/coremark {}".format(coremark_param), - shell=True, - ).decode("utf-8") - if not comp_proc: - print("Fail\n") - exit(1) - else: - res.append( - float( - re.findall(r"Iterations/Sec : [0-9]+.[0-9]+", comp_proc)[0][ - 19: - ] - ) - ) - -mean = numpy.mean(res, dtype=numpy.float64) -deviation = numpy.std(res, dtype=numpy.float64) -for n in res: - if abs(n - mean) > (deviation * 2): - res.remove(n) - -print("{:.3f}".format(numpy.mean(res, dtype=numpy.float64))) - -# save Average Iterations/Sec in JSON format for benchmark action workflow -benchmark_output = "coremark_output.json" -benchmark_data = { - "name": "Coremark", - "unit": "Average iterations/sec over 10 runs", - "value": float("{:.3f}".format(numpy.mean(res, dtype=numpy.float64))), -} -f = open(benchmark_output, "w") -f.write(json.dumps(benchmark_data)) -f.close() diff --git a/tests/dhrystone.sh b/tests/dhrystone.sh deleted file mode 100755 index 4fdf80d57..000000000 --- a/tests/dhrystone.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env bash - -source tests/common.sh - -# Set the number of runs for the Dhrystone benchmark -N_RUNS=1 - -function sanity_check() -{ - if test ! -f $O/riscv32/dhrystone; then - make artifact || exit 1 - fi -} - -function run_dhrystone() -{ - # Run Dhrystone and extract the DMIPS value - output=$($RUN $O/riscv32/dhrystone 2>&1) - local exit_code=$? 
- [ $exit_code -ne 0 ] && fail - dmips=$(echo "$output" | grep -Po '[0-9]+(?= DMIPS)' | awk '{print}') - echo "$dmips" -} - -sanity_check -# Run Dhrystone benchmark and collect DMIPS values -dmips_values=() -for ((i = 1; i <= $N_RUNS; i++)); do - echo "Running Dhrystone benchmark - Run #$i" - dmips=$(run_dhrystone) - exit_code=$? - [ $exit_code -ne 0 ] && fail - dmips_values+=("$dmips") -done - -# Sort DMIPS values -sorted_dmips=($(printf "%s\n" "${dmips_values[@]}" | sort -n)) - -# Calculate Median Absolute Deviation (MAD) -num_dmips=${#sorted_dmips[@]} -median_index=$((num_dmips / 2)) -if ((num_dmips % 2 == 0)); then - median=$(echo "scale=2; (${sorted_dmips[median_index - 1]} + ${sorted_dmips[median_index]}) / 2" | bc -l) -else - median=${sorted_dmips[median_index]} -fi - -deviation=0 -for dmips in "${sorted_dmips[@]}"; do - if (($(echo "$dmips > $median" | bc -l))); then - diff=$(echo "$dmips - $median" | bc -l) - else - diff=$(echo "$median - $dmips" | bc -l) - fi - deviation=$(echo "scale=2; $deviation + $diff" | bc -l) -done - -mad=$(echo "scale=2; $deviation / $num_dmips" | bc -l) - -# Filter outliers based on MAD -filtered_dmips=() -for dmips in "${sorted_dmips[@]}"; do - if (($(echo "$dmips > 0" | bc -l))); then - if (($(echo "$dmips > $median" | bc -l))); then - diff=$(echo "$dmips - $median" | bc -l) - else - diff=$(echo "$median - $dmips" | bc -l) - fi - if (($(echo "$diff <= $mad * 2" | bc -l))); then - filtered_dmips+=("$dmips") - fi - fi -done - -#dhrystone benchmark output file -benchmark_output=dhrystone_output.json -# empty the file -echo -n "" > $benchmark_output - -# Calculate average DMIPS excluding outliers -num_filtered=${#filtered_dmips[@]} -if ((num_filtered > 0)); then - total_dmips=0 - for dmips in "${filtered_dmips[@]}"; do - total_dmips=$(echo "scale=2; $total_dmips + $dmips" | bc -l) - done - - average_dmips=$(echo "scale=2; $total_dmips / $num_filtered" | bc -l) - echo "--------------------------" - echo "Average DMIPS : $average_dmips" - echo "--------------------------" - - #save Average DMIPS in JSON format for benchmark action workflow - echo -n '{' >> $benchmark_output - echo -n '"name": "Dhrystone",' >> $benchmark_output - echo -n '"unit": "Average DMIPS over 10 runs",' >> $benchmark_output - echo -n '"value": ' >> $benchmark_output - echo -n $average_dmips >> $benchmark_output - echo -n '}' >> $benchmark_output -else - fail -fi
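
Note (illustrative, not part of the patch): a short usage sketch of the new runner, based on the argparse options defined in tests/bench.py above. The CI workflow change at the top of this diff invokes the second form.

    # Run every registered benchmark (dhrystone, coremark) sequentially, 5 runs each
    python3 tests/bench.py

    # CI invocation: JSON results written to benchmark_output.json, no progress spinner
    python3 tests/bench.py --json --quiet

    # Run a single benchmark with a custom run count
    python3 tests/bench.py coremark --runs 3

    # Comma-separated selection; --parallel only takes effect when more than
    # one benchmark is selected
    python3 tests/bench.py dhrystone,coremark --parallel 2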
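
For reference, a minimal sketch of how an additional benchmark could plug into the registry above, assuming the class is placed in tests/bench.py. The "memcpy" name, binary path, and "MB/s" output format are hypothetical and not provided by this change; only register_benchmark, Benchmark, EMU_PATH, and TIMEOUT_SECONDS come from the patch.

    @register_benchmark("memcpy")
    class MemcpyBenchmark(Benchmark):
        """Hypothetical throughput benchmark (illustrative only)."""

        name = "Memcpy"
        unit = "MB/s"
        BIN_PATH = "build/riscv32/memcpy"  # hypothetical artifact path

        def run_single(self) -> float:
            # Run the emulator quietly; check=True raises on a non-zero exit,
            # and the timeout mirrors the limit used by the other benchmarks.
            result = subprocess.run(
                [EMU_PATH, "-q", self.BIN_PATH],
                capture_output=True,
                text=True,
                timeout=TIMEOUT_SECONDS,
                check=True,
            )
            match = re.search(r"([0-9]+(?:\.[0-9]+)?)\s*MB/s", result.stdout)
            if not match:
                raise RuntimeError(f"Failed to parse MB/s:\n{result.stdout[:500]}")
            return float(match.group(1))

The base class already supplies prepare() (which builds missing artifacts via make artifact), the adaptive run loop, and outlier filtering, so a subclass only needs name, unit, BIN_PATH, and run_single().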
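
To make the outlier rule in calculate_stats() concrete, a tiny worked example with made-up numbers: a run is dropped when it lies more than two sample standard deviations from the median, and if fewer than two runs survive the filter, all runs are kept instead.

    import statistics

    values = [100.0, 101.0, 99.0, 250.0, 100.5]   # made-up run results
    median = statistics.median(values)             # 100.5
    stdev = statistics.stdev(values)               # ~67.0
    kept = [v for v in values if abs(v - median) <= 2.0 * stdev]
    # kept == [100.0, 101.0, 99.0, 100.5]; reported mean ~100.1 instead of ~130.1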