From c82b46a8e339ea9ce738d7913ba8c36e1bbd2301 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 23 Jan 2024 17:57:36 -0500
Subject: [PATCH 1/9] start to regression framework

---
 matrix_benchmarking/regression.py | 151 ++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 matrix_benchmarking/regression.py

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression.py
new file mode 100644
index 00000000..d781b421
--- /dev/null
+++ b/matrix_benchmarking/regression.py
@@ -0,0 +1,151 @@
+import os
+import json
+
+import numpy as np
+from functools import reduce
+from typing import Optional, Callable
+
+import matrix_benchmarking.common as common
+
+def get_from_path(d, path):
+    return reduce(dict.get, path.split("."), d)
+
+# check if ALL (k, v) pairs in part are present in full_dict
+def dict_part_eq(part, full_dict):
+    return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)
+
+class RegressionStatus:
+    def __init__(
+        self,
+        status: int,
+        direction: Optional[int] = None,
+        explanation: Optional[str] = None,
+        details: Optional[dict] = None
+    ):
+        self.status = status
+        self.direction = direction
+        self.explanation = explanation
+        self.details = details
+
+
+class RegressionIndicator:
+    """
+    Assume the matrix that is passed in contains a prefiltered combination of settings,
+    or pass in the desired filter with the settings_filter option
+    """
+    def __init__(
+        self,
+        new_payloads: list[common.MatrixEntry],
+        lts_payloads: list[common.MatrixEntry],
+        x_var_key = lambda x: x.results.metadata.end,
+        kpis: Optional[list[str]] = None,
+        settings_filter: Optional[dict] = None,
+    ):
+        self.x_var_key = x_var_key
+        self.kpis = kpis
+        self.settings_filter = settings_filter
+
+        if self.settings_filter:
+            # Only store payloads that have equivalent (k, v) pairs
+            # as the settings_filter
+            self.new_payloads = list(
+                filter(
+                    lambda x: dict_part_eq(self.settings_filter, x),
+                    map(lambda x: x.settings, new_payloads)
+                )
+            )
+            self.lts_payloads = list(
+                filter(
+                    lambda x: dict_part_eq(self.settings_filter, x),
+                    map(lambda x: x.settings, lts_payloads)
+                )
+            )
+        else:
+            self.new_payloads = new_payloads
+            self.lts_payloads = lts_payloads
+
+        # Why isn't this working? I suspect gnarly python stuff
+        # self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
+
+
+    def analyze(self) -> list[dict]:
+
+        if not self.new_payloads:
+            return [(None, "", RegressionStatus(0, explanation="Not enough new data"))]
+        elif not self.lts_payloads:
+            return [(None, "", RegressionStatus(0, explanation="Not enough LTS data"))]
+
+        regression_results = []
+        for curr_result in self.new_payloads:
+            print(curr_result)
+            kpis_to_test = vars(curr_result.results.lts.kpis).keys() if not self.kpis else self.kpis
+            for kpi in kpis_to_test:
+                regression_results.append(
+                    {
+                        "result": curr_result,
+                        "kpi": kpi,
+                        "regression": self.regression_test(
+                            vars(curr_result.results.lts.kpis)[kpi].value,
+                            list(map(lambda x: vars(x.results.kpis).value, self.lts_payloads))
+                        )
+                    }
+                )
+        print(regression_results)
+        return regression_results
+
+    def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Default return status")
+
+
+class ZScoreIndicator(RegressionIndicator):
+    """
+    Example regression indicator that uses the Z score as a metric
+    to determine if the recent test was an outlier
+    """
+    def __init__(self, *args, threshold=3, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.threshold = threshold
+
+    def regression_test(self, new_result, lts_results) -> RegressionStatus:
+        """
+        Determine if the curr_result is more/less than threshold
+        standard deviations away from the previous_results
+        """
+        mean = np.mean(prev_results)
+        std = np.std(prev_results)
+        z_score = (curr_result - mean) / std
+        if abs(z_score) > self.threshold:
+            return RegressionStatus(
+                1,
+                direction=1 if z_score > 0 else -1,
+                explanation="z-score greater than threshold",
+                details={"threshold": self.threshold, "zscore": z_score}
+            )
+        else:
+            return RegressionStatus(
+                0,
+                explanation="z-score not greater than threshold",
+                details={"threshold": self.threshold, "zscore": z_score}
+            )
+
+class PolynomialRegressionIndicator(RegressionIndicator):
+    """
+    Placeholder for polynomial regression that we could implement
+    somewhere in the pipeline
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Not implemented")
+
+class HunterWrapperIndicator(RegressionIndicator):
+    """
+    Some straightforward indicators are implemented above but this also provides what should
+    be a simple way to wrap datastax/Hunter in a regression_test
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Not implemented")
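The dict_part_eq helper introduced above is the core of the settings filtering. A minimal standalone sketch of its behavior; the settings dict below is invented for illustration:

    from functools import reduce

    def dict_part_eq(part, full_dict):
        # True when every (k, v) pair of `part` is present in `full_dict`
        return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)

    settings = {"image_name": "1.2", "platform": "ocp"}   # invented example settings
    assert dict_part_eq({"platform": "ocp"}, settings)
    assert not dict_part_eq({"platform": "kind"}, settings)
    # Caveat: a key in `part` that is missing from `full_dict` raises KeyError,
    # since the lambda indexes full_dict[y] directly (unless an earlier key
    # already made the accumulator False, in which case `and` short-circuits).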
From 6e024f713e887d8aab6ccf4407c60616a26c6bfd Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Wed, 24 Jan 2024 16:25:44 -0500
Subject: [PATCH 2/9] fixed framework to be called in TOPSAIL

---
 matrix_benchmarking/regression.py | 99 +++++++++++++++++++------------
 1 file changed, 62 insertions(+), 37 deletions(-)

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression.py
index d781b421..46b250e2 100644
--- a/matrix_benchmarking/regression.py
+++ b/matrix_benchmarking/regression.py
@@ -1,6 +1,8 @@
 import os
 import json
-
+import logging
+import types
+import datetime
 import numpy as np
 from functools import reduce
 from typing import Optional, Callable
@@ -14,7 +16,7 @@ def get_from_path(d, path):
 def dict_part_eq(part, full_dict):
     return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)
 
-class RegressionStatus:
+class RegressionStatus(types.SimpleNamespace):
     def __init__(
         self,
         status: int,
@@ -35,62 +37,85 @@ class RegressionIndicator:
     """
     def __init__(
         self,
-        new_payloads: list[common.MatrixEntry],
+        new_payload: common.MatrixEntry,
         lts_payloads: list[common.MatrixEntry],
-        x_var_key = lambda x: x.results.metadata.end,
+        x_var = None,
+        x_var_key = lambda x: x.results.metadata.end.astimezone(),
         kpis: Optional[list[str]] = None,
         settings_filter: Optional[dict] = None,
+        combine_funcs: dict = {},
     ):
+        self.new_payload = new_payload
+        self.x_var = x_var
         self.x_var_key = x_var_key
         self.kpis = kpis
+        self.combine_funcs = combine_funcs
        self.settings_filter = settings_filter
 
+        if self.settings_filter and self.x_var:
+            logging.warning("settings_filter and x_var set, only using settings_filter")
+        elif self.x_var:
+            settings = self.new_payload.get_settings()
+            settings.pop(self.x_var)
+            self.settings_filter = settings
+
         if self.settings_filter:
             # Only store payloads that have equivalent (k, v) pairs
             # as the settings_filter
-            self.new_payloads = list(
-                filter(
-                    lambda x: dict_part_eq(self.settings_filter, x),
-                    map(lambda x: x.settings, new_payloads)
-                )
-            )
             self.lts_payloads = list(
                 filter(
-                    lambda x: dict_part_eq(self.settings_filter, x),
-                    map(lambda x: x.settings, lts_payloads)
+                    lambda x: dict_part_eq(self.settings_filter, x.get_settings()),
+                    lts_payloads
                 )
             )
+
+            if not dict_part_eq(self.settings_filter, self.new_payload.get_settings()):
+                self.new_payload = None
+                logging.warning("settings_filter isn't satisfied for the new payload")
         else:
-            self.new_payloads = new_payloads
             self.lts_payloads = lts_payloads
 
-        # Why isn't this working? I suspect gnarly python stuff
-        # self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
+        # This isn't strictly necessary for all analysis techniques, but
+        # is useful to have
+        self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
 
     def analyze(self) -> list[dict]:
 
-        if not self.new_payloads:
-            return [(None, "", RegressionStatus(0, explanation="Not enough new data"))]
-        elif not self.lts_payloads:
-            return [(None, "", RegressionStatus(0, explanation="Not enough LTS data"))]
+        if not self.new_payload:
+            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough new data"))}]
+
+        if not self.lts_payloads:
+            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough LTS data"))}]
 
         regression_results = []
-        for curr_result in self.new_payloads:
-            print(curr_result)
-            kpis_to_test = vars(curr_result.results.lts.kpis).keys() if not self.kpis else self.kpis
-            for kpi in kpis_to_test:
-                regression_results.append(
-                    {
-                        "result": curr_result,
-                        "kpi": kpi,
-                        "regression": self.regression_test(
-                            vars(curr_result.results.lts.kpis)[kpi].value,
-                            list(map(lambda x: vars(x.results.kpis).value, self.lts_payloads))
+
+        kpis_to_test = vars(self.new_payload.results.lts.kpis).keys() if not self.kpis else self.kpis
+        for kpi in kpis_to_test:
+
+            curr_values = vars(self.new_payload.results.lts.kpis)[kpi].value
+            lts_values = list(map(lambda x: vars(x.results.kpis)[kpi].value, self.lts_payloads))
+
+            if type(vars(self.new_payload.results.lts.kpis)[kpi].value) is list:
+                if kpi in self.combine_funcs:
+                    curr_values = self.combine_funcs[kpi](curr_values)
+                    lts_values = [self.combine_funcs[kpi](v) for v in lts_values]
+                else:
+                    logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
+                    continue
+
+            regression_results.append(
+                {
+                    "result": self.new_payload.get_settings(),
+                    "kpi": kpi,
+                    "regression": vars(
+                        self.regression_test(
+                            curr_values,
+                            lts_values
                         )
-                    }
-                )
-        print(regression_results)
+                    )
+                }
+            )
         return regression_results
 
     def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
@@ -106,14 +131,14 @@ def __init__(self, *args, threshold=3, **kwargs):
         super().__init__(*args, **kwargs)
         self.threshold = threshold
 
-    def regression_test(self, new_result, lts_results) -> RegressionStatus:
+    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
         """
         Determine if the curr_result is more/less than threshold
         standard deviations away from the previous_results
         """
-        mean = np.mean(prev_results)
-        std = np.std(prev_results)
-        z_score = (curr_result - mean) / std
+        mean = np.mean(lts_results)
+        std = np.std(lts_results)
+        z_score = (new_result - mean) / std
         if abs(z_score) > self.threshold:
             return RegressionStatus(
                 1,
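PATCH 2 introduces combine_funcs for KPIs whose value is a list rather than a scalar. A small self-contained sketch of the contract, using an invented KPI name (pod_start_latencies) and invented values:

    import numpy as np

    combine_funcs = {"pod_start_latencies": np.mean}  # invented KPI name
    kpi = "pod_start_latencies"

    curr_values = [1.2, 1.4, 1.1]            # list-valued KPI from the new payload
    lts_values = [[1.0, 1.3], [1.1, 1.2]]    # historical list-valued KPIs

    # Mirrors the branch added in analyze(): collapse lists to scalars,
    # otherwise the KPI is skipped with a warning.
    if kpi in combine_funcs:
        curr_values = combine_funcs[kpi](curr_values)
        lts_values = [combine_funcs[kpi](v) for v in lts_values]

    print(curr_values)  # ~1.233
    print(lts_values)   # [1.15, 1.15]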
From 720d1d01a35f10da5cade10d8804c9d3238b6983 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Fri, 26 Jan 2024 12:19:05 -0500
Subject: [PATCH 3/9] key existence check in filter

---
 matrix_benchmarking/download_lts.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/matrix_benchmarking/download_lts.py b/matrix_benchmarking/download_lts.py
index f72aec51..4f1483e7 100644
--- a/matrix_benchmarking/download_lts.py
+++ b/matrix_benchmarking/download_lts.py
@@ -38,7 +38,8 @@ def main(opensearch_host: str = "",
     opensearch_index: the OpenSearch index where the LTS payloads are stored (Mandatory)
     lts_results_dirname: The directory to place the downloaded LTS results files.
 
-    filters: If provided, only download the experiments matching the filters. Eg: {"image_name": "1.2"}. (Optional.)
+    filters: If provided, only download the experiments matching the filters. Eg: {"image_name": "1.2"}.
+             If the provided value is "*", then we just check to ensure the key's existence (Optional.)
     max_records: Maximum number of records to retrieve from the OpenSearch instance. 10,000 is the largest number possible without paging (Optional.)
     force: Ignore the presence of the anchor file before downloading the results (Optional.)
     clean: Delete all the existing '.json' files in the lts-results-dirname before downloading the results (Optional.)
@@ -121,7 +122,7 @@ def download(client, opensearch_index, filters, lts_results_dirname, max_records
     query["query"] = {
         "bool": {
             "must": [
-                {"term": {f"{k}.keyword": v}} for k, v in filters.items()
+                {"term": {f"{k}.keyword": v}} if v != "*" else {"exists": {"field": k}} for k, v in filters.items()
             ]
         }
     }
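The comprehension changed in PATCH 3 builds an OpenSearch bool/must query. For illustration, here is the query it produces for a mix of exact and wildcard filters (the filter keys and values are invented):

    filters = {"image_name": "1.2", "test_name": "*"}  # invented filter values

    query = {"query": {
        "bool": {
            "must": [
                {"term": {f"{k}.keyword": v}} if v != "*" else {"exists": {"field": k}}
                for k, v in filters.items()
            ]
        }
    }}
    print(query)
    # {'query': {'bool': {'must': [{'term': {'image_name.keyword': '1.2'}},
    #                              {'exists': {'field': 'test_name'}}]}}}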
From 0929b7ab01c7d9e83ca8cd4383cb92430063860a Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Mon, 29 Jan 2024 17:27:46 -0500
Subject: [PATCH 4/9] add regression model, split indicator modules

---
 matrix_benchmarking/models.py                 |  8 ++
 .../{regression.py => regression/__init__.py} | 76 +++---------------
 matrix_benchmarking/regression/zscore.py      | 30 ++++++++
 3 files changed, 48 insertions(+), 66 deletions(-)
 rename matrix_benchmarking/{regression.py => regression/__init__.py} (62%)
 create mode 100644 matrix_benchmarking/regression/zscore.py

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 88a81c0c..3afa6fad 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -130,3 +130,11 @@ def tostr(self):
         model.tostr = tostr
 
         return model
+
+class RegressionResult():
+    kpi: str
+    indicator: str
+    status: int
+    direction: Optional[int]
+    explanation: Optional[str]
+    details: Optional[dict[str, str]]

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression/__init__.py
similarity index 62%
rename from matrix_benchmarking/regression.py
rename to matrix_benchmarking/regression/__init__.py
index 46b250e2..b92c8bed 100644
--- a/matrix_benchmarking/regression.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -79,6 +79,8 @@ def __init__(
         # is useful to have
         self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
+    def get_name(self):
+        return "UndefinedRegressionIndicator"
 
     def analyze(self) -> list[dict]:
 
@@ -104,73 +106,15 @@ def analyze(self) -> list[dict]:
                     logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
                     continue
 
-            regression_results.append(
-                {
-                    "result": self.new_payload.get_settings(),
-                    "kpi": kpi,
-                    "regression": vars(
-                        self.regression_test(
-                            curr_values,
-                            lts_values
-                        )
-                    )
-                }
-            )
+
+            raw_results = self.regression_test(curr_values, lts_values)
+            stats = {
+                "kpi": kpi,
+                "indicator": self.get_name()
+            }
+            regression_results.append({**stats, **raw_results})
+
         return regression_results
 
     def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
         return RegressionStatus(0, explanation="Default return status")
-
-
-class ZScoreIndicator(RegressionIndicator):
-    """
-    Example regression indicator that uses the Z score as a metric
-    to determine if the recent test was an outlier
-    """
-    def __init__(self, *args, threshold=3, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.threshold = threshold
-
-    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
-        """
-        Determine if the curr_result is more/less than threshold
-        standard deviations away from the previous_results
-        """
-        mean = np.mean(lts_results)
-        std = np.std(lts_results)
-        z_score = (new_result - mean) / std
-        if abs(z_score) > self.threshold:
-            return RegressionStatus(
-                1,
-                direction=1 if z_score > 0 else -1,
-                explanation="z-score greater than threshold",
-                details={"threshold": self.threshold, "zscore": z_score}
-            )
-        else:
-            return RegressionStatus(
-                0,
-                explanation="z-score not greater than threshold",
-                details={"threshold": self.threshold, "zscore": z_score}
-            )
-
-class PolynomialRegressionIndicator(RegressionIndicator):
-    """
-    Placeholder for polynomial regression that we could implement
-    somewhere in the pipeline
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
-        return RegressionStatus(0, explanation="Not implemented")
-
-class HunterWrapperIndicator(RegressionIndicator):
-    """
-    Some straightforward indicators are implemented above but this also provides what should
-    be a simple way to wrap datastax/Hunter in a regression_test
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
-        return RegressionStatus(0, explanation="Not implemented")

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
new file mode 100644
index 00000000..faf92b1a
--- /dev/null
+++ b/matrix_benchmarking/regression/zscore.py
@@ -0,0 +1,30 @@
+class ZScoreIndicator(RegressionIndicator):
+    """
+    Example regression indicator that uses the Z score as a metric
+    to determine if the recent test was an outlier
+    """
+    def __init__(self, *args, threshold=3, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.threshold = threshold
+
+    def get_name(self):
+        return f"ZScoreIndicator(threshold={self.threshold})"
+
+    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
+        """
+        Determine if the curr_result is more/less than threshold
+        standard deviations away from the previous_results
+        """
+        mean = np.mean(lts_results)
+        std = np.std(lts_results)
+        z_score = (new_result - mean) / std
+
+        status = 0
+        explanation = "z-score not greater than threshold"
+        details = {"threshold": self.threshold, "zscore": z_score}
+        if abs(z_score) > self.threshold:
+            status = 1
+            direction = 1 if z_score > 0 else -1,
+            explanation="z-score greater than threshold",
+
+        return RegressionStatus(0, direction=direction, explanation=explanation, details=details)
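One thing worth flagging in PATCH 4: regression_test still returns a RegressionStatus, which since PATCH 2 is a types.SimpleNamespace subclass and therefore not a mapping, so `{**stats, **raw_results}` raises TypeError at runtime; vars() is needed to reach its __dict__. PATCH 6 replaces this merge with a proper model. A quick demonstration of the pitfall in isolation:

    import types

    class RegressionStatus(types.SimpleNamespace):
        def __init__(self, status, explanation=None):
            self.status = status
            self.explanation = explanation

    stats = {"kpi": "some_kpi", "indicator": "UndefinedRegressionIndicator"}
    raw_results = RegressionStatus(0, explanation="Default return status")

    # {**stats, **raw_results}               # TypeError: not a mapping
    merged = {**stats, **vars(raw_results)}  # works: vars() exposes __dict__
    print(merged)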
From e781630022e610ab21887b35ae85ee0ea1eea8e3 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 30 Jan 2024 11:28:38 -0500
Subject: [PATCH 5/9] fix module split for zscore

---
 matrix_benchmarking/models.py            |  2 +-
 matrix_benchmarking/regression/zscore.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 3afa6fad..258b059d 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -131,7 +131,7 @@ def tostr(self):
 
         return model
 
-class RegressionResult():
+class RegressionResult(ExclusiveModel):
     kpi: str
     indicator: str
     status: int

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index faf92b1a..fc879178 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -1,4 +1,8 @@
-class ZScoreIndicator(RegressionIndicator):
+from matrix_benchmarking import regression
+
+import numpy as np
+
+class ZScoreIndicator(regression.RegressionIndicator):
     """
     Example regression indicator that uses the Z score as a metric
     to determine if the recent test was an outlier
@@ -10,7 +14,7 @@ def __init__(self, *args, threshold=3, **kwargs):
     def get_name(self):
         return f"ZScoreIndicator(threshold={self.threshold})"
 
-    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
+    def regression_test(self, new_result: float, lts_results: np.array) -> regression.RegressionStatus:
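As of PATCH 5, zscore.py still carries a few subtle issues that PATCH 8 later fixes: trailing commas turn the direction and explanation assignments into one-element tuples, direction is unbound when the result is not an outlier, and the return still hardcodes status 0. The tuple pitfall in isolation:

    z_score, threshold = 4.2, 3

    if abs(z_score) > threshold:
        direction = 1 if z_score > 0 else -1,            # trailing comma -> (1,)
        explanation = "z-score greater than threshold",  # -> a 1-tuple, not a str

    print(type(direction), direction)      # <class 'tuple'> (1,)
    print(type(explanation), explanation)  # <class 'tuple'> ('z-score greater than threshold',)
    # And when abs(z_score) <= threshold, `direction` is never bound at all,
    # so the subsequent return would raise NameError.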
        """
        Determine if the curr_result is more/less than threshold
        standard deviations away from the previous_results
@@ -27,4 +31,4 @@ def regression_test(self, new_result: float, lts_results: np.array) -> Regressio
             direction = 1 if z_score > 0 else -1,
             explanation="z-score greater than threshold",
 
-        return RegressionStatus(0, direction=direction, explanation=explanation, details=details)
+        return regression.RegressionStatus(0, direction=direction, explanation=explanation, details=details)

From 7966e2ccb533f4876807c053d64a892c1b2b7782 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 30 Jan 2024 13:59:53 -0500
Subject: [PATCH 6/9] use RegressionResult model in indicator

---
 matrix_benchmarking/models.py              |  7 +++--
 matrix_benchmarking/regression/__init__.py | 34 +++++++++++++---------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 258b059d..0357bfc9 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -133,8 +133,9 @@ def tostr(self):
 
 class RegressionResult(ExclusiveModel):
     kpi: str
+    setting: str
     indicator: str
     status: int
-    direction: Optional[int]
-    explanation: Optional[str]
-    details: Optional[dict[str, str]]
+    direction: Optional[int] = Field(default=None)
+    explanation: Optional[str] = Field(default=None)
+    details: Optional[dict[str, str]] = Field(default=None)

diff --git a/matrix_benchmarking/regression/__init__.py b/matrix_benchmarking/regression/__init__.py
index b92c8bed..951836ce 100644
--- a/matrix_benchmarking/regression/__init__.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -8,6 +8,7 @@
 from typing import Optional, Callable
 
 import matrix_benchmarking.common as common
+import matrix_benchmarking.models as models
 
 def get_from_path(d, path):
     return reduce(dict.get, path.split("."), d)
@@ -29,7 +30,6 @@ def __init__(
         self.explanation = explanation
         self.details = details
 
-
 class RegressionIndicator:
     """
     Assume the matrix that is passed in contains a prefiltered combination of settings,
@@ -82,13 +82,17 @@ def __init__(
     def get_name(self):
         return "UndefinedRegressionIndicator"
 
-    def analyze(self) -> list[dict]:
-
-        if not self.new_payload:
-            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough new data"))}]
+    def analyze(self) -> list[models.RegressionResult]:
 
-        if not self.lts_payloads:
-            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough LTS data"))}]
+        if not self.new_payload or not self.lts_payloads:
+            return [
+                models.RegressionResult(
+                    kpi="",
+                    setting="" if not self.x_var else self.x_var,
+                    indicator=self.get_name(),
+                    status=0
+                )
+            ]
 
         regression_results = []
 
@@ -107,12 +111,16 @@ def analyze(self) -> list[dict]:
             continue
 
-            raw_results = self.regression_test(curr_values, lts_values)
-            stats = {
-                "kpi": kpi,
-                "indicator": self.get_name()
-            }
-            regression_results.append({**stats, **raw_results})
+            raw_results: RegressionStatus = self.regression_test(curr_values, lts_values)
+            result = models.RegressionResult(
+                kpi=kpi,
+                setting="" if not self.x_var else self.x_var,
+                indicator=self.get_name(),
+                direction=raw_results.direction,
+                explanation=raw_results.explanation,
+                details=raw_results.details
+            )
+            regression_results.append(result)
 
         return regression_results
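A note on the PATCH 6 model: status has no default, so a pydantic model shaped like RegressionResult requires it at construction time, yet the new analyze() body builds the result without status=; PATCH 8 adds status=raw_results.status. A minimal sketch, assuming ExclusiveModel is a pydantic BaseModel subclass and Field is pydantic's (both assumptions, since models.py's imports are not shown in this series):

    from typing import Optional
    from pydantic import BaseModel, Field

    class RegressionResult(BaseModel):  # stand-in for ExclusiveModel
        kpi: str
        setting: str
        indicator: str
        status: int
        direction: Optional[int] = Field(default=None)
        explanation: Optional[str] = Field(default=None)
        details: Optional[dict[str, str]] = Field(default=None)

    ok = RegressionResult(kpi="some_kpi", setting="",
                          indicator="ZScoreIndicator(threshold=3)", status=1)

    try:
        RegressionResult(kpi="some_kpi", setting="", indicator="...")  # no status
    except Exception as e:
        print(type(e).__name__)  # ValidationError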
From 920dda4e190122c744fcfaab78dfb14009930d33 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Thu, 1 Feb 2024 17:13:29 -0500
Subject: [PATCH 7/9] temp fix for stale payloads

---
 matrix_benchmarking/store/simple.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/matrix_benchmarking/store/simple.py b/matrix_benchmarking/store/simple.py
index afe3d591..3868d4ed 100644
--- a/matrix_benchmarking/store/simple.py
+++ b/matrix_benchmarking/store/simple.py
@@ -185,6 +185,11 @@ def has_lts_anchor(files):
     with open(filepath) as f:
         document = json.load(f)
 
+    # Temporary fix for old payloads in OpenSearch,
+    # I need to clear this up before merge
+    if "regression_results" in document:
+        document.pop("regression_results")
+
     try:
         lts_payload = store.lts_schema.parse_obj(document)
     except pydantic.error_wrappers.ValidationError as e:
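The temporary fix in PATCH 7 drops a stale regression_results field from old OpenSearch documents before schema validation. Presumably the LTS schema forbids unknown fields (the series never shows ExclusiveModel's definition, so this is an assumption); a sketch of the failure mode with pydantic v1, whose error_wrappers module the surrounding code already references:

    import pydantic

    class LtsPayload(pydantic.BaseModel):
        kpi: str
        class Config:
            extra = "forbid"  # assumed: unknown fields are rejected

    document = {"kpi": "some_kpi", "regression_results": []}  # stale field from an old upload

    if "regression_results" in document:
        document.pop("regression_results")

    payload = LtsPayload.parse_obj(document)  # validates once the stale field is gone
    print(payload)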
From 83ff7531d555096f31600594edff9f18d55b73ef Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Mon, 5 Feb 2024 16:48:18 -0500
Subject: [PATCH 8/9] handle gathered results

---
 matrix_benchmarking/common.py              |  3 +-
 matrix_benchmarking/models.py              |  4 +-
 matrix_benchmarking/regression/__init__.py | 64 +++++++++++++---------
 matrix_benchmarking/regression/zscore.py   |  7 ++-
 matrix_benchmarking/store/simple.py        |  4 ----
 5 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/matrix_benchmarking/common.py b/matrix_benchmarking/common.py
index 7e1b4eb1..3af302a6 100644
--- a/matrix_benchmarking/common.py
+++ b/matrix_benchmarking/common.py
@@ -24,7 +24,8 @@ def __init__(self, location, results,
         self.results = results
 
         self.settings.__dict__.update(processed_settings)
-
+
+        self.import_key = import_key
         self.processed_key = processed_key
         self.import_settings = processed_settings

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 0357bfc9..a138cf2f 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -131,9 +131,9 @@ def tostr(self):
 
         return model
 
-class RegressionResult(ExclusiveModel):
+class Regression(ExclusiveModel):
     kpi: str
-    setting: str
+    metric: str
     indicator: str
     status: int
     direction: Optional[int] = Field(default=None)

diff --git a/matrix_benchmarking/regression/__init__.py b/matrix_benchmarking/regression/__init__.py
index 951836ce..7d335df7 100644
--- a/matrix_benchmarking/regression/__init__.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -6,6 +6,7 @@ import numpy as np
 from functools import reduce
 from typing import Optional, Callable
+import copy
 
 import matrix_benchmarking.common as common
 import matrix_benchmarking.models as models
@@ -39,11 +40,12 @@ def __init__(
         self,
         new_payload: common.MatrixEntry,
         lts_payloads: list[common.MatrixEntry],
-        x_var = None,
-        x_var_key = lambda x: x.results.metadata.end.astimezone(),
+        x_var: str,
+        x_var_key = lambda x: x.metadata.end.astimezone(),
         kpis: Optional[list[str]] = None,
         settings_filter: Optional[dict] = None,
         combine_funcs: dict = {},
+        use_x_var = False # Automatically determine the settings for the x_var
     ):
         self.new_payload = new_payload
         self.x_var = x_var
@@ -51,11 +53,12 @@ def __init__(
         self.kpis = kpis
         self.combine_funcs = combine_funcs
         self.settings_filter = settings_filter
+        self.use_x_var = use_x_var
 
         if self.settings_filter and self.x_var:
             logging.warning("settings_filter and x_var set, only using settings_filter")
-        elif self.x_var:
-            settings = self.new_payload.get_settings()
+        elif self.x_var and self.use_x_var:
+            settings = dict(self.new_payload.get_settings())
             settings.pop(self.x_var)
             self.settings_filter = settings
@@ -75,20 +78,18 @@ def __init__(
         else:
             self.lts_payloads = lts_payloads
 
-        # This isn't strictly necessary for all analysis techniques, but
-        # is useful to have
-        self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
     def get_name(self):
         return "UndefinedRegressionIndicator"
 
-    def analyze(self) -> list[models.RegressionResult]:
+    def analyze(self) -> list[models.Regression]:
 
         if not self.new_payload or not self.lts_payloads:
+            logging.info("Missing a new payload or lts payloads")
             return [
-                models.RegressionResult(
+                models.Regression(
                     kpi="",
-                    setting="" if not self.x_var else self.x_var,
+                    metric="" if not self.x_var else self.x_var,
                     indicator=self.get_name(),
                     status=0
                 )
             ]
@@ -99,28 +100,41 @@ def analyze(self) -> list[models.Regression]:
         kpis_to_test = vars(self.new_payload.results.lts.kpis).keys() if not self.kpis else self.kpis
         for kpi in kpis_to_test:
 
-            curr_values = vars(self.new_payload.results.lts.kpis)[kpi].value
-            lts_values = list(map(lambda x: vars(x.results.kpis)[kpi].value, self.lts_payloads))
+            curr_values = []
+            if type(self.new_payload.results) is list:
+                for result in self.new_payload.results:
+                    curr_values.append(vars(result.lts.kpis)[kpi].value)
+            else:
+                curr_values.append(vars(self.new_payload.results.lts.kpis)[kpi].value)
+
+            lts_values = []
+            for payload in self.lts_payloads:
+                if type(payload.results) is list:
+                    lts_values += list(map(lambda x: vars(x.results.kpis)[kpi].value, payload.results))
+                else:
+                    lts_values.append(vars(payload.results.kpis)[kpi].value)
 
-            if type(vars(self.new_payload.results.lts.kpis)[kpi].value) is list:
+            if any(map(lambda x: type(x) is list, curr_values + lts_values)):
                 if kpi in self.combine_funcs:
-                    curr_values = self.combine_funcs[kpi](curr_values)
+                    curr_values = [self.combine_funcs[kpi](v) for v in curr_values]
                     lts_values = [self.combine_funcs[kpi](v) for v in lts_values]
                 else:
                     logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
                     continue
 
-            raw_results: RegressionStatus = self.regression_test(curr_values, lts_values)
-            result = models.Regression(
-                kpi=kpi,
-                metric="" if not self.x_var else self.x_var,
-                indicator=self.get_name(),
-                direction=raw_results.direction,
-                explanation=raw_results.explanation,
-                details=raw_results.details
-            )
-            regression_results.append(result)
+            for curr_value in curr_values:
+                raw_results: RegressionStatus = self.regression_test(curr_value, lts_values)
+                result = models.Regression(
+                    kpi=kpi,
+                    metric="" if not self.x_var else self.x_var,
+                    indicator=self.get_name(),
+                    status=raw_results.status,
+                    direction=raw_results.direction,
+                    explanation=raw_results.explanation,
+                    details=raw_results.details
+                )
+                regression_results.append(result)
 
         return regression_results
diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index fc879178..b63e4941 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -24,11 +24,12 @@ def regression_test(self, new_result: float, lts_results: np.array) -> regressio
         z_score = (new_result - mean) / std
 
         status = 0
+        direction = 0
         explanation = "z-score not greater than threshold"
         details = {"threshold": self.threshold, "zscore": z_score}
         if abs(z_score) > self.threshold:
             status = 1
-            direction = 1 if z_score > 0 else -1,
-            explanation="z-score greater than threshold",
+            direction = 1 if z_score > 0 else -1
+            explanation="z-score greater than threshold"
 
-        return regression.RegressionStatus(0, direction=direction, explanation=explanation, details=details)
+        return regression.RegressionStatus(status, direction=direction, explanation=explanation, details=details)

diff --git a/matrix_benchmarking/store/simple.py b/matrix_benchmarking/store/simple.py
index 3868d4ed..aec9b0f9 100644
--- a/matrix_benchmarking/store/simple.py
+++ b/matrix_benchmarking/store/simple.py
@@ -185,10 +185,6 @@ def has_lts_anchor(files):
     with open(filepath) as f:
         document = json.load(f)
 
-    # Temporary fix for old payloads in OpenSearch,
-    # I need to clear this up before merge
-    if "regression_results" in document:
-        document.pop("regression_results")
-
     try:
         lts_payload = store.lts_schema.parse_obj(document)
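The core of the "gathered results" handling added in PATCH 8, in isolation: an entry's .results may now be a single result object or a list of them. A self-contained sketch with stand-in namespace objects (the throughput KPI name is invented):

    from types import SimpleNamespace as NS

    def gather_curr_values(results, kpi):
        # Mirrors the branch added to analyze(): accept one result or many.
        curr_values = []
        if type(results) is list:
            for result in results:
                curr_values.append(vars(result.lts.kpis)[kpi].value)
        else:
            curr_values.append(vars(results.lts.kpis)[kpi].value)
        return curr_values

    one = NS(lts=NS(kpis=NS(throughput=NS(value=42.0))))
    print(gather_curr_values(one, "throughput"))         # [42.0]
    print(gather_curr_values([one, one], "throughput"))  # [42.0, 42.0]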
From 54ae4a97128402c32fb722d2c9b0be5154f042b3 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 6 Feb 2024 14:02:24 -0500
Subject: [PATCH 9/9] add more details to zscore result

---
 matrix_benchmarking/regression/zscore.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index b63e4941..60c2da99 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -26,7 +26,13 @@ def regression_test(self, new_result: float, lts_results: np.array) -> regressio
         status = 0
         direction = 0
         explanation = "z-score not greater than threshold"
-        details = {"threshold": self.threshold, "zscore": z_score}
+        details = {
+            "new_result": new_result,
+            "threshold": self.threshold,
+            "zscore": z_score,
+            "mean": mean,
+            "stddev": std
+        }
         if abs(z_score) > self.threshold:
             status = 1
             direction = 1 if z_score > 0 else -1
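Taken together, the series leaves the framework invocable roughly like this (a hedged sketch: new_entry and lts_entries are placeholders for real common.MatrixEntry objects, and "version" is an invented setting name):

    from matrix_benchmarking.regression.zscore import ZScoreIndicator

    indicator = ZScoreIndicator(
        new_entry,        # placeholder: the MatrixEntry under test
        lts_entries,      # placeholder: historical LTS MatrixEntry list
        x_var="version",  # invented setting name to hold out of the filter
        threshold=3,
    )

    for reg in indicator.analyze():  # -> list[models.Regression]
        if reg.status:
            print(f"{reg.kpi}: {reg.explanation} ({reg.details})")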