From c82b46a8e339ea9ce738d7913ba8c36e1bbd2301 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 23 Jan 2024 17:57:36 -0500
Subject: [PATCH 1/9] start to regression framework

---
 matrix_benchmarking/regression.py | 151 ++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 matrix_benchmarking/regression.py

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression.py
new file mode 100644
index 00000000..d781b421
--- /dev/null
+++ b/matrix_benchmarking/regression.py
@@ -0,0 +1,151 @@
+import os
+import json
+
+import numpy as np
+from functools import reduce
+from typing import Optional, Callable
+
+import matrix_benchmarking.common as common
+
+def get_from_path(d, path):
+    return reduce(dict.get, path.split("."), d)
+
+# check if ALL (k, v) pairs in part are present in full_dict
+def dict_part_eq(part, full_dict):
+    return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)
+
+class RegressionStatus:
+    def __init__(
+        self,
+        status: int,
+        direction: Optional[int] = None,
+        explanation: Optional[str] = None,
+        details: Optional[dict] = None
+    ):
+        self.status = status
+        self.direction = direction
+        self.explanation = explanation
+        self.details = details
+
+
+class RegressionIndicator:
+    """
+    Assume the matrix that is passed in contains a prefiltered combination of settings,
+    or pass in the desired filter with the settings_filter option
+    """
+    def __init__(
+        self,
+        new_payloads: list[common.MatrixEntry],
+        lts_payloads: list[common.MatrixEntry],
+        x_var_key = lambda x: x.results.metadata.end,
+        kpis: Optional[list[str]] = None,
+        settings_filter: Optional[dict] = None,
+    ):
+        self.x_var_key = x_var_key
+        self.kpis = kpis
+        self.settings_filter = settings_filter
+
+        if self.settings_filter:
+            # Only store payloads that have equivalent (k, v) pairs
+            # as the settings_filter
+            self.new_payloads = list(
+                filter(
+                    lambda x: dict_part_eq(self.settings_filter, x),
+                    map(lambda x: x.settings, new_payloads)
+                )
+            )
+            self.lts_payloads = list(
+                filter(
+                    lambda x: dict_part_eq(self.settings_filter, x),
+                    map(lambda x: x.settings, lts_payloads)
+                )
+            )
+        else:
+            self.new_payloads = new_payloads
+            self.lts_payloads = lts_payloads
+
+        # Why isn't this working? I suspect gnarly python stuff
+        # self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
+
+
+    def analyze(self) -> list[dict]:
+
+        if not self.new_payloads:
+            return [(None, "", RegressionStatus(0, explanation="Not enough new data"))]
+        elif not self.lts_payloads:
+            return [(None, "", RegressionStatus(0, explanation="Not enough LTS data"))]
+
+        regression_results = []
+        for curr_result in self.new_payloads:
+            print(curr_result)
+            kpis_to_test = vars(curr_result.results.lts.kpis).keys() if not self.kpis else self.kpis
+            for kpi in kpis_to_test:
+                regression_results.append(
+                    {
+                        "result": curr_result,
+                        "kpi": kpi,
+                        "regression": self.regression_test(
+                            vars(curr_result.results.lts.kpis)[kpi].value,
+                            list(map(lambda x: vars(x.results.kpis).value, self.lts_payloads))
+                        )
+                    }
+                )
+        print(regression_results)
+        return regression_results
+
+    def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Default return status")
+
+
+class ZScoreIndicator(RegressionIndicator):
+    """
+    Example regression indicator that uses the Z score as a metric
+    to determine if the recent test was an outlier
+    """
+    def __init__(self, *args, threshold=3, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.threshold = threshold
+
+    def regression_test(self, new_result, lts_results) -> RegressionStatus:
+        """
+        Determine if the curr_result is more/less than threshold
+        standard deviations away from the previous_results
+        """
+        mean = np.mean(prev_results)
+        std = np.std(prev_results)
+        z_score = (curr_result - mean) / std
+        if abs(z_score) > self.threshold:
+            return RegressionStatus(
+                1,
+                direction=1 if z_score > 0 else -1,
+                explanation="z-score greater than threshold",
+                details={"threshold": self.threshold, "zscore": z_score}
+            )
+        else:
+            return RegressionStatus(
+                0,
+                explanation="z-score not greater than threshold",
+                details={"threshold": self.threshold, "zscore": z_score}
+            )
+
+class PolynomialRegressionIndicator(RegressionIndicator):
+    """
+    Placeholder for polynomial regression that we could implement
+    somewhere in the pipeline
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Not implemented")
+
+class HunterWrapperIndicator(RegressionIndicator):
+    """
+    Some straightforward indicators are implemented above but this also provides what should
+    be a simple way to wrap datastax/Hunter in a regression_test
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
+        return RegressionStatus(0, explanation="Not implemented")
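The dict_part_eq helper introduced above is the core of the settings filtering. A minimal standalone sketch of its behavior; the settings dict below is invented for illustration:

    from functools import reduce

    def dict_part_eq(part, full_dict):
        # True when every (k, v) pair of `part` is present in `full_dict`
        return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)

    settings = {"image_name": "1.2", "platform": "ocp"}   # invented example settings
    assert dict_part_eq({"platform": "ocp"}, settings)
    assert not dict_part_eq({"platform": "kind"}, settings)
    # Caveat: a key in `part` that is missing from `full_dict` raises KeyError,
    # since the lambda indexes full_dict[y] directly (unless an earlier key
    # already made the accumulator False, in which case `and` short-circuits).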
From 6e024f713e887d8aab6ccf4407c60616a26c6bfd Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Wed, 24 Jan 2024 16:25:44 -0500
Subject: [PATCH 2/9] fixed framework to be called in TOPSAIL

---
 matrix_benchmarking/regression.py | 99 +++++++++++++++++++------------
 1 file changed, 62 insertions(+), 37 deletions(-)

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression.py
index d781b421..46b250e2 100644
--- a/matrix_benchmarking/regression.py
+++ b/matrix_benchmarking/regression.py
@@ -1,6 +1,8 @@
 import os
 import json
-
+import logging
+import types
+import datetime
 import numpy as np
 from functools import reduce
 from typing import Optional, Callable
@@ -14,7 +16,7 @@ def get_from_path(d, path):
 def dict_part_eq(part, full_dict):
     return reduce(lambda x, y: x and part[y] == full_dict[y], part.keys(), True)
 
-class RegressionStatus:
+class RegressionStatus(types.SimpleNamespace):
     def __init__(
         self,
         status: int,
@@ -35,62 +37,85 @@ class RegressionIndicator:
     """
     def __init__(
         self,
-        new_payloads: list[common.MatrixEntry],
+        new_payload: common.MatrixEntry,
         lts_payloads: list[common.MatrixEntry],
-        x_var_key = lambda x: x.results.metadata.end,
+        x_var = None,
+        x_var_key = lambda x: x.results.metadata.end.astimezone(),
         kpis: Optional[list[str]] = None,
         settings_filter: Optional[dict] = None,
+        combine_funcs: dict = {},
     ):
+        self.new_payload = new_payload
+        self.x_var = x_var
         self.x_var_key = x_var_key
         self.kpis = kpis
+        self.combine_funcs = combine_funcs
        self.settings_filter = settings_filter
 
+        if self.settings_filter and self.x_var:
+            logging.warning("settings_filter and x_var set, only using settings_filter")
+        elif self.x_var:
+            settings = self.new_payload.get_settings()
+            settings.pop(self.x_var)
+            self.settings_filter = settings
+
         if self.settings_filter:
             # Only store payloads that have equivalent (k, v) pairs
             # as the settings_filter
-            self.new_payloads = list(
-                filter(
-                    lambda x: dict_part_eq(self.settings_filter, x),
-                    map(lambda x: x.settings, new_payloads)
-                )
-            )
             self.lts_payloads = list(
                 filter(
-                    lambda x: dict_part_eq(self.settings_filter, x),
-                    map(lambda x: x.settings, lts_payloads)
+                    lambda x: dict_part_eq(self.settings_filter, x.get_settings()),
+                    lts_payloads
                 )
             )
+
+            if not dict_part_eq(self.settings_filter, self.new_payload.get_settings()):
+                self.new_payload = None
+                logging.warning("settings_filter isn't satisfied for the new payload")
         else:
-            self.new_payloads = new_payloads
             self.lts_payloads = lts_payloads
 
-        # Why isn't this working? I suspect gnarly python stuff
-        # self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
+        # This isn't strictly necessary for all analysis techniques, but
+        # is useful to have
+        self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
 
     def analyze(self) -> list[dict]:
 
-        if not self.new_payloads:
-            return [(None, "", RegressionStatus(0, explanation="Not enough new data"))]
-        elif not self.lts_payloads:
-            return [(None, "", RegressionStatus(0, explanation="Not enough LTS data"))]
+        if not self.new_payload:
+            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough new data"))}]
+
+        if not self.lts_payloads:
+            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough LTS data"))}]
 
         regression_results = []
-        for curr_result in self.new_payloads:
-            print(curr_result)
-            kpis_to_test = vars(curr_result.results.lts.kpis).keys() if not self.kpis else self.kpis
-            for kpi in kpis_to_test:
-                regression_results.append(
-                    {
-                        "result": curr_result,
-                        "kpi": kpi,
-                        "regression": self.regression_test(
-                            vars(curr_result.results.lts.kpis)[kpi].value,
-                            list(map(lambda x: vars(x.results.kpis).value, self.lts_payloads))
+
+        kpis_to_test = vars(self.new_payload.results.lts.kpis).keys() if not self.kpis else self.kpis
+        for kpi in kpis_to_test:
+
+            curr_values = vars(self.new_payload.results.lts.kpis)[kpi].value
+            lts_values = list(map(lambda x: vars(x.results.kpis)[kpi].value, self.lts_payloads))
+
+            if type(vars(self.new_payload.results.lts.kpis)[kpi].value) is list:
+                if kpi in self.combine_funcs:
+                    curr_values = self.combine_funcs[kpi](curr_values)
+                    lts_values = [self.combine_funcs[kpi](v) for v in lts_values]
+                else:
+                    logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
+                    continue
+
+            regression_results.append(
+                {
+                    "result": self.new_payload.get_settings(),
+                    "kpi": kpi,
+                    "regression": vars(
+                        self.regression_test(
+                            curr_values,
+                            lts_values
                         )
-                    }
-                )
-        print(regression_results)
+                    )
+                }
+            )
         return regression_results
 
     def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
@@ -106,14 +131,14 @@ def __init__(self, *args, threshold=3, **kwargs):
         super().__init__(*args, **kwargs)
         self.threshold = threshold
 
-    def regression_test(self, new_result, lts_results) -> RegressionStatus:
+    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
         """
         Determine if the curr_result is more/less than threshold
         standard deviations away from the previous_results
         """
-        mean = np.mean(prev_results)
-        std = np.std(prev_results)
-        z_score = (curr_result - mean) / std
+        mean = np.mean(lts_results)
+        std = np.std(lts_results)
+        z_score = (new_result - mean) / std
         if abs(z_score) > self.threshold:
             return RegressionStatus(
                 1,
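PATCH 2 introduces combine_funcs for KPIs whose value is a list rather than a scalar. A small self-contained sketch of the contract, using an invented KPI name (pod_start_latencies) and invented values:

    import numpy as np

    combine_funcs = {"pod_start_latencies": np.mean}  # invented KPI name
    kpi = "pod_start_latencies"

    curr_values = [1.2, 1.4, 1.1]            # list-valued KPI from the new payload
    lts_values = [[1.0, 1.3], [1.1, 1.2]]    # historical list-valued KPIs

    # Mirrors the branch added in analyze(): collapse lists to scalars,
    # otherwise the KPI is skipped with a warning.
    if kpi in combine_funcs:
        curr_values = combine_funcs[kpi](curr_values)
        lts_values = [combine_funcs[kpi](v) for v in lts_values]

    print(curr_values)  # ~1.233
    print(lts_values)   # [1.15, 1.15]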
From 720d1d01a35f10da5cade10d8804c9d3238b6983 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Fri, 26 Jan 2024 12:19:05 -0500
Subject: [PATCH 3/9] key existence check in filter

---
 matrix_benchmarking/download_lts.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/matrix_benchmarking/download_lts.py b/matrix_benchmarking/download_lts.py
index f72aec51..4f1483e7 100644
--- a/matrix_benchmarking/download_lts.py
+++ b/matrix_benchmarking/download_lts.py
@@ -38,7 +38,8 @@ def main(opensearch_host: str = "",
     opensearch_index: the OpenSearch index where the LTS payloads are stored (Mandatory)
     lts_results_dirname: The directory to place the downloaded LTS results files.
 
-    filters: If provided, only download the experiments matching the filters. Eg: {"image_name": "1.2"}. (Optional.)
+    filters: If provided, only download the experiments matching the filters. Eg: {"image_name": "1.2"}.
+             If the provided value is "*", then we just check to ensure the key's existence (Optional.)
     max_records: Maximum number of records to retrieve from the OpenSearch instance. 10,000 is the largest number possible without paging (Optional.)
     force: Ignore the presence of the anchor file before downloading the results (Optional.)
     clean: Delete all the existing '.json' files in the lts-results-dirname before downloading the results (Optional.)
@@ -121,7 +122,7 @@ def download(client, opensearch_index, filters, lts_results_dirname, max_records
     query["query"] = {
         "bool": {
             "must": [
-                {"term": {f"{k}.keyword": v}} for k, v in filters.items()
+                {"term": {f"{k}.keyword": v}} if v != "*" else {"exists": {"field": k}} for k, v in filters.items()
             ]
         }
     }
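The comprehension changed in PATCH 3 builds an OpenSearch bool/must query. For illustration, here is the query it produces for a mix of exact and wildcard filters (the filter keys and values are invented):

    filters = {"image_name": "1.2", "test_name": "*"}  # invented filter values

    query = {"query": {
        "bool": {
            "must": [
                {"term": {f"{k}.keyword": v}} if v != "*" else {"exists": {"field": k}}
                for k, v in filters.items()
            ]
        }
    }}
    print(query)
    # {'query': {'bool': {'must': [{'term': {'image_name.keyword': '1.2'}},
    #                              {'exists': {'field': 'test_name'}}]}}}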
From 0929b7ab01c7d9e83ca8cd4383cb92430063860a Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Mon, 29 Jan 2024 17:27:46 -0500
Subject: [PATCH 4/9] add regression model, split indicator modules

---
 matrix_benchmarking/models.py                 |  8 ++
 .../{regression.py => regression/__init__.py} | 76 +++---------------
 matrix_benchmarking/regression/zscore.py      | 30 ++++++++
 3 files changed, 48 insertions(+), 66 deletions(-)
 rename matrix_benchmarking/{regression.py => regression/__init__.py} (62%)
 create mode 100644 matrix_benchmarking/regression/zscore.py

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 88a81c0c..3afa6fad 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -130,3 +130,11 @@ def tostr(self):
         model.tostr = tostr
 
         return model
+
+class RegressionResult():
+    kpi: str
+    indicator: str
+    status: int
+    direction: Optional[int]
+    explanation: Optional[str]
+    details: Optional[dict[str, str]]

diff --git a/matrix_benchmarking/regression.py b/matrix_benchmarking/regression/__init__.py
similarity index 62%
rename from matrix_benchmarking/regression.py
rename to matrix_benchmarking/regression/__init__.py
index 46b250e2..b92c8bed 100644
--- a/matrix_benchmarking/regression.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -79,6 +79,8 @@ def __init__(
         # is useful to have
         self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
+    def get_name(self):
+        return "UndefinedRegressionIndicator"
 
     def analyze(self) -> list[dict]:
 
@@ -104,73 +106,15 @@ def analyze(self) -> list[dict]:
                     logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
                     continue
 
-            regression_results.append(
-                {
-                    "result": self.new_payload.get_settings(),
-                    "kpi": kpi,
-                    "regression": vars(
-                        self.regression_test(
-                            curr_values,
-                            lts_values
-                        )
-                    )
-                }
-            )
+
+            raw_results = self.regression_test(curr_values, lts_values)
+            stats = {
+                "kpi": kpi,
+                "indicator": self.get_name()
+            }
+            regression_results.append({**stats, **raw_results})
+
         return regression_results
 
     def regression_test(self, new_result: float, lts_result: np.array) -> RegressionStatus:
         return RegressionStatus(0, explanation="Default return status")
-
-
-class ZScoreIndicator(RegressionIndicator):
-    """
-    Example regression indicator that uses the Z score as a metric
-    to determine if the recent test was an outlier
-    """
-    def __init__(self, *args, threshold=3, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.threshold = threshold
-
-    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
-        """
-        Determine if the curr_result is more/less than threshold
-        standard deviations away from the previous_results
-        """
-        mean = np.mean(lts_results)
-        std = np.std(lts_results)
-        z_score = (new_result - mean) / std
-        if abs(z_score) > self.threshold:
-            return RegressionStatus(
-                1,
-                direction=1 if z_score > 0 else -1,
-                explanation="z-score greater than threshold",
-                details={"threshold": self.threshold, "zscore": z_score}
-            )
-        else:
-            return RegressionStatus(
-                0,
-                explanation="z-score not greater than threshold",
-                details={"threshold": self.threshold, "zscore": z_score}
-            )
-
-class PolynomialRegressionIndicator(RegressionIndicator):
-    """
-    Placeholder for polynomial regression that we could implement
-    somewhere in the pipeline
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
-        return RegressionStatus(0, explanation="Not implemented")
-
-class HunterWrapperIndicator(RegressionIndicator):
-    """
-    Some straightforward indicators are implemented above but this also provides what should
-    be a simple way to wrap datastax/Hunter in a regression_test
-    """
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-    def regression_test(self, curr_result, prev_results) -> RegressionStatus:
-        return RegressionStatus(0, explanation="Not implemented")

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
new file mode 100644
index 00000000..faf92b1a
--- /dev/null
+++ b/matrix_benchmarking/regression/zscore.py
@@ -0,0 +1,30 @@
+class ZScoreIndicator(RegressionIndicator):
+    """
+    Example regression indicator that uses the Z score as a metric
+    to determine if the recent test was an outlier
+    """
+    def __init__(self, *args, threshold=3, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.threshold = threshold
+
+    def get_name(self):
+        return f"ZScoreIndicator(threshold={self.threshold})"
+
+    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
+        """
+        Determine if the curr_result is more/less than threshold
+        standard deviations away from the previous_results
+        """
+        mean = np.mean(lts_results)
+        std = np.std(lts_results)
+        z_score = (new_result - mean) / std
+
+        status = 0
+        explanation = "z-score not greater than threshold"
+        details = {"threshold": self.threshold, "zscore": z_score}
+        if abs(z_score) > self.threshold:
+            status = 1
+            direction = 1 if z_score > 0 else -1,
+            explanation="z-score greater than threshold",
+
+        return RegressionStatus(0, direction=direction, explanation=explanation, details=details)
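One thing worth flagging in PATCH 4: regression_test still returns a RegressionStatus, which since PATCH 2 is a types.SimpleNamespace subclass and therefore not a mapping, so `{**stats, **raw_results}` raises TypeError at runtime; vars() is needed to reach its __dict__. PATCH 6 replaces this merge with a proper model. A quick demonstration of the pitfall in isolation:

    import types

    class RegressionStatus(types.SimpleNamespace):
        def __init__(self, status, explanation=None):
            self.status = status
            self.explanation = explanation

    stats = {"kpi": "some_kpi", "indicator": "UndefinedRegressionIndicator"}
    raw_results = RegressionStatus(0, explanation="Default return status")

    # {**stats, **raw_results}               # TypeError: not a mapping
    merged = {**stats, **vars(raw_results)}  # works: vars() exposes __dict__
    print(merged)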
From e781630022e610ab21887b35ae85ee0ea1eea8e3 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 30 Jan 2024 11:28:38 -0500
Subject: [PATCH 5/9] fix module split for zscore

---
 matrix_benchmarking/models.py            |  2 +-
 matrix_benchmarking/regression/zscore.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 3afa6fad..258b059d 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -131,7 +131,7 @@ def tostr(self):
 
         return model
 
-class RegressionResult():
+class RegressionResult(ExclusiveModel):
     kpi: str
     indicator: str
     status: int

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index faf92b1a..fc879178 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -1,4 +1,8 @@
-class ZScoreIndicator(RegressionIndicator):
+from matrix_benchmarking import regression
+
+import numpy as np
+
+class ZScoreIndicator(regression.RegressionIndicator):
     """
     Example regression indicator that uses the Z score as a metric
     to determine if the recent test was an outlier
@@ -10,7 +14,7 @@ def __init__(self, *args, threshold=3, **kwargs):
     def get_name(self):
         return f"ZScoreIndicator(threshold={self.threshold})"
 
-    def regression_test(self, new_result: float, lts_results: np.array) -> RegressionStatus:
+    def regression_test(self, new_result: float, lts_results: np.array) -> regression.RegressionStatus:
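As of PATCH 5, zscore.py still carries a few subtle issues that PATCH 8 later fixes: trailing commas turn the direction and explanation assignments into one-element tuples, direction is unbound when the result is not an outlier, and the return still hardcodes status 0. The tuple pitfall in isolation:

    z_score, threshold = 4.2, 3

    if abs(z_score) > threshold:
        direction = 1 if z_score > 0 else -1,            # trailing comma -> (1,)
        explanation = "z-score greater than threshold",  # -> a 1-tuple, not a str

    print(type(direction), direction)      # <class 'tuple'> (1,)
    print(type(explanation), explanation)  # <class 'tuple'> ('z-score greater than threshold',)
    # And when abs(z_score) <= threshold, `direction` is never bound at all,
    # so the subsequent return would raise NameError.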
        """
        Determine if the curr_result is more/less than threshold
        standard deviations away from the previous_results
@@ -27,4 +31,4 @@ def regression_test(self, new_result: float, lts_results: np.array) -> Regressio
             direction = 1 if z_score > 0 else -1,
             explanation="z-score greater than threshold",
 
-        return RegressionStatus(0, direction=direction, explanation=explanation, details=details)
+        return regression.RegressionStatus(0, direction=direction, explanation=explanation, details=details)

From 7966e2ccb533f4876807c053d64a892c1b2b7782 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 30 Jan 2024 13:59:53 -0500
Subject: [PATCH 6/9] use RegressionResult model in indicator

---
 matrix_benchmarking/models.py              |  7 +++--
 matrix_benchmarking/regression/__init__.py | 34 +++++++++++++---------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 258b059d..0357bfc9 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -133,8 +133,9 @@ def tostr(self):
 
 class RegressionResult(ExclusiveModel):
     kpi: str
+    setting: str
     indicator: str
     status: int
-    direction: Optional[int]
-    explanation: Optional[str]
-    details: Optional[dict[str, str]]
+    direction: Optional[int] = Field(default=None)
+    explanation: Optional[str] = Field(default=None)
+    details: Optional[dict[str, str]] = Field(default=None)

diff --git a/matrix_benchmarking/regression/__init__.py b/matrix_benchmarking/regression/__init__.py
index b92c8bed..951836ce 100644
--- a/matrix_benchmarking/regression/__init__.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -8,6 +8,7 @@
 from typing import Optional, Callable
 
 import matrix_benchmarking.common as common
+import matrix_benchmarking.models as models
 
 def get_from_path(d, path):
     return reduce(dict.get, path.split("."), d)
@@ -29,7 +30,6 @@ def __init__(
         self.explanation = explanation
         self.details = details
 
-
 class RegressionIndicator:
     """
     Assume the matrix that is passed in contains a prefiltered combination of settings,
@@ -82,13 +82,17 @@ def __init__(
     def get_name(self):
         return "UndefinedRegressionIndicator"
 
-    def analyze(self) -> list[dict]:
-
-        if not self.new_payload:
-            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough new data"))}]
+    def analyze(self) -> list[models.RegressionResult]:
 
-        if not self.lts_payloads:
-            return [{"result": None, "kpi": None, "regression": vars(RegressionStatus(0, explanation="Not enough LTS data"))}]
+        if not self.new_payload or not self.lts_payloads:
+            return [
+                models.RegressionResult(
+                    kpi="",
+                    setting="" if not self.x_var else self.x_var,
+                    indicator=self.get_name(),
+                    status=0
+                )
+            ]
 
         regression_results = []
 
@@ -107,12 +111,16 @@ def analyze(self) -> list[dict]:
             continue
 
-            raw_results = self.regression_test(curr_values, lts_values)
-            stats = {
-                "kpi": kpi,
-                "indicator": self.get_name()
-            }
-            regression_results.append({**stats, **raw_results})
+            raw_results: RegressionStatus = self.regression_test(curr_values, lts_values)
+            result = models.RegressionResult(
+                kpi=kpi,
+                setting="" if not self.x_var else self.x_var,
+                indicator=self.get_name(),
+                direction=raw_results.direction,
+                explanation=raw_results.explanation,
+                details=raw_results.details
+            )
+            regression_results.append(result)
 
         return regression_results
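A note on the PATCH 6 model: status has no default, so a pydantic model shaped like RegressionResult requires it at construction time, yet the new analyze() body builds the result without status=; PATCH 8 adds status=raw_results.status. A minimal sketch, assuming ExclusiveModel is a pydantic BaseModel subclass and Field is pydantic's (both assumptions, since models.py's imports are not shown in this series):

    from typing import Optional
    from pydantic import BaseModel, Field

    class RegressionResult(BaseModel):  # stand-in for ExclusiveModel
        kpi: str
        setting: str
        indicator: str
        status: int
        direction: Optional[int] = Field(default=None)
        explanation: Optional[str] = Field(default=None)
        details: Optional[dict[str, str]] = Field(default=None)

    ok = RegressionResult(kpi="some_kpi", setting="",
                          indicator="ZScoreIndicator(threshold=3)", status=1)

    try:
        RegressionResult(kpi="some_kpi", setting="", indicator="...")  # no status
    except Exception as e:
        print(type(e).__name__)  # ValidationError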
From 920dda4e190122c744fcfaab78dfb14009930d33 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Thu, 1 Feb 2024 17:13:29 -0500
Subject: [PATCH 7/9] temp fix for stale payloads

---
 matrix_benchmarking/store/simple.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/matrix_benchmarking/store/simple.py b/matrix_benchmarking/store/simple.py
index afe3d591..3868d4ed 100644
--- a/matrix_benchmarking/store/simple.py
+++ b/matrix_benchmarking/store/simple.py
@@ -185,6 +185,11 @@ def has_lts_anchor(files):
     with open(filepath) as f:
         document = json.load(f)
 
+    # Temporary fix for old payloads in OpenSearch,
+    # I need to clear this up before merge
+    if "regression_results" in document:
+        document.pop("regression_results")
+
     try:
         lts_payload = store.lts_schema.parse_obj(document)
     except pydantic.error_wrappers.ValidationError as e:
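The temporary fix in PATCH 7 drops a stale regression_results field from old OpenSearch documents before schema validation. Presumably the LTS schema forbids unknown fields (the series never shows ExclusiveModel's definition, so this is an assumption); a sketch of the failure mode with pydantic v1, whose error_wrappers module the surrounding code already references:

    import pydantic

    class LtsPayload(pydantic.BaseModel):
        kpi: str
        class Config:
            extra = "forbid"  # assumed: unknown fields are rejected

    document = {"kpi": "some_kpi", "regression_results": []}  # stale field from an old upload

    if "regression_results" in document:
        document.pop("regression_results")

    payload = LtsPayload.parse_obj(document)  # validates once the stale field is gone
    print(payload)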
From 83ff7531d555096f31600594edff9f18d55b73ef Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Mon, 5 Feb 2024 16:48:18 -0500
Subject: [PATCH 8/9] handle gathered results

---
 matrix_benchmarking/common.py              |  3 +-
 matrix_benchmarking/models.py              |  4 +-
 matrix_benchmarking/regression/__init__.py | 64 +++++++++++++---------
 matrix_benchmarking/regression/zscore.py   |  7 ++-
 matrix_benchmarking/store/simple.py        |  4 ----
 5 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/matrix_benchmarking/common.py b/matrix_benchmarking/common.py
index 7e1b4eb1..3af302a6 100644
--- a/matrix_benchmarking/common.py
+++ b/matrix_benchmarking/common.py
@@ -24,7 +24,8 @@ def __init__(self, location, results,
         self.results = results
 
         self.settings.__dict__.update(processed_settings)
-
+
+        self.import_key = import_key
         self.processed_key = processed_key
         self.import_settings = processed_settings

diff --git a/matrix_benchmarking/models.py b/matrix_benchmarking/models.py
index 0357bfc9..a138cf2f 100644
--- a/matrix_benchmarking/models.py
+++ b/matrix_benchmarking/models.py
@@ -131,9 +131,9 @@ def tostr(self):
 
         return model
 
-class RegressionResult(ExclusiveModel):
+class Regression(ExclusiveModel):
     kpi: str
-    setting: str
+    metric: str
     indicator: str
     status: int
     direction: Optional[int] = Field(default=None)

diff --git a/matrix_benchmarking/regression/__init__.py b/matrix_benchmarking/regression/__init__.py
index 951836ce..7d335df7 100644
--- a/matrix_benchmarking/regression/__init__.py
+++ b/matrix_benchmarking/regression/__init__.py
@@ -6,6 +6,7 @@ import numpy as np
 from functools import reduce
 from typing import Optional, Callable
+import copy
 
 import matrix_benchmarking.common as common
 import matrix_benchmarking.models as models
@@ -39,11 +40,12 @@ def __init__(
         self,
         new_payload: common.MatrixEntry,
         lts_payloads: list[common.MatrixEntry],
-        x_var = None,
-        x_var_key = lambda x: x.results.metadata.end.astimezone(),
+        x_var: str,
+        x_var_key = lambda x: x.metadata.end.astimezone(),
         kpis: Optional[list[str]] = None,
         settings_filter: Optional[dict] = None,
         combine_funcs: dict = {},
+        use_x_var = False # Automatically determine the settings for the x_var
     ):
         self.new_payload = new_payload
         self.x_var = x_var
@@ -51,11 +53,12 @@ def __init__(
         self.kpis = kpis
         self.combine_funcs = combine_funcs
         self.settings_filter = settings_filter
+        self.use_x_var = use_x_var
 
         if self.settings_filter and self.x_var:
             logging.warning("settings_filter and x_var set, only using settings_filter")
-        elif self.x_var:
-            settings = self.new_payload.get_settings()
+        elif self.x_var and self.use_x_var:
+            settings = dict(self.new_payload.get_settings())
             settings.pop(self.x_var)
             self.settings_filter = settings
@@ -75,20 +78,18 @@ def __init__(
         else:
             self.lts_payloads = lts_payloads
 
-        # This isn't strictly necessary for all analysis techniques, but
-        # is useful to have
-        self.lts_payloads.sort(key=lambda entry: self.x_var_key(entry))
 
     def get_name(self):
         return "UndefinedRegressionIndicator"
 
-    def analyze(self) -> list[models.RegressionResult]:
+    def analyze(self) -> list[models.Regression]:
 
         if not self.new_payload or not self.lts_payloads:
+            logging.info("Missing a new payload or lts payloads")
             return [
-                models.RegressionResult(
+                models.Regression(
                     kpi="",
-                    setting="" if not self.x_var else self.x_var,
+                    metric="" if not self.x_var else self.x_var,
                     indicator=self.get_name(),
                     status=0
                 )
             ]
@@ -99,28 +100,41 @@ def analyze(self) -> list[models.Regression]:
         kpis_to_test = vars(self.new_payload.results.lts.kpis).keys() if not self.kpis else self.kpis
         for kpi in kpis_to_test:
 
-            curr_values = vars(self.new_payload.results.lts.kpis)[kpi].value
-            lts_values = list(map(lambda x: vars(x.results.kpis)[kpi].value, self.lts_payloads))
+            curr_values = []
+            if type(self.new_payload.results) is list:
+                for result in self.new_payload.results:
+                    curr_values.append(vars(result.lts.kpis)[kpi].value)
+            else:
+                curr_values.append(vars(self.new_payload.results.lts.kpis)[kpi].value)
+
+            lts_values = []
+            for payload in self.lts_payloads:
+                if type(payload.results) is list:
+                    lts_values += list(map(lambda x: vars(x.results.kpis)[kpi].value, payload.results))
+                else:
+                    lts_values.append(vars(payload.results.kpis)[kpi].value)
 
-            if type(vars(self.new_payload.results.lts.kpis)[kpi].value) is list:
+            if any(map(lambda x: type(x) is list, curr_values + lts_values)):
                 if kpi in self.combine_funcs:
-                    curr_values = self.combine_funcs[kpi](curr_values)
+                    curr_values = [self.combine_funcs[kpi](v) for v in curr_values]
                     lts_values = [self.combine_funcs[kpi](v) for v in lts_values]
                 else:
                     logging.warning(f"Skipping KPI with list of values, consider filtering KPIs or providing a combine_func for {kpi}")
                     continue
 
-            raw_results: RegressionStatus = self.regression_test(curr_values, lts_values)
-            result = models.Regression(
-                kpi=kpi,
-                metric="" if not self.x_var else self.x_var,
-                indicator=self.get_name(),
-                direction=raw_results.direction,
-                explanation=raw_results.explanation,
-                details=raw_results.details
-            )
-            regression_results.append(result)
+            for curr_value in curr_values:
+                raw_results: RegressionStatus = self.regression_test(curr_value, lts_values)
+                result = models.Regression(
+                    kpi=kpi,
+                    metric="" if not self.x_var else self.x_var,
+                    indicator=self.get_name(),
+                    status=raw_results.status,
+                    direction=raw_results.direction,
+                    explanation=raw_results.explanation,
+                    details=raw_results.details
+                )
+                regression_results.append(result)
 
         return regression_results
diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index fc879178..b63e4941 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -24,11 +24,12 @@ def regression_test(self, new_result: float, lts_results: np.array) -> regressio
         z_score = (new_result - mean) / std
 
         status = 0
+        direction = 0
         explanation = "z-score not greater than threshold"
         details = {"threshold": self.threshold, "zscore": z_score}
         if abs(z_score) > self.threshold:
             status = 1
-            direction = 1 if z_score > 0 else -1,
-            explanation="z-score greater than threshold",
+            direction = 1 if z_score > 0 else -1
+            explanation="z-score greater than threshold"
 
-        return regression.RegressionStatus(0, direction=direction, explanation=explanation, details=details)
+        return regression.RegressionStatus(status, direction=direction, explanation=explanation, details=details)

diff --git a/matrix_benchmarking/store/simple.py b/matrix_benchmarking/store/simple.py
index 3868d4ed..aec9b0f9 100644
--- a/matrix_benchmarking/store/simple.py
+++ b/matrix_benchmarking/store/simple.py
@@ -185,10 +185,6 @@ def has_lts_anchor(files):
     with open(filepath) as f:
         document = json.load(f)
 
-    # Temporary fix for old payloads in OpenSearch,
-    # I need to clear this up before merge
-    if "regression_results" in document:
-        document.pop("regression_results")
-
     try:
         lts_payload = store.lts_schema.parse_obj(document)
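The core of the "gathered results" handling added in PATCH 8, in isolation: an entry's .results may now be a single result object or a list of them. A self-contained sketch with stand-in namespace objects (the throughput KPI name is invented):

    from types import SimpleNamespace as NS

    def gather_curr_values(results, kpi):
        # Mirrors the branch added to analyze(): accept one result or many.
        curr_values = []
        if type(results) is list:
            for result in results:
                curr_values.append(vars(result.lts.kpis)[kpi].value)
        else:
            curr_values.append(vars(results.lts.kpis)[kpi].value)
        return curr_values

    one = NS(lts=NS(kpis=NS(throughput=NS(value=42.0))))
    print(gather_curr_values(one, "throughput"))         # [42.0]
    print(gather_curr_values([one, one], "throughput"))  # [42.0, 42.0]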
From 54ae4a97128402c32fb722d2c9b0be5154f042b3 Mon Sep 17 00:00:00 2001
From: Drew Ripberger
Date: Tue, 6 Feb 2024 14:02:24 -0500
Subject: [PATCH 9/9] add more details to zscore result

---
 matrix_benchmarking/regression/zscore.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/matrix_benchmarking/regression/zscore.py b/matrix_benchmarking/regression/zscore.py
index b63e4941..60c2da99 100644
--- a/matrix_benchmarking/regression/zscore.py
+++ b/matrix_benchmarking/regression/zscore.py
@@ -26,7 +26,13 @@ def regression_test(self, new_result: float, lts_results: np.array) -> regressio
         status = 0
         direction = 0
         explanation = "z-score not greater than threshold"
-        details = {"threshold": self.threshold, "zscore": z_score}
+        details = {
+            "new_result": new_result,
+            "threshold": self.threshold,
+            "zscore": z_score,
+            "mean": mean,
+            "stddev": std
+        }
         if abs(z_score) > self.threshold:
             status = 1
             direction = 1 if z_score > 0 else -1
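Taken together, the series leaves the framework invocable roughly like this (a hedged sketch: new_entry and lts_entries are placeholders for real common.MatrixEntry objects, and "version" is an invented setting name):

    from matrix_benchmarking.regression.zscore import ZScoreIndicator

    indicator = ZScoreIndicator(
        new_entry,        # placeholder: the MatrixEntry under test
        lts_entries,      # placeholder: historical LTS MatrixEntry list
        x_var="version",  # invented setting name to hold out of the filter
        threshold=3,
    )

    for reg in indicator.analyze():  # -> list[models.Regression]
        if reg.status:
            print(f"{reg.kpi}: {reg.explanation} ({reg.details})")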