From e35ac197f168b1b51461e5153b45243f04d47e98 Mon Sep 17 00:00:00 2001 From: Far0n Date: Fri, 23 Mar 2018 18:07:11 +0100 Subject: [PATCH] F0.5, F2 and Accuracy for daicx (#507) --- src/cpu/metrics/metrics.cpp | 52 ++++++++++++- src/include/metrics/metrics.h | 19 ++++- src/interface_py/h2o4gpu/util/metrics.py | 95 +++++++++++++++++++++--- 3 files changed, 150 insertions(+), 16 deletions(-) diff --git a/src/cpu/metrics/metrics.cpp b/src/cpu/metrics/metrics.cpp index bcd1bc89e..a3902be03 100644 --- a/src/cpu/metrics/metrics.cpp +++ b/src/cpu/metrics/metrics.cpp @@ -29,11 +29,26 @@ namespace h2o4gpu { return (std::abs(y) < 1E-15) ? 0.0 : x / y; } + double f05(double tp, double tn, double fp, double fn) { + auto y = 1.25 * tp + fp + 0.25 * fn; + return (std::abs(y) < 1E-15) ? 0.0 : (1.25 * tp) / y; + } + double f1(double tp, double tn, double fp, double fn) { auto y = 2 * tp + fp + fn; return (std::abs(y) < 1E-15) ? 0.0 : (2 * tp) / y; } + double f2(double tp, double tn, double fp, double fn) { + auto y = 5 * tp + fp + 4 * fn; + return (std::abs(y) < 1E-15) ? 0.0 : (5 * tp) / y; + } + + double acc(double tp, double tn, double fp, double fn) { + auto y = tp + fp + tn + fn; + return (std::abs(y) < 1E-15) ? 0.0 : (tp + tn) / y; + } + double cm_metric_opt(std::vector y, std::vector yhat, std::vector w, CMMetricFunc metric) { auto idx = argsort(yhat); @@ -126,15 +141,48 @@ namespace h2o4gpu { std::vector(w, w + l), mcc); } + double f05_opt(double *y, int n, double *yhat, int m) { + std::vector w(n, 1.0); + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + w, f05); + } + + double f05_opt(double *y, int n, double *yhat, int m, double *w, int l) { + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + std::vector(w, w + l), f05); + } + double f1_opt(double *y, int n, double *yhat, int m) { std::vector w(n, 1.0); return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), - w, f1); + w, f1); } double f1_opt(double *y, int n, double *yhat, int m, double *w, int l) { return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), - std::vector(w, w + l), f1); + std::vector(w, w + l), f1); + } + + double f2_opt(double *y, int n, double *yhat, int m) { + std::vector w(n, 1.0); + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + w, f2); + } + + double f2_opt(double *y, int n, double *yhat, int m, double *w, int l) { + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + std::vector(w, w + l), f2); + } + + double acc_opt(double *y, int n, double *yhat, int m) { + std::vector w(n, 1.0); + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + w, acc); + } + + double acc_opt(double *y, int n, double *yhat, int m, double *w, int l) { + return cm_metric_opt(std::vector(y, y + n), std::vector(yhat, yhat + m), + std::vector(w, w + l), acc); } void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j) { diff --git a/src/include/metrics/metrics.h b/src/include/metrics/metrics.h index 060cc0986..8cbecbc27 100644 --- a/src/include/metrics/metrics.h +++ b/src/include/metrics/metrics.h @@ -3,20 +3,31 @@ namespace h2o4gpu { - + double mcc(double tp, double tn, double fp, double fn); + double f05(double tp, double tn, double fp, double fn); double f1(double tp, double tn, double fp, double fn); + double f2(double tp, double tn, double fp, double fn); + double acc(double tp, double tn, double fp, double fn); double mcc_opt(double *y, int n, double *yhat, int m); double mcc_opt(double *y, int n, double *yhat, int m, double* w, int l); + double f05_opt(double *y, int n, double *yhat, int m); + double f05_opt(double *y, int n, double *yhat, int m, double* w, int l); + double f1_opt(double *y, int n, double *yhat, int m); double f1_opt(double *y, int n, double *yhat, int m, double* w, int l); - + + double f2_opt(double *y, int n, double *yhat, int m); + double f2_opt(double *y, int n, double *yhat, int m, double* w, int l); + + double acc_opt(double *y, int n, double *yhat, int m); + double acc_opt(double *y, int n, double *yhat, int m, double* w, int l); + void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j); void confusion_matrices(double *y, int n, double *yhat, int m, double* w, int l, double *cm, int k, int j); - + } #endif - diff --git a/src/interface_py/h2o4gpu/util/metrics.py b/src/interface_py/h2o4gpu/util/metrics.py index ab8760a38..8111c8ca9 100644 --- a/src/interface_py/h2o4gpu/util/metrics.py +++ b/src/interface_py/h2o4gpu/util/metrics.py @@ -275,11 +275,35 @@ def auc(actual, posterior): return area_under_curve +def f05_opt(actual, predicted, sample_weight=None): + """ + Computes the F0.5-Score after optimal predictions thresholding. + + This function maximizes the F0.5-Score by means of + optimal predictions thresholding. + + :param actual : numpy array + The ground truth value + :param predicted : numpy array + The predicted value + :param sample_weight : numpy array or None + sample weights + + :returns double + The optimal F0.5-Score + """ + import h2o4gpu.util.daicx as daicx + if sample_weight is None: + return daicx.f05_opt(actual.ravel(), predicted.ravel()) + return daicx.f05_opt(actual.ravel(), predicted.ravel(), + sample_weight.ravel()) + + def f1_opt(actual, predicted, sample_weight=None): """ - Computes the F1-score after optimal predictions thresholding. + Computes the F1-Score after optimal predictions thresholding. - This function maximizes the F1-score by means of + This function maximizes the F1-Score by means of optimal predictions thresholding. :param actual : numpy array @@ -290,12 +314,37 @@ def f1_opt(actual, predicted, sample_weight=None): sample weights :returns double - The optimal F1-score + The optimal F1-Score """ import h2o4gpu.util.daicx as daicx if sample_weight is None: - return daicx.f1_opt(actual, predicted) - return daicx.f1_opt(actual, predicted, sample_weight) + return daicx.f1_opt(actual.ravel(), predicted.ravel()) + return daicx.f1_opt(actual.ravel(), predicted.ravel(), + sample_weight.ravel()) + + +def f2_opt(actual, predicted, sample_weight=None): + """ + Computes the F2-Score after optimal predictions thresholding. + + This function maximizes the F2-Score by means of + optimal predictions thresholding. + + :param actual : numpy array + The ground truth value + :param predicted : numpy array + The predicted value + :param sample_weight : numpy array or None + sample weights + + :returns double + The optimal F2-Score + """ + import h2o4gpu.util.daicx as daicx + if sample_weight is None: + return daicx.f2_opt(actual.ravel(), predicted.ravel()) + return daicx.f2_opt(actual.ravel(), predicted.ravel(), + sample_weight.ravel()) def mcc_opt(actual, predicted, sample_weight=None): @@ -317,13 +366,38 @@ def mcc_opt(actual, predicted, sample_weight=None): """ import h2o4gpu.util.daicx as daicx if sample_weight is None: - return daicx.mcc_opt(actual, predicted) - return daicx.mcc_opt(actual, predicted, sample_weight) + return daicx.mcc_opt(actual.ravel(), predicted.ravel()) + return daicx.mcc_opt(actual.ravel(), predicted.ravel(), + sample_weight.ravel()) + + +def acc_opt(actual, predicted, sample_weight=None): + """ + Computes the Accuracy after optimal predictions thresholding. + + This function maximizes the Accuracy + by means of optimal predictions thresholding. + + :param actual : numpy array + The ground truth value + :param predicted : numpy array + The predicted value + :param sample_weight : numpy array or None + sample weights + + :returns double + The optimal Accuracy + """ + import h2o4gpu.util.daicx as daicx + if sample_weight is None: + return daicx.acc_opt(actual.ravel(), predicted.ravel()) + return daicx.acc_opt(actual.ravel(), predicted.ravel(), + sample_weight.ravel()) def confusion_matrices(actual, predicted, sample_weight=None): """ - Computes confusion matrices for DAICX analysis. + Computes confusion matrices for ROC analysis. This function cumputes confusion matrices for all possible prediction thresholds. @@ -343,7 +417,8 @@ def confusion_matrices(actual, predicted, sample_weight=None): res = np.zeros((actual.shape[0], len(cm_stats_cols))) if sample_weight is None: - daicx.confusion_matrices(actual, predicted, res) + daicx.confusion_matrices(actual.ravel(), predicted.ravel(), res) else: - daicx.confusion_matrices(actual, predicted, sample_weight, res) + daicx.confusion_matrices(actual.ravel(), predicted.ravel(), + sample_weight.ravel(), res) return pd.DataFrame(res[~np.all(res == 0, axis=1)], columns=cm_stats_cols)