From e35ac197f168b1b51461e5153b45243f04d47e98 Mon Sep 17 00:00:00 2001
From: Far0n <frozenfingerz@gmail.com>
Date: Fri, 23 Mar 2018 18:07:11 +0100
Subject: [PATCH] F0.5, F2 and Accuracy for daicx (#507)

---
 src/cpu/metrics/metrics.cpp              | 52 ++++++++++++-
 src/include/metrics/metrics.h            | 19 ++++-
 src/interface_py/h2o4gpu/util/metrics.py | 95 +++++++++++++++++++++---
 3 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/cpu/metrics/metrics.cpp b/src/cpu/metrics/metrics.cpp
index bcd1bc89e..a3902be03 100644
--- a/src/cpu/metrics/metrics.cpp
+++ b/src/cpu/metrics/metrics.cpp
@@ -29,11 +29,26 @@ namespace h2o4gpu {
     return (std::abs(y) < 1E-15) ?  0.0 : x / y;
   }
 
+  double f05(double tp, double tn, double fp, double fn) {  // F-beta score with beta = 0.5; tn is not read
+    auto y = 1.25 * tp + fp + 0.25 * fn;  // denominator: (1 + 0.25) * tp + fp + 0.25 * fn
+    return (std::abs(y) < 1E-15) ? 0.0 : (1.25 * tp) / y;  // returns 0.0 rather than dividing when |y| < 1E-15
+  }
+
   double f1(double tp, double tn, double fp, double fn) {
     auto y = 2 * tp + fp + fn;
     return (std::abs(y) < 1E-15) ? 0.0 : (2 * tp) / y;
   }
 
+  double f2(double tp, double tn, double fp, double fn) {  // F-beta score with beta = 2; tn is not read
+    auto y = 5 * tp + fp + 4 * fn;  // denominator: (1 + 4) * tp + fp + 4 * fn
+    return (std::abs(y) < 1E-15) ? 0.0 : (5 * tp) / y;  // returns 0.0 rather than dividing when |y| < 1E-15
+  }
+
+  double acc(double tp, double tn, double fp, double fn) {  // accuracy: (tp + tn) over the sum of all four counts
+    auto y = tp + fp + tn + fn;  // sum of all four counts
+    return (std::abs(y) < 1E-15) ? 0.0 : (tp + tn) / y;  // returns 0.0 rather than dividing when |y| < 1E-15
+  }
+
   double cm_metric_opt(std::vector<double> y, std::vector<double> yhat,
                        std::vector<double> w, CMMetricFunc metric) {
     auto idx = argsort(yhat);
@@ -126,15 +141,48 @@ namespace h2o4gpu {
                          std::vector<double>(w, w + l), mcc);
   }
 
+  double f05_opt(double *y, int n, double *yhat, int m) {  // unweighted overload: delegates to cm_metric_opt with f05 as the metric
+    std::vector<double> w(n, 1.0);  // n weights, all 1.0
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      w, f05);
+  }
+
+  double f05_opt(double *y, int n, double *yhat, int m, double *w, int l) {  // weighted overload: delegates to cm_metric_opt with f05 as the metric
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      std::vector<double>(w, w + l), f05);  // copies w[0..l) as the weights argument
+  }
+
   double f1_opt(double *y, int n, double *yhat, int m) {
     std::vector<double> w(n, 1.0);
     return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),
-                         w, f1);
+      w, f1);
   }
 
   double f1_opt(double *y, int n, double *yhat, int m, double *w, int l) {
     return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),
-                         std::vector<double>(w, w + l), f1);
+      std::vector<double>(w, w + l), f1);
+  }
+
+  double f2_opt(double *y, int n, double *yhat, int m) {  // unweighted overload: delegates to cm_metric_opt with f2 as the metric
+    std::vector<double> w(n, 1.0);  // n weights, all 1.0
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      w, f2);
+  }
+
+  double f2_opt(double *y, int n, double *yhat, int m, double *w, int l) {  // weighted overload: delegates to cm_metric_opt with f2 as the metric
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      std::vector<double>(w, w + l), f2);  // copies w[0..l) as the weights argument
+  }
+
+  double acc_opt(double *y, int n, double *yhat, int m) {  // unweighted overload: delegates to cm_metric_opt with acc as the metric
+    std::vector<double> w(n, 1.0);  // n weights, all 1.0
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      w, acc);
+  }
+
+  double acc_opt(double *y, int n, double *yhat, int m, double *w, int l) {  // weighted overload: delegates to cm_metric_opt with acc as the metric
+    return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),  // copies y[0..n) and yhat[0..m)
+      std::vector<double>(w, w + l), acc);  // copies w[0..l) as the weights argument
   }
   
   void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j) {
diff --git a/src/include/metrics/metrics.h b/src/include/metrics/metrics.h
index 060cc0986..8cbecbc27 100644
--- a/src/include/metrics/metrics.h
+++ b/src/include/metrics/metrics.h
@@ -3,20 +3,31 @@
 
 
 namespace h2o4gpu {
-    
+
   double mcc(double tp, double tn, double fp, double fn);
+  double f05(double tp, double tn, double fp, double fn);  // F0.5 score from confusion-matrix counts
   double f1(double tp, double tn, double fp, double fn);
+  double f2(double tp, double tn, double fp, double fn);  // F2 score from confusion-matrix counts
+  double acc(double tp, double tn, double fp, double fn);  // accuracy from confusion-matrix counts
 
   double mcc_opt(double *y, int n, double *yhat, int m);
   double mcc_opt(double *y, int n, double *yhat, int m, double* w, int l);
 
+  double f05_opt(double *y, int n, double *yhat, int m);  // unweighted: y has n entries, yhat has m
+  double f05_opt(double *y, int n, double *yhat, int m, double* w, int l);  // weighted: w has l entries
+
   double f1_opt(double *y, int n, double *yhat, int m);
   double f1_opt(double *y, int n, double *yhat, int m, double* w, int l);
-  
+
+  double f2_opt(double *y, int n, double *yhat, int m);  // unweighted: y has n entries, yhat has m
+  double f2_opt(double *y, int n, double *yhat, int m, double* w, int l);  // weighted: w has l entries
+
+  double acc_opt(double *y, int n, double *yhat, int m);  // unweighted: y has n entries, yhat has m
+  double acc_opt(double *y, int n, double *yhat, int m, double* w, int l);  // weighted: w has l entries
+
   void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j);
   void confusion_matrices(double *y, int n, double *yhat, int m, double* w, int l, double *cm, int k, int j);
-  
+
 }
 
 #endif
-
diff --git a/src/interface_py/h2o4gpu/util/metrics.py b/src/interface_py/h2o4gpu/util/metrics.py
index ab8760a38..8111c8ca9 100644
--- a/src/interface_py/h2o4gpu/util/metrics.py
+++ b/src/interface_py/h2o4gpu/util/metrics.py
@@ -275,11 +275,35 @@ def auc(actual, posterior):
     return area_under_curve
 
 
+def f05_opt(actual, predicted, sample_weight=None):
+    """
+    Computes the F0.5-Score after optimal predictions thresholding.
+
+    This function maximizes the F0.5-Score by means of
+    optimal predictions thresholding.
+
+    :param actual : numpy array
+                    The ground truth values; flattened via ravel()
+    :param predicted : numpy array
+                       The predicted values; flattened via ravel()
+    :param sample_weight : numpy array or None
+                           Optional sample weights; None = unweighted
+
+    :returns double
+             The optimal F0.5-Score
+    """
+    import h2o4gpu.util.daicx as daicx  # function-local import
+    if sample_weight is None:  # no weights: two-argument call
+        return daicx.f05_opt(actual.ravel(), predicted.ravel())
+    return daicx.f05_opt(actual.ravel(), predicted.ravel(),
+                         sample_weight.ravel())
+
+
 def f1_opt(actual, predicted, sample_weight=None):
     """
-    Computes the F1-score after optimal predictions thresholding.
+    Computes the F1-Score after optimal predictions thresholding.
 
-    This function maximizes the F1-score by means of
+    This function maximizes the F1-Score by means of
     optimal predictions thresholding.
 
     :param actual : numpy array
@@ -290,12 +314,37 @@ def f1_opt(actual, predicted, sample_weight=None):
                            sample weights
 
     :returns double
-             The optimal F1-score
+             The optimal F1-Score
     """
     import h2o4gpu.util.daicx as daicx
     if sample_weight is None:
-        return daicx.f1_opt(actual, predicted)
-    return daicx.f1_opt(actual, predicted, sample_weight)
+        return daicx.f1_opt(actual.ravel(), predicted.ravel())
+    return daicx.f1_opt(actual.ravel(), predicted.ravel(),
+                        sample_weight.ravel())
+
+
+def f2_opt(actual, predicted, sample_weight=None):
+    """
+    Computes the F2-Score after optimal predictions thresholding.
+
+    This function maximizes the F2-Score by means of
+    optimal predictions thresholding.
+
+    :param actual : numpy array
+                    The ground truth values; flattened via ravel()
+    :param predicted : numpy array
+                       The predicted values; flattened via ravel()
+    :param sample_weight : numpy array or None
+                           Optional sample weights; None = unweighted
+
+    :returns double
+             The optimal F2-Score
+    """
+    import h2o4gpu.util.daicx as daicx  # function-local import
+    if sample_weight is None:  # no weights: two-argument call
+        return daicx.f2_opt(actual.ravel(), predicted.ravel())
+    return daicx.f2_opt(actual.ravel(), predicted.ravel(),
+                        sample_weight.ravel())
 
 
 def mcc_opt(actual, predicted, sample_weight=None):
@@ -317,13 +366,38 @@ def mcc_opt(actual, predicted, sample_weight=None):
     """
     import h2o4gpu.util.daicx as daicx
     if sample_weight is None:
-        return daicx.mcc_opt(actual, predicted)
-    return daicx.mcc_opt(actual, predicted, sample_weight)
+        return daicx.mcc_opt(actual.ravel(), predicted.ravel())
+    return daicx.mcc_opt(actual.ravel(), predicted.ravel(),
+                         sample_weight.ravel())
+
+
+def acc_opt(actual, predicted, sample_weight=None):
+    """
+    Computes the Accuracy after optimal predictions thresholding.
+
+    This function maximizes the Accuracy
+    by means of optimal predictions thresholding.
+
+    :param actual : numpy array
+                    The ground truth values; flattened via ravel()
+    :param predicted : numpy array
+                       The predicted values; flattened via ravel()
+    :param sample_weight : numpy array or None
+                           Optional sample weights; None = unweighted
+
+    :returns double
+             The optimal Accuracy
+    """
+    import h2o4gpu.util.daicx as daicx  # function-local import
+    if sample_weight is None:  # no weights: two-argument call
+        return daicx.acc_opt(actual.ravel(), predicted.ravel())
+    return daicx.acc_opt(actual.ravel(), predicted.ravel(),
+                         sample_weight.ravel())
 
 
 def confusion_matrices(actual, predicted, sample_weight=None):
     """
-    Computes confusion matrices for DAICX analysis.
+    Computes confusion matrices for ROC analysis.
 
     This function cumputes confusion matrices
     for all possible prediction thresholds.
@@ -343,7 +417,8 @@ def confusion_matrices(actual, predicted, sample_weight=None):
 
     res = np.zeros((actual.shape[0], len(cm_stats_cols)))
     if sample_weight is None:
-        daicx.confusion_matrices(actual, predicted, res)
+        daicx.confusion_matrices(actual.ravel(), predicted.ravel(), res)
     else:
-        daicx.confusion_matrices(actual, predicted, sample_weight, res)
+        daicx.confusion_matrices(actual.ravel(), predicted.ravel(),
+                                 sample_weight.ravel(), res)
     return pd.DataFrame(res[~np.all(res == 0, axis=1)], columns=cm_stats_cols)