Skip to content

Commit

Permalink
F0.5, F2 and Accuracy for daicx (#507)
Browse files Browse the repository at this point in the history
  • Loading branch information
Far0n authored and pseudotensor committed Mar 23, 2018
1 parent b5b5c1b commit e35ac19
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 16 deletions.
52 changes: 50 additions & 2 deletions src/cpu/metrics/metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,26 @@ namespace h2o4gpu {
return (std::abs(y) < 1E-15) ? 0.0 : x / y;
}

/// F0.5 score computed from confusion-matrix counts:
///   1.25 * tp / (1.25 * tp + fp + 0.25 * fn)
/// @param tp true-positive count (may be weighted, hence double)
/// @param tn true-negative count; unused here, kept so the signature matches
///           the other confusion-matrix metrics
/// @param fp false-positive count
/// @param fn false-negative count
/// @return the score, or 0.0 when the denominator's magnitude is below 1e-15
double f05(double tp, double tn, double fp, double fn) {
  const double denominator = 1.25 * tp + fp + 0.25 * fn;
  // Guard against division by (near) zero when there are no relevant counts.
  if (std::abs(denominator) < 1E-15) {
    return 0.0;
  }
  return (1.25 * tp) / denominator;
}

/// F1 score computed from confusion-matrix counts:
///   2 * tp / (2 * tp + fp + fn)
/// @param tp true-positive count (may be weighted, hence double)
/// @param tn true-negative count; unused here, kept so the signature matches
///           the other confusion-matrix metrics
/// @param fp false-positive count
/// @param fn false-negative count
/// @return the score, or 0.0 when the denominator's magnitude is below 1e-15
double f1(double tp, double tn, double fp, double fn) {
  const double denominator = 2 * tp + fp + fn;
  // Guard against division by (near) zero when there are no relevant counts.
  if (std::abs(denominator) < 1E-15) {
    return 0.0;
  }
  return (2 * tp) / denominator;
}

/// F2 score computed from confusion-matrix counts:
///   5 * tp / (5 * tp + fp + 4 * fn)
/// @param tp true-positive count (may be weighted, hence double)
/// @param tn true-negative count; unused here, kept so the signature matches
///           the other confusion-matrix metrics
/// @param fp false-positive count
/// @param fn false-negative count
/// @return the score, or 0.0 when the denominator's magnitude is below 1e-15
double f2(double tp, double tn, double fp, double fn) {
  const double denominator = 5 * tp + fp + 4 * fn;
  // Guard against division by (near) zero when there are no relevant counts.
  if (std::abs(denominator) < 1E-15) {
    return 0.0;
  }
  return (5 * tp) / denominator;
}

/// Accuracy computed from confusion-matrix counts:
///   (tp + tn) / (tp + fp + tn + fn)
/// @param tp true-positive count (may be weighted, hence double)
/// @param tn true-negative count
/// @param fp false-positive count
/// @param fn false-negative count
/// @return the accuracy, or 0.0 when the total's magnitude is below 1e-15
double acc(double tp, double tn, double fp, double fn) {
  const double total = tp + fp + tn + fn;
  // Guard against division by (near) zero for an empty confusion matrix.
  if (std::abs(total) < 1E-15) {
    return 0.0;
  }
  return (tp + tn) / total;
}

double cm_metric_opt(std::vector<double> y, std::vector<double> yhat,
std::vector<double> w, CMMetricFunc metric) {
auto idx = argsort(yhat);
Expand Down Expand Up @@ -126,15 +141,48 @@ namespace h2o4gpu {
std::vector<double>(w, w + l), mcc);
}

/// Unweighted F0.5 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f05 metric and a weight of 1.0 for
/// each of the n elements.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y; also the number of weights
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @return whatever cm_metric_opt returns for the f05 metric
/// @note This wrapper does not check that n and m agree.
double f05_opt(double *y, int n, double *yhat, int m) {
  using Values = std::vector<double>;
  const Values unit_weights(n, 1.0);
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), unit_weights, f05);
}

/// Weighted F0.5 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f05 metric.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @param w    pointer to l sample weights
/// @param l    number of elements readable from w
/// @return whatever cm_metric_opt returns for the f05 metric
/// @note This wrapper does not check that n, m and l agree.
double f05_opt(double *y, int n, double *yhat, int m, double *w, int l) {
  using Values = std::vector<double>;
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), Values(w, w + l), f05);
}

/// Unweighted F1 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f1 metric and a weight of 1.0 for
/// each of the n elements.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y; also the number of weights
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @return whatever cm_metric_opt returns for the f1 metric
/// @note This wrapper does not check that n and m agree.
double f1_opt(double *y, int n, double *yhat, int m) {
  std::vector<double> w(n, 1.0);
  // The trailing "w, f1);" argument line was present twice, which made the
  // call ill-formed; it must appear exactly once.
  return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),
                       w, f1);
}

/// Weighted F1 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f1 metric.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @param w    pointer to l sample weights
/// @param l    number of elements readable from w
/// @return whatever cm_metric_opt returns for the f1 metric
/// @note This wrapper does not check that n, m and l agree.
double f1_opt(double *y, int n, double *yhat, int m, double *w, int l) {
  // The trailing weights/metric argument line was present twice, which made
  // the call ill-formed; it must appear exactly once.
  return cm_metric_opt(std::vector<double>(y, y + n), std::vector<double>(yhat, yhat + m),
                       std::vector<double>(w, w + l), f1);
}

/// Unweighted F2 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f2 metric and a weight of 1.0 for
/// each of the n elements.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y; also the number of weights
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @return whatever cm_metric_opt returns for the f2 metric
/// @note This wrapper does not check that n and m agree.
double f2_opt(double *y, int n, double *yhat, int m) {
  using Values = std::vector<double>;
  const Values unit_weights(n, 1.0);
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), unit_weights, f2);
}

/// Weighted F2 entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the f2 metric.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @param w    pointer to l sample weights
/// @param l    number of elements readable from w
/// @return whatever cm_metric_opt returns for the f2 metric
/// @note This wrapper does not check that n, m and l agree.
double f2_opt(double *y, int n, double *yhat, int m, double *w, int l) {
  using Values = std::vector<double>;
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), Values(w, w + l), f2);
}

/// Unweighted accuracy entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the acc metric and a weight of 1.0 for
/// each of the n elements.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y; also the number of weights
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @return whatever cm_metric_opt returns for the acc metric
/// @note This wrapper does not check that n and m agree.
double acc_opt(double *y, int n, double *yhat, int m) {
  using Values = std::vector<double>;
  const Values unit_weights(n, 1.0);
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), unit_weights, acc);
}

/// Weighted accuracy entry point: copies the raw buffers into vectors and
/// delegates to cm_metric_opt with the acc metric.
/// @param y    pointer to n values (presumably ground-truth labels)
/// @param n    number of elements readable from y
/// @param yhat pointer to m values (presumably predictions)
/// @param m    number of elements readable from yhat
/// @param w    pointer to l sample weights
/// @param l    number of elements readable from w
/// @return whatever cm_metric_opt returns for the acc metric
/// @note This wrapper does not check that n, m and l agree.
double acc_opt(double *y, int n, double *yhat, int m, double *w, int l) {
  using Values = std::vector<double>;
  return cm_metric_opt(Values(y, y + n), Values(yhat, yhat + m), Values(w, w + l), acc);
}

void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j) {
Expand Down
19 changes: 15 additions & 4 deletions src/include/metrics/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,31 @@


// Public metric API of h2o4gpu.
namespace h2o4gpu {

// Scalar metrics evaluated on one confusion matrix, given as four counts:
// tp/tn = true positives/negatives, fp/fn = false positives/negatives.
// The counts are doubles, so they may be weighted sums rather than integers.
// mcc: presumably the Matthews correlation coefficient — confirm in metrics.cpp.
double mcc(double tp, double tn, double fp, double fn);
// F0.5 score: 1.25*tp / (1.25*tp + fp + 0.25*fn); 0.0 if the denominator is ~0.
double f05(double tp, double tn, double fp, double fn);
// F1 score: 2*tp / (2*tp + fp + fn); 0.0 if the denominator is ~0.
double f1(double tp, double tn, double fp, double fn);
// F2 score: 5*tp / (5*tp + fp + 4*fn); 0.0 if the denominator is ~0.
double f2(double tp, double tn, double fp, double fn);
// Accuracy: (tp + tn) / (tp + fp + tn + fn); 0.0 if the total is ~0.
double acc(double tp, double tn, double fp, double fn);

// "*_opt" entry points. Per the Python wrappers' docstrings these return the
// metric's value after optimal thresholding of the predictions.
//   y    : pointer to n values (presumably ground-truth labels)
//   yhat : pointer to m values (presumably predictions)
//   w    : pointer to l sample weights; the overloads without w/l use a
//          weight of 1.0 for each of the n elements
// NOTE(review): the wrappers do not appear to validate that n, m and l
// agree — callers should pass equal lengths.
double mcc_opt(double *y, int n, double *yhat, int m);
double mcc_opt(double *y, int n, double *yhat, int m, double* w, int l);

// Optimal-threshold F0.5 score (unweighted / weighted).
double f05_opt(double *y, int n, double *yhat, int m);
double f05_opt(double *y, int n, double *yhat, int m, double* w, int l);

// Optimal-threshold F1 score (unweighted / weighted).
double f1_opt(double *y, int n, double *yhat, int m);
double f1_opt(double *y, int n, double *yhat, int m, double* w, int l);


// Optimal-threshold F2 score (unweighted / weighted).
double f2_opt(double *y, int n, double *yhat, int m);
double f2_opt(double *y, int n, double *yhat, int m, double* w, int l);

// Optimal-threshold accuracy (unweighted / weighted).
double acc_opt(double *y, int n, double *yhat, int m);
double acc_opt(double *y, int n, double *yhat, int m, double* w, int l);

// Fills cm with confusion-matrix statistics (unweighted / weighted).
// The Python wrapper passes a zero-initialised 2-D array with one row per
// sample as cm; k and j are presumably its row and column counts — TODO confirm.
void confusion_matrices(double *y, int n, double *yhat, int m, double *cm, int k, int j);
void confusion_matrices(double *y, int n, double *yhat, int m, double* w, int l, double *cm, int k, int j);

}

#endif

95 changes: 85 additions & 10 deletions src/interface_py/h2o4gpu/util/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,35 @@ def auc(actual, posterior):
return area_under_curve


def f05_opt(actual, predicted, sample_weight=None):
    """
    Return the best F0.5-Score achievable by thresholding the predictions.

    The inputs are flattened with ``ravel()`` and forwarded to the native
    ``daicx.f05_opt`` routine; the weights are only forwarded when given.

    :param actual : numpy array
        The ground truth value
    :param predicted : numpy array
        The predicted value
    :param sample_weight : numpy array or None
        sample weights

    :returns double
        The optimal F0.5-Score
    """
    import h2o4gpu.util.daicx as daicx
    flat_args = [actual.ravel(), predicted.ravel()]
    if sample_weight is not None:
        flat_args.append(sample_weight.ravel())
    return daicx.f05_opt(*flat_args)


def f1_opt(actual, predicted, sample_weight=None):
"""
Computes the F1-score after optimal predictions thresholding.
Computes the F1-Score after optimal predictions thresholding.
This function maximizes the F1-score by means of
This function maximizes the F1-Score by means of
optimal predictions thresholding.
:param actual : numpy array
Expand All @@ -290,12 +314,37 @@ def f1_opt(actual, predicted, sample_weight=None):
sample weights
:returns double
The optimal F1-score
The optimal F1-Score
"""
import h2o4gpu.util.daicx as daicx
if sample_weight is None:
return daicx.f1_opt(actual, predicted)
return daicx.f1_opt(actual, predicted, sample_weight)
return daicx.f1_opt(actual.ravel(), predicted.ravel())
return daicx.f1_opt(actual.ravel(), predicted.ravel(),
sample_weight.ravel())


def f2_opt(actual, predicted, sample_weight=None):
    """
    Return the best F2-Score achievable by thresholding the predictions.

    The inputs are flattened with ``ravel()`` and forwarded to the native
    ``daicx.f2_opt`` routine; the weights are only forwarded when given.

    :param actual : numpy array
        The ground truth value
    :param predicted : numpy array
        The predicted value
    :param sample_weight : numpy array or None
        sample weights

    :returns double
        The optimal F2-Score
    """
    import h2o4gpu.util.daicx as daicx
    flat_args = [actual.ravel(), predicted.ravel()]
    if sample_weight is not None:
        flat_args.append(sample_weight.ravel())
    return daicx.f2_opt(*flat_args)


def mcc_opt(actual, predicted, sample_weight=None):
Expand All @@ -317,13 +366,38 @@ def mcc_opt(actual, predicted, sample_weight=None):
"""
import h2o4gpu.util.daicx as daicx
if sample_weight is None:
return daicx.mcc_opt(actual, predicted)
return daicx.mcc_opt(actual, predicted, sample_weight)
return daicx.mcc_opt(actual.ravel(), predicted.ravel())
return daicx.mcc_opt(actual.ravel(), predicted.ravel(),
sample_weight.ravel())


def acc_opt(actual, predicted, sample_weight=None):
    """
    Return the best Accuracy achievable by thresholding the predictions.

    The inputs are flattened with ``ravel()`` and forwarded to the native
    ``daicx.acc_opt`` routine; the weights are only forwarded when given.

    :param actual : numpy array
        The ground truth value
    :param predicted : numpy array
        The predicted value
    :param sample_weight : numpy array or None
        sample weights

    :returns double
        The optimal Accuracy
    """
    import h2o4gpu.util.daicx as daicx
    flat_args = [actual.ravel(), predicted.ravel()]
    if sample_weight is not None:
        flat_args.append(sample_weight.ravel())
    return daicx.acc_opt(*flat_args)


def confusion_matrices(actual, predicted, sample_weight=None):
"""
Computes confusion matrices for DAICX analysis.
Computes confusion matrices for ROC analysis.
This function computes confusion matrices
for all possible prediction thresholds.
Expand All @@ -343,7 +417,8 @@ def confusion_matrices(actual, predicted, sample_weight=None):

res = np.zeros((actual.shape[0], len(cm_stats_cols)))
if sample_weight is None:
daicx.confusion_matrices(actual, predicted, res)
daicx.confusion_matrices(actual.ravel(), predicted.ravel(), res)
else:
daicx.confusion_matrices(actual, predicted, sample_weight, res)
daicx.confusion_matrices(actual.ravel(), predicted.ravel(),
sample_weight.ravel(), res)
return pd.DataFrame(res[~np.all(res == 0, axis=1)], columns=cm_stats_cols)

0 comments on commit e35ac19

Please sign in to comment.