From 94265dba3aebd2271604c29d73acaa904bbb2de6 Mon Sep 17 00:00:00 2001
From: mdymczyk
Date: Thu, 31 May 2018 20:32:49 +0900
Subject: [PATCH] Initial validation dataset handling

---
 src/base/ffm/ffm.cpp                    | 33 ++++++++++++++++++++-----
 src/base/ffm/ffm.h                      |  2 +-
 src/base/ffm/trainer.h                  | 15 ++++++++---
 src/gpu/ffm/trainer.cu                  | 24 +++++++++++-------
 src/include/data/ffm/data.h             |  8 ++++--
 src/include/solver/ffm_api.h            | 13 ++++++++--
 src/interface_py/h2o4gpu/solvers/ffm.py | 22 +++++++++++++----
 7 files changed, 88 insertions(+), 29 deletions(-)

diff --git a/src/base/ffm/ffm.cpp b/src/base/ffm/ffm.cpp
index 3275d37c8..ebe6cd860 100644
--- a/src/base/ffm/ffm.cpp
+++ b/src/base/ffm/ffm.cpp
@@ -16,14 +16,15 @@ template<typename T>
 FFM<T>::FFM(Params & params, T *weights) : params(params), trainer(weights, params) {}
 
 template<typename T>
-void FFM<T>::fit(const Dataset<T> &dataset) {
+void FFM<T>::fit(const Dataset<T> &dataset, const Dataset<T> &valid_dataset) {
   this->trainer.setDataset(dataset);
+  this->trainer.setValidationDataset(valid_dataset);
 
   Timer timer;
 
   for (int epoch = 1; epoch <= this->params.nIter; epoch++) {
     timer.tic();
-    trainer.oneEpoch(true);
+    trainer.oneEpoch();
     timer.toc();
     log_debug(params.verbose, "Epoch took %f.", timer.pop());
     if (trainer.earlyStop()) {
@@ -71,8 +72,11 @@ T maxElement(const T *data, int size) {
 /**
  * C API method
  */
-void ffm_fit_float(int* features, int* fields, float* values, int *labels, int *rowPositions, float *w, Params &_param) {
-  log_debug(_param.verbose, "Converting %d float rows into a dataset.", _param.numRows);
+
+void ffm_fit_float(int *features, int* fields, float* values, int *labels, int *rowPositions,
+                   int *features_v, int* fields_v, float* values_v, int *labels_v, int *rowPositions_v,
+                   float *w, Params &_param) {
+  log_debug(_param.verbose, "Converting %d float rows into a training dataset.", _param.numRows);
 
   float *scales = (float*) malloc(sizeof(float) * _param.numRows);
   computeScales(scales, values, rowPositions, _param);
 
@@ -80,18 +84,35 @@ void
 _param.numFeatures = maxElement(features, _param.numNodes) + 1;
 
   Dataset<float> dataset(_param.numFields, _param.numFeatures, _param.numRows, _param.numNodes, features, fields, values, labels, scales, rowPositions);
+
+  Dataset<float> *validationDataset = new Dataset<float>();
+  if(features_v) {
+    log_debug(_param.verbose, "Converting %d float rows into a validation dataset.", _param.numRowsVal);
+
+    float *scales_v = (float*) malloc(sizeof(float) * _param.numRowsVal);
+    computeScales(scales_v, values_v, rowPositions_v, _param);
+
+    validationDataset = new Dataset<float>(_param.numFields, _param.numFeatures, _param.numRowsVal, _param.numNodesVal, features_v, fields_v, values_v, labels_v, scales_v, rowPositions_v);
+  }
+
   FFM<float> ffm(_param);
   _param.printParams();
 
   log_debug(_param.verbose, "Running FFM fit for float.");
 
   Timer timer;
   timer.tic();
-  ffm.fit(dataset);
+  ffm.fit(dataset, *validationDataset);
   ffm.trainer.model->copyTo(w);
   timer.toc();
   log_debug(_param.verbose, "Float fit took %f.", timer.pop());
+
+  if(validationDataset) {
+    delete validationDataset;
+  }
 }
 
-void ffm_fit_double(int* features, int* fields, double* values, int *labels, int *rowPositions, double *w, Params &_param) {
+void ffm_fit_double(int *features, int* fields, double* values, int *labels, int *rowPositions,
+                    int *features_v, int* fields_v, double* values_v, int *labels_v, int *rowPositions_v,
+                    double *w, Params &_param) {
   log_debug(_param.verbose, "Converting %d double rows into a dataset.", _param.numRows);
 
   double *scales = (double*) malloc(sizeof(double) * _param.numRows);
   computeScales(scales, values, rowPositions, _param);
diff --git a/src/base/ffm/ffm.h b/src/base/ffm/ffm.h
index a09f5451e..0436235eb 100644
--- a/src/base/ffm/ffm.h
+++ b/src/base/ffm/ffm.h
@@ -18,7 +18,7 @@ class FFM {
   FFM(Params & params);
   FFM(Params & params, T *weights);
 
-  void fit(const Dataset<T> &dataset);
+  void fit(const Dataset<T> &dataset, const Dataset<T> &valid_dataset);
 void predict(const Dataset<T> &dataset, T *predictions);
 
diff --git a/src/base/ffm/trainer.h b/src/base/ffm/trainer.h
index 145e2d8a9..5d646f928 100644
--- a/src/base/ffm/trainer.h
+++ b/src/base/ffm/trainer.h
@@ -15,12 +15,16 @@ class Trainer {
  public:
   Trainer(Params &params);
-  Trainer(const T* weights, Params &params);
+  Trainer(const T *weights, Params &params);
 
   ~Trainer();
 
   void setDataset(const Dataset<T> &dataset);
 
-  T oneEpoch(bool update);
+  T validationLoss();
+
+  T oneEpoch();
+
+  T oneEpoch(std::vector<DatasetBatcher<T>*> dataBatcher, bool update);
 
   void predict(T *predictions);
 
@@ -32,8 +36,11 @@ class Trainer {
  private:
   Params &params;
 
-  // Vector of datasets split for threads/GPUs
-  std::vector<DatasetBatcher<T>*> trainDataBatcher;
+  // Vector of train datasets splits for threads/GPUs
+  std::vector<DatasetBatcher<T>*> trainDataBatcher;
+
+  // Vector of validation datasets split for threads/GPUs
+  std::vector<DatasetBatcher<T>*> validationDataBatcher;
 };
 
diff --git a/src/gpu/ffm/trainer.cu b/src/gpu/ffm/trainer.cu
index 514b2ebbf..21b3e08ab 100644
--- a/src/gpu/ffm/trainer.cu
+++ b/src/gpu/ffm/trainer.cu
@@ -37,7 +37,7 @@ __device__ double atomicAdd(double* address, double val)
 namespace ffm {
 
 template<typename T>
-Trainer<T>::Trainer(Params &params) : params(params), trainDataBatcher(params.nGpus) {
+Trainer<T>::Trainer(Params &params) : params(params), trainDataBatcher(params.nGpus), validationDataBatcher(params.nGpus) {
   CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceMapHost));
   CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
 
@@ -53,7 +53,7 @@ Trainer<T>::Trainer(Params &params) : params(params), trainDataBatcher(params.nG
 }
 
 template<typename T>
-Trainer<T>::Trainer(const T* weights, Params &params) : params(params), trainDataBatcher(params.nGpus) {
+Trainer<T>::Trainer(const T* weights, Params &params) : params(params), trainDataBatcher(params.nGpus), validationDataBatcher(params.nGpus) {
   CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceMapHost));
   CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin));
 
@@ -225,8 +225,18 @@ void Trainer<T>::predict(T *predictions) {
   }
 }
 
+template<typename T>
+T Trainer<T>::validationLoss() {
+  return this->oneEpoch(this->validationDataBatcher, false);
+}
+
 template<typename T>
 T Trainer<T>::oneEpoch(bool update) {
+  return this->oneEpoch(this->trainDataBatcher, true);
+}
+
+template<typename T>
+T Trainer<T>::oneEpoch(std::vector<DatasetBatcher<T>*> dataBatcher, bool update) {
   Timer timer;
 
   log_debug(this->params.verbose, "Computing an FFM epoch (update = %s)", update ? "true" : "false");
@@ -236,13 +246,9 @@ T Trainer<T>::oneEpoch(bool update) {
     int initialBatchOffset = 0;
     CUDA_CHECK(cudaMemcpyToSymbol(cBatchOffset, &initialBatchOffset, sizeof(int)));
 
-    while (trainDataBatcher[i]->hasNext()) {
-      /**
-       * Get batch
-       */
-
+    while (dataBatcher[i]->hasNext()) {
       timer.tic();
-      DatasetBatch<T> *batch = trainDataBatcher[i]->nextBatch(this->params.batchSize);
+      DatasetBatch<T> *batch = dataBatcher[i]->nextBatch(this->params.batchSize);
       timer.toc();
       log_verbose(params.verbose, "Getting batch took %f.", timer.pop());
 
@@ -314,7 +320,7 @@ T Trainer<T>::oneEpoch(bool update) {
       cudaFree(losses);
     }
 
-    trainDataBatcher[i]->reset();
+    dataBatcher[i]->reset();
   }
 
   if (params.nGpus != 1) {
diff --git a/src/include/data/ffm/data.h b/src/include/data/ffm/data.h
index f91e1feef..b91aec2aa 100644
--- a/src/include/data/ffm/data.h
+++ b/src/include/data/ffm/data.h
@@ -48,9 +48,9 @@ class Dataset {
       rowPositions(rowPositions){}
 
   // Number of rows in the dataset
-  int numRows;
+  int numRows = 0;
   // Total number of nodes in all the rows
-  int numNodes;
+  int numNodes = 0;
   // Total number of fields
   int numFields = 0;
   // Total number of features
@@ -75,6 +75,10 @@ class Dataset {
   int requiredBytes() const {
     return numNodes * 2 * sizeof(int) + numRows * sizeof(T) + numRows * sizeof(int) + numRows * sizeof(T);
   }
+
+  bool empty() {
+    return numRows == 0;
+  }
 };
 
 }
\ No newline at end of file
diff --git a/src/include/solver/ffm_api.h b/src/include/solver/ffm_api.h
index b8785080d..cb19d66d4 100644
--- a/src/include/solver/ffm_api.h
+++ b/src/include/solver/ffm_api.h
@@ -19,6 +19,7 @@ typedef struct Params {
   int nIter = 10;
 int batchSize = -1;
 
+  // Training dataset params
   int numRows = 0;
   int numNodes = 0;
   int numFeatures = 0;
@@ -34,14 +35,22 @@ typedef struct Params {
   // For GPU number of GPUs to be used
   int nGpus = 1;
 
+  // Validation dataset params
+  int numRowsVal = 0;
+  int numNodesVal = 0;
+
   void printParams() {
     log_verbose(verbose, "learningRate = %f \n regLambda = %f \n nIter = %d \n batchSize = %d \n numRows = %d \n numFeatures = %d \n numFields = %d \n k = %d",
         learningRate, regLambda, nIter, batchSize, numRows, numFeatures, numFields, k);
   }
 } Params;
 
-void ffm_fit_float(int *features, int* fields, float* values, int *labels, int *positions, float *w, Params &_param);
-void ffm_fit_double(int *features, int* fields, double* values, int *labels, int *positions, double *w, Params &_param);
+void ffm_fit_float(int *features, int* fields, float* values, int *labels, int *positions,
+                   int *features_v, int* fields_v, float* values_v, int *labels_v, int *positions_v,
+                   float *w, Params &_param);
+void ffm_fit_double(int *features, int* fields, double* values, int *labels, int *positions,
+                    int *features_v, int* fields_v, double* values_v, int *labels_v, int *positions_v,
+                    double *w, Params &_param);
 
 void ffm_predict_float(int *features, int* fields, float* values, int* positions, float *predictions, float *w, Params &_param);
 void ffm_predict_double(int *features, int* fields, double* values, int* positions, double *predictions, double *w, Params &_param);
diff --git a/src/interface_py/h2o4gpu/solvers/ffm.py b/src/interface_py/h2o4gpu/solvers/ffm.py
index 8675b538e..cecdeeabc 100644
--- a/src/interface_py/h2o4gpu/solvers/ffm.py
+++ b/src/interface_py/h2o4gpu/solvers/ffm.py
@@ -97,7 +97,7 @@ def set_params(self, **params):
         # TODO implement
         pass
 
-    def fit(self, X, y):
+    def fit(self, X, y, X_validate=None, y_validate=None):
         lib = self._load_lib()
 
         params = lib.params_ffm()
@@ -117,19 +117,31 @@ def fit(self, X, y):
         fields, features, values, positions = self._numpy_to_ffm_rows(params, X)
-        self.weights = np.zeros(params.k * (np.max(features) + 1) * (np.max(fields) + 1), dtype=self.dtype)
-        y_np = self._sanatize_labels(y)
+        fields_validation, features_validation, values_validation, positions_validation = None, None, None, None
+        if X_validate is not None and y_validate is not None:
+            fields_validation, features_validation, values_validation, positions_validation = self._numpy_to_ffm_rows(params, X_validate)
+
+        y_np = self._sanatize_labels(y)
+        y_validation_np = self._sanatize_labels(y_validate)
+
+        self.weights = np.zeros(params.k * (np.max(features) + 1) * (np.max(fields) + 1), dtype=self.dtype)
+
         if self.dtype == np.float32:
-            lib.ffm_fit_float(features, fields, values, y_np, positions, self.weights, params)
+            lib.ffm_fit_float(features, fields, values, y_np, positions,
+                              features_validation, fields_validation, values_validation, y_validation_np, positions_validation,
+                              self.weights, params)
         else:
-            lib.ffm_fit_double(features, fields, values, y_np, positions, self.weights, params)
+            lib.ffm_fit_double(features, fields, values, y_np, positions,
+                              features_validation, fields_validation, values_validation, y_validation_np, positions_validation,
+                              self.weights, params)
 
         self.learned_params = params
         return self
 
     def _sanatize_labels(self, y):
+        if y is None:
+            return None
         return np.array(list(map(lambda e: 1 if e > 0 else -1, y)), dtype=np.int32)
 
     def _numpy_to_ffm_rows(self, params, X):