From 2db95302990c3a9033fab14cf0c2b2a552847ecd Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Thu, 22 Mar 2018 21:16:08 -0400 Subject: [PATCH 1/5] Bump version and modify cran-comments.md --- src/interface_r/DESCRIPTION | 4 +++- src/interface_r/NEWS.md | 2 +- src/interface_r/cran-comments.md | 11 ++--------- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/interface_r/DESCRIPTION b/src/interface_r/DESCRIPTION index 301139472..5e3580f9c 100644 --- a/src/interface_r/DESCRIPTION +++ b/src/interface_r/DESCRIPTION @@ -3,7 +3,9 @@ Type: Package Title: R Interface to 'H2O4GPU' Version: 0.2.0 Authors@R: c( - person("Yuan", "Tang", role = c("aut", "cre"), email = "terry@h2o.ai"), + person("Yuan", "Tang", role = c("aut", "cre"), + email = "terrytangyuan@gmail.com", + comment = c(ORCID = "0000-0001-5243-233X")), person("Navdeep", "Gill", role = c("aut"), email = "navdeep@h2o.ai"), person("Erin", "LeDell", role = c("aut"), email = "erin@h2o.ai"), person("H2O.ai", role = c("cph", "fnd"))) diff --git a/src/interface_r/NEWS.md b/src/interface_r/NEWS.md index 1c0724bc4..4ee0fde81 100644 --- a/src/interface_r/NEWS.md +++ b/src/interface_r/NEWS.md @@ -1,3 +1,3 @@ -## h2o4gpu 0.0.1 (CRAN) +## h2o4gpu 0.2.0 (CRAN) * Initial release. diff --git a/src/interface_r/cran-comments.md b/src/interface_r/cran-comments.md index a42509e81..c5ede2de3 100644 --- a/src/interface_r/cran-comments.md +++ b/src/interface_r/cran-comments.md @@ -7,17 +7,10 @@ 0 errors | 0 warnings | 1 note +New submission + * This is a new release. ## Reverse dependencies This is a new release, so there are no reverse dependencies. - ---- - -* I have run R CMD check on the NUMBER downstream dependencies. - (Summary at ...). - -* FAILURE SUMMARY - -* All revdep maintainers were notified of the release on RELEASE DATE. 
From 070052c5547a4baf2ded925c2a1a58c54f529510 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Thu, 22 Mar 2018 22:09:45 -0400 Subject: [PATCH 2/5] Use canonical form of CRAN url for Metrics package --- src/interface_r/vignettes/getting_started.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface_r/vignettes/getting_started.Rmd b/src/interface_r/vignettes/getting_started.Rmd index 5bff545a0..a820adc02 100644 --- a/src/interface_r/vignettes/getting_started.Rmd +++ b/src/interface_r/vignettes/getting_started.Rmd @@ -74,7 +74,7 @@ ce(actual = y, predicted = pred) The tree based models (Random Forest and GBM) are built on top of the very powerful [XGBoost](https://xgboost.readthedocs.io/en/latest/) library, and the Elastic Net GLM has been built upon the POGS solver. [Proximal Graph Solver (POGS)](http://stanford.edu/%7Eboyd/papers/pogs.html) is a solver for convex optimization problems in graph form using Alternating Direction Method of Multipliers (ADMM). We have found that this method is not as fast as we'd like it to be, so we are working on implementing an entirely new GLM from scratch (follow progress [here](https://github.com/h2oai/h2o4gpu/issues/356)). -The **h2o4gpu** R package does not include a suite of internal model metrics functions, therefore we encourage users to use a third-party model metrics package of their choice. For all the examples below, we will use the [Metrics](https://cran.r-project.org/web/packages/Metrics/index.html) R package. This package has a large number of model metrics functions, all with a very simple, unified API. +The **h2o4gpu** R package does not include a suite of internal model metrics functions, therefore we encourage users to use a third-party model metrics package of their choice. For all the examples below, we will use the [Metrics](https://CRAN.R-project.org/package=Metrics) R package. This package has a large number of model metrics functions, all with a very simple, unified API. 
### Binary Classification From 895db3f4d2135ae3c0de8c32a8ae0b77a7003952 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Thu, 22 Mar 2018 22:12:57 -0400 Subject: [PATCH 3/5] Fix vignette titles are not placeholders --- src/interface_r/vignettes/getting_started.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface_r/vignettes/getting_started.Rmd b/src/interface_r/vignettes/getting_started.Rmd index a820adc02..f731c05ab 100644 --- a/src/interface_r/vignettes/getting_started.Rmd +++ b/src/interface_r/vignettes/getting_started.Rmd @@ -4,7 +4,7 @@ author: "Navdeep Gill, Erin LeDell, Yuan Tang" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Vignette Title} + %\VignetteIndexEntry{H2O4GPU: Machine Learning with GPUs in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- From 7fdd3515b4788afedb192aed39beb1169e0e285b Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Fri, 23 Mar 2018 10:16:08 -0400 Subject: [PATCH 4/5] Addressed CRAN feedback --- src/interface_r/DESCRIPTION | 6 +++--- src/interface_r/R/package.R | 17 +++++++++++++++++ src/interface_r/man/h2o4gpu.Rd | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/interface_r/DESCRIPTION b/src/interface_r/DESCRIPTION index 5e3580f9c..1d9401469 100644 --- a/src/interface_r/DESCRIPTION +++ b/src/interface_r/DESCRIPTION @@ -1,6 +1,6 @@ Package: h2o4gpu Type: Package -Title: R Interface to 'H2O4GPU' +Title: Interface to 'H2O4GPU' Version: 0.2.0 Authors@R: c( person("Yuan", "Tang", role = c("aut", "cre"), @@ -9,12 +9,12 @@ Authors@R: c( person("Navdeep", "Gill", role = c("aut"), email = "navdeep@h2o.ai"), person("Erin", "LeDell", role = c("aut"), email = "erin@h2o.ai"), person("H2O.ai", role = c("cph", "fnd"))) -Description: R Interface to 'H2O4GPU' - A collection of 'GPU' solvers for machine learning algorithms. 
+Description: Interface to 'H2O4GPU', a collection of 'GPU' solvers for machine learning algorithms. License: Apache License 2.0 URL: https://github.com/h2oai/h2o4gpu BugReports: https://github.com/h2oai/h2o4gpu/issues SystemRequirements: Python (>= 3.6) with header files and shared library; - h2o4gpu (https://github.com/h2oai/h2o4gpu) + H2O4GPU (https://github.com/h2oai/h2o4gpu) Encoding: UTF-8 LazyData: true Depends: diff --git a/src/interface_r/R/package.R b/src/interface_r/R/package.R index 2e053cd27..cca2c0796 100644 --- a/src/interface_r/R/package.R +++ b/src/interface_r/R/package.R @@ -2,6 +2,23 @@ #' #' @docType package #' @name h2o4gpu +#' +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Initialize and train the classifier +#' model <- h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' +#' # Make predictions +#' predictions <- model %>% predict(x) +#' +#' } NULL h2o4gpu <- NULL diff --git a/src/interface_r/man/h2o4gpu.Rd b/src/interface_r/man/h2o4gpu.Rd index 0ecd8370a..47150640a 100644 --- a/src/interface_r/man/h2o4gpu.Rd +++ b/src/interface_r/man/h2o4gpu.Rd @@ -8,3 +8,20 @@ \description{ h2o4gpu in R } +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Initialize and train the classifier +model <- h2o4gpu.random_forest_classifier() \%>\% fit(x, y) + +# Make predictions +predictions <- model \%>\% predict(x) + +} +} From 12e03133e3ce674f4e82080996ce498a7837409e Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Fri, 23 Mar 2018 11:24:39 -0400 Subject: [PATCH 5/5] More dontrun examples to address cran comments --- src/interface_r/R/model.R | 50 +++++++++++++++++++ src/interface_r/man/fit.h2o4gpu_model.Rd | 13 +++++ src/interface_r/man/predict.h2o4gpu_model.Rd | 18 +++++++ .../man/transform.h2o4gpu_model.Rd | 22 ++++++++ 4 files changed, 103 insertions(+) diff --git a/src/interface_r/R/model.R 
b/src/interface_r/R/model.R index 2116502ca..7173f4f40 100644 --- a/src/interface_r/R/model.R +++ b/src/interface_r/R/model.R @@ -73,6 +73,18 @@ print.h2o4gpu_model <- function(x, ...) { #' @param ... Additional arguments (unused for now). #' #' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Train the classifier +#' h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' } fit.h2o4gpu_model <- function(object, x, y = NULL, ...) { if (inherits(object$model, "h2o4gpu.solvers.elastic_net.ElasticNet") && object$params$family == "logistic"){ if (length(unique(y)) > 2){ @@ -94,6 +106,23 @@ fit.h2o4gpu_model <- function(object, x, y = NULL, ...) { #' @param type One of "raw" or "prob", indicating the type of output: predicted values or probabilities #' @param ... Additional arguments (unused for now). #' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Setup dataset +#' x <- iris[1:4] +#' y <- as.integer(iris$Species) - 1 +#' +#' # Initialize and train the classifier +#' model <- h2o4gpu.random_forest_classifier() %>% fit(x, y) +#' +#' # Make predictions +#' predictions <- model %>% predict(x) +#' +#' } +#' predict.h2o4gpu_model <- function(object, x, type="raw", ...) { if (type == "raw") { preds <- object$model$predict(X = resolve_model_input(x), ...) @@ -122,6 +151,27 @@ predict.h2o4gpu_model <- function(object, x, type="raw", ...) { #' be used in generating predictions. #' @param ... Additional arguments (unused for now). 
#' @export +#' @examples +#' \dontrun{ +#' +#' library(h2o4gpu) +#' +#' # Prepare data +#' iris$Species <- as.integer(iris$Species) # convert to numeric data +#' +#' # Randomly sample 80% of the rows for the training set +#' set.seed(1) +#' train_idx <- sample(1:nrow(iris), 0.8*nrow(iris)) +#' train <- iris[train_idx, ] +#' test <- iris[-train_idx, ] +#' +#' # Train a K-Means model +#' model_km <- h2o4gpu.kmeans(n_clusters = 3L) %>% fit(train) +#' +#' # Transform test data +#' test_dist <- model_km %>% transform(test) +#' +#' } transform.h2o4gpu_model <- function(object, x, ...) { object$model$transform(X = resolve_model_input(x), ...) } diff --git a/src/interface_r/man/fit.h2o4gpu_model.Rd b/src/interface_r/man/fit.h2o4gpu_model.Rd index 9d79ea098..ae7988441 100644 --- a/src/interface_r/man/fit.h2o4gpu_model.Rd +++ b/src/interface_r/man/fit.h2o4gpu_model.Rd @@ -22,3 +22,16 @@ specified as \code{NULL}.} \description{ This function builds the model using the training data specified. } +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Train the classifier +h2o4gpu.random_forest_classifier() \%>\% fit(x, y) +} +} diff --git a/src/interface_r/man/predict.h2o4gpu_model.Rd b/src/interface_r/man/predict.h2o4gpu_model.Rd index 5b76245bb..d5b3fa2ba 100644 --- a/src/interface_r/man/predict.h2o4gpu_model.Rd +++ b/src/interface_r/man/predict.h2o4gpu_model.Rd @@ -20,3 +20,21 @@ be used in generating predictions.} This function makes predictions from new data using a trained H2O4GPU model and returns class predictions for classification and predicted values for regression. 
} +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Setup dataset +x <- iris[1:4] +y <- as.integer(iris$Species) - 1 + +# Initialize and train the classifier +model <- h2o4gpu.random_forest_classifier() \%>\% fit(x, y) + +# Make predictions +predictions <- model \%>\% predict(x) + +} + +} diff --git a/src/interface_r/man/transform.h2o4gpu_model.Rd b/src/interface_r/man/transform.h2o4gpu_model.Rd index 720df7809..8c60c1522 100644 --- a/src/interface_r/man/transform.h2o4gpu_model.Rd +++ b/src/interface_r/man/transform.h2o4gpu_model.Rd @@ -17,3 +17,25 @@ be used in generating predictions.} \description{ This function transforms the given new data using a trained H2O4GPU model. } +\examples{ +\dontrun{ + +library(h2o4gpu) + +# Prepare data +iris$Species <- as.integer(iris$Species) # convert to numeric data + +# Randomly sample 80\% of the rows for the training set +set.seed(1) +train_idx <- sample(1:nrow(iris), 0.8*nrow(iris)) +train <- iris[train_idx, ] +test <- iris[-train_idx, ] + +# Train a K-Means model +model_km <- h2o4gpu.kmeans(n_clusters = 3L) \%>\% fit(train) + +# Transform test data +test_dist <- model_km \%>\% transform(test) + +} +}