From 3e7e426b36eb307dff64a2f81f8a5a225377891d Mon Sep 17 00:00:00 2001 From: Andrew Ziem Date: Tue, 11 May 2021 06:44:36 -0600 Subject: [PATCH] Fix spelling in documents (#6948) * Update roxygen2 doc. Co-authored-by: fis --- CONTRIBUTORS.md | 6 ++-- NEWS.md | 16 +++++----- R-package/R/callbacks.R | 2 +- R-package/R/utils.R | 6 ++-- R-package/R/xgb.DMatrix.R | 10 +++---- R-package/R/xgb.plot.shap.R | 4 +-- R-package/R/xgb.train.R | 10 +++---- R-package/demo/00Index | 6 ++-- R-package/demo/README.md | 2 +- R-package/demo/basic_walkthrough.R | 2 +- R-package/demo/create_sparse_matrix.R | 10 +++---- R-package/demo/cross_validation.R | 4 +-- R-package/demo/custom_objective.R | 6 ++-- R-package/demo/early_stopping.R | 6 ++-- R-package/man/getinfo.Rd | 4 +-- R-package/man/setinfo.Rd | 4 +-- R-package/man/xgb.DMatrix.Rd | 2 +- R-package/man/xgb.plot.shap.Rd | 2 +- R-package/man/xgb.train.Rd | 10 +++---- R-package/src/Makevars.win | 2 +- R-package/tests/testthat/test_basic.R | 4 +-- R-package/tests/testthat/test_monotone.R | 2 +- .../tests/testthat/test_poisson_regression.R | 4 +-- R-package/vignettes/discoverYourData.Rmd | 28 ++++++++--------- R-package/vignettes/xgboostPresentation.Rmd | 20 ++++++------- R-package/vignettes/xgboostfromJSON.Rmd | 6 ++-- demo/CLI/binary_classification/README.md | 4 +-- demo/README.md | 2 +- demo/c-api/README.md | 2 +- doc/R-package/discoverYourData.md | 24 +++++++-------- doc/R-package/xgboostPresentation.md | 10 +++---- doc/contrib/coding_guide.rst | 2 +- doc/contrib/docs.rst | 4 +-- doc/contrib/donate.rst | 4 +-- doc/contrib/release.rst | 16 +++++----- doc/gpu/index.rst | 6 ++-- doc/jvm/xgboost4j_spark_tutorial.rst | 4 +-- doc/parameter.rst | 8 ++--- doc/python/callbacks.rst | 2 +- doc/python/python_api.rst | 2 +- doc/python/python_intro.rst | 6 ++-- doc/treemethod.rst | 6 ++-- doc/tutorials/aft_survival_analysis.rst | 2 +- doc/tutorials/c_api_tutorial.rst | 6 ++-- doc/tutorials/custom_metric_obj.rst | 2 +- doc/tutorials/dask.rst | 4 +-- doc/tutorials/external_memory.rst | 4 +-- doc/tutorials/input_format.rst | 10 +++---- doc/tutorials/rf.rst | 4 +-- doc/tutorials/saving_model.rst | 6 ++-- include/xgboost/c_api.h | 18 +++++------ include/xgboost/intrusive_ptr.h | 2 +- include/xgboost/json.h | 2 +- include/xgboost/learner.h | 4 +-- include/xgboost/linalg.h | 4 +-- include/xgboost/model.h | 8 ++--- include/xgboost/span.h | 14 ++++----- include/xgboost/tree_model.h | 2 +- jvm-packages/README.md | 2 +- rabit/doc/parameters.md | 2 +- rabit/include/rabit/c_api.h | 8 ++--- rabit/include/rabit/internal/rabit-inl.h | 2 +- rabit/include/rabit/internal/socket.h | 2 +- rabit/src/allreduce_base.cc | 30 +++++++++---------- rabit/src/allreduce_base.h | 30 +++++++++---------- rabit/src/engine.cc | 2 +- rabit/src/engine_mpi.cc | 2 +- src/c_api/c_api.cc | 4 +-- src/c_api/c_api_error.h | 2 +- src/common/charconv.cc | 4 +-- src/common/common.h | 2 +- src/common/device_helpers.cuh | 4 +-- src/common/hist_util.h | 4 +-- src/common/json.cc | 2 +- src/common/math.h | 6 ++-- src/common/quantile.cc | 2 +- src/common/quantile.cu | 6 ++-- src/common/survival_util.h | 2 +- src/common/transform.h | 4 +-- src/common/version.cc | 2 +- src/common/version.h | 2 +- src/data/adapter.h | 4 +-- src/data/ellpack_page.cu | 2 +- src/data/ellpack_page.cuh | 8 ++--- src/gbm/gbtree.cc | 4 +-- src/metric/rank_metric.cc | 6 ++-- src/objective/regression_obj.cu | 2 +- src/tree/constraints.h | 2 +- src/tree/gpu_hist/evaluate_splits.cu | 4 +-- src/tree/gpu_hist/gradient_based_sampler.cuh | 2 +- 
src/tree/gpu_hist/histogram.cu | 2 +- src/tree/param.h | 6 ++-- src/tree/tree_model.cc | 6 ++-- src/tree/updater_basemaker-inl.h | 4 +-- src/tree/updater_gpu_hist.cu | 2 +- src/tree/updater_histmaker.cc | 4 +-- src/tree/updater_quantile_hist.h | 2 +- src/tree/updater_refresh.cc | 2 +- tests/README.md | 6 ++-- tests/benchmark/generate_libsvm.py | 2 +- 100 files changed, 284 insertions(+), 284 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index afc1cd608ac4..666435118d00 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -43,7 +43,7 @@ Committers are people who have made substantial contribution to the project and Become a Committer ------------------ -XGBoost is a opensource project and we are actively looking for new committers who are willing to help maintaining and lead the project. +XGBoost is a open source project and we are actively looking for new committers who are willing to help maintaining and lead the project. Committers comes from contributors who: * Made substantial contribution to the project. * Willing to spent time on maintaining and lead the project. @@ -59,7 +59,7 @@ List of Contributors * [Skipper Seabold](https://github.com/jseabold) - Skipper is the major contributor to the scikit-learn module of XGBoost. * [Zygmunt Zając](https://github.com/zygmuntz) - - Zygmunt is the master behind the early stopping feature frequently used by kagglers. + - Zygmunt is the master behind the early stopping feature frequently used by Kagglers. * [Ajinkya Kale](https://github.com/ajkl) * [Boliang Chen](https://github.com/cblsjtu) * [Yangqing Men](https://github.com/yanqingmen) @@ -91,7 +91,7 @@ List of Contributors * [Henry Gouk](https://github.com/henrygouk) * [Pierre de Sahb](https://github.com/pdesahb) * [liuliang01](https://github.com/liuliang01) - - liuliang01 added support for the qid column for LibSVM input format. This makes ranking task easier in distributed setting. + - liuliang01 added support for the qid column for LIBSVM input format. This makes ranking task easier in distributed setting. * [Andrew Thia](https://github.com/BlueTea88) - Andrew Thia implemented feature interaction constraints * [Wei Tian](https://github.com/weitian) diff --git a/NEWS.md b/NEWS.md index cb80b0e311d3..30123442a2b4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1105,7 +1105,7 @@ This release marks a major milestone for the XGBoost project. * Specify version macro in CMake. (#4730) * Include dmlc-tracker into XGBoost Python package (#4731) * [CI] Use long key ID for Ubuntu repository fingerprints. (#4783) -* Remove plugin, cuda related code in automake & autoconf files (#4789) +* Remove plugin, CUDA related code in automake & autoconf files (#4789) * Skip related tests when scikit-learn is not installed. (#4791) * Ignore vscode and clion files (#4866) * Use bundled Google Test by default (#4900) @@ -1136,7 +1136,7 @@ This release marks a major milestone for the XGBoost project. ### Usability Improvements, Documentation * Add Random Forest API to Python API doc (#4500) * Fix Python demo and doc. (#4545) -* Remove doc about not supporting cuda 10.1 (#4578) +* Remove doc about not supporting CUDA 10.1 (#4578) * Address some sphinx warnings and errors, add doc for building doc. (#4589) * Add instruction to run formatting checks locally (#4591) * Fix docstring for `XGBModel.predict()` (#4592) @@ -1151,7 +1151,7 @@ This release marks a major milestone for the XGBoost project. 
* Update XGBoost4J-Spark doc (#4804) * Regular formatting for evaluation metrics (#4803) * [jvm-packages] Refine documentation for handling missing values in XGBoost4J-Spark (#4805) -* Monitor for distributed envorinment (#4829). This is useful for identifying performance bottleneck. +* Monitor for distributed environment (#4829). This is useful for identifying performance bottleneck. * Add check for length of weights and produce a good error message (#4872) * Fix DMatrix doc (#4884) * Export C++ headers in CMake installation (#4897) @@ -1623,7 +1623,7 @@ This release is packed with many new features and bug fixes. ### Known issues * Quantile sketcher fails to produce any quantile for some edge cases (#2943) * The `hist` algorithm leaks memory when used with learning rate decay callback (#3579) -* Using custom evaluation funciton together with early stopping causes assertion failure in XGBoost4J-Spark (#3595) +* Using custom evaluation function together with early stopping causes assertion failure in XGBoost4J-Spark (#3595) * Early stopping doesn't work with `gblinear` learner (#3789) * Label and weight vectors are not reshared upon the change in number of GPUs (#3794). To get around this issue, delete the `DMatrix` object and re-load. * The `DMatrix` Python objects are initialized with incorrect values when given array slices (#3841) @@ -1717,7 +1717,7 @@ This version is only applicable for the Python package. The content is identical - Add scripts to cross-build and deploy artifacts (#3276, #3307) - Fix a compilation error for Scala 2.10 (#3332) * BREAKING CHANGES - - `XGBClassifier.predict_proba()` no longer accepts paramter `output_margin`. The paramater makes no sense for `predict_proba()` because the method is to predict class probabilities, not raw margin scores. + - `XGBClassifier.predict_proba()` no longer accepts parameter `output_margin`. The parameter makes no sense for `predict_proba()` because the method is to predict class probabilities, not raw margin scores. ## v0.71 (2018.04.11) * This is a minor release, mainly motivated by issues concerning `pip install`, e.g. #2426, #3189, #3118, and #3194. @@ -1733,7 +1733,7 @@ This version is only applicable for the Python package. The content is identical - AUC-PR metric for ranking task (#3172) - Monotonic constraints for 'hist' algorithm (#3085) * GPU support - - Create an abtract 1D vector class that moves data seamlessly between the main and GPU memory (#2935, #3116, #3068). This eliminates unnecessary PCIe data transfer during training time. + - Create an abstract 1D vector class that moves data seamlessly between the main and GPU memory (#2935, #3116, #3068). This eliminates unnecessary PCIe data transfer during training time. - Fix minor bugs (#3051, #3217) - Fix compatibility error for CUDA 9.1 (#3218) * Python package: @@ -1761,7 +1761,7 @@ This version is only applicable for the Python package. The content is identical * Refactored gbm to allow more friendly cache strategy - Specialized some prediction routine * Robust `DMatrix` construction from a sparse matrix -* Faster consturction of `DMatrix` from 2D NumPy matrices: elide copies, use of multiple threads +* Faster construction of `DMatrix` from 2D NumPy matrices: elide copies, use of multiple threads * Automatically remove nan from input data when it is sparse. 
- This can solve some of user reported problem of istart != hist.size * Fix the single-instance prediction function to obtain correct predictions @@ -1789,7 +1789,7 @@ This version is only applicable for the Python package. The content is identical - Faster, histogram-based tree algorithm (`tree_method='hist'`) . - GPU/CUDA accelerated tree algorithms (`tree_method='gpu_hist'` or `'gpu_exact'`), including the GPU-based predictor. - Monotonic constraints: when other features are fixed, force the prediction to be monotonic increasing with respect to a certain specified feature. - - Faster gradient caculation using AVX SIMD + - Faster gradient calculation using AVX SIMD - Ability to export models in JSON format - Support for Tweedie regression - Additional dropout options for DART: binomial+1, epsilon diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 7d161111d988..c3ea2acaf7fd 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -188,7 +188,7 @@ cb.reset.parameters <- function(new_params) { pnames <- gsub("\\.", "_", names(new_params)) nrounds <- NULL - # run some checks in the begining + # run some checks in the beginning init <- function(env) { nrounds <<- env$end_iteration - env$begin_iteration + 1 diff --git a/R-package/R/utils.R b/R-package/R/utils.R index c468c331bdc3..1b0f9eefe2a3 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -1,6 +1,6 @@ # -# This file is for the low level reuseable utility functions -# that are not supposed to be visibe to a user. +# This file is for the low level reusable utility functions +# that are not supposed to be visible to a user. # # @@ -284,7 +284,7 @@ xgb.createFolds <- function(y, k = 10) for (i in seq_along(numInClass)) { ## create a vector of integers from 1:k as many times as possible without ## going over the number of samples in the class. Note that if the number - ## of samples in a class is less than k, nothing is producd here. + ## of samples in a class is less than k, nothing is produced here. seqVector <- rep(seq_len(k), numInClass[i] %/% k) ## add enough random integers to get length(seqVector) == numInClass[i] if (numInClass[i] %% k > 0) seqVector <- c(seqVector, sample.int(k, numInClass[i] %% k)) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 65ad737252b1..2460e23a43fc 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -1,7 +1,7 @@ #' Construct xgb.DMatrix object #' #' Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. -#' Supported input file formats are either a libsvm text file or a binary file that was created previously by +#' Supported input file formats are either a LIBSVM text file or a binary file that was created previously by #' \code{\link{xgb.DMatrix.save}}). #' #' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character @@ -161,9 +161,9 @@ dimnames.xgb.DMatrix <- function(x) { #' The \code{name} field can be one of the following: #' #' \itemize{ -#' \item \code{label}: label Xgboost learn from ; +#' \item \code{label}: label XGBoost learn from ; #' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; +#' \item \code{base_margin}: base margin is the base prediction XGBoost will boost from ; #' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. #' #' } @@ -216,9 +216,9 @@ getinfo.xgb.DMatrix <- function(object, name, ...) 
{ #' The \code{name} field can be one of the following: #' #' \itemize{ -#' \item \code{label}: label Xgboost learn from ; +#' \item \code{label}: label XGBoost learn from ; #' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; +#' \item \code{base_margin}: base margin is the base prediction XGBoost will boost from ; #' \item \code{group}: number of rows in each group (to use with \code{rank:pairwise} objective). #' } #' diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index 81a5b04c126d..8f8e921a86b0 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -33,7 +33,7 @@ #' @param col_loess a color to use for the loess curves. #' @param span_loess the \code{span} parameter in \code{\link[stats]{loess}}'s call. #' @param which whether to do univariate or bivariate plotting. NOTE: only 1D is implemented so far. -#' @param plot whether a plot should be drawn. If FALSE, only a lits of matrices is returned. +#' @param plot whether a plot should be drawn. If FALSE, only a list of matrices is returned. #' @param ... other parameters passed to \code{plot}. #' #' @details @@ -157,7 +157,7 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, plot(x2plot, y, pch = pch, xlab = f, col = col, xlim = x_lim, ylim = y_lim, ylab = ylab, ...) grid() if (plot_loess) { - # compress x to 3 digits, and mean-aggredate y + # compress x to 3 digits, and mean-aggregate y zz <- data.table(x = signif(x, 3), y)[, .(.N, y = mean(y)), x] if (nrow(zz) <= 5) { lines(zz$x, zz$y, col = col_loess) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 2734656e7527..981412768436 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -26,7 +26,7 @@ #' \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1 #' \item \code{lambda} L2 regularization term on weights. Default: 1 #' \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 -#' \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1 +#' \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through XGBoost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1 #' \item \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length equals to the number of features in the training data. \code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint. #' \item \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. Feature index values should start from \code{0} (\code{0} references the first column). Leave argument unspecified for no interaction constraints. #' } @@ -51,10 +51,10 @@ #' \item \code{binary:logistic} logistic regression for binary classification. Output probability. #' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation. #' \item \code{binary:hinge}: hinge loss for binary classification. 
This makes predictions of 0 or 1, rather than producing probabilities. -#' \item \code{count:poisson}: poisson regression for count data, output mean of poisson distribution. \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization). +#' \item \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization). #' \item \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \code{h(t) = h0(t) * HR)}. #' \item \code{survival:aft}: Accelerated failure time model for censored survival time data. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details. -#' \item \code{aft_loss_distribution}: Probabilty Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. +#' \item \code{aft_loss_distribution}: Probability Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. #' \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class - 1}. #' \item \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class. #' \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss. @@ -126,11 +126,11 @@ #' Parallelization is automatically enabled if \code{OpenMP} is present. #' Number of threads can also be manually specified via \code{nthread} parameter. #' -#' The evaluation metric is chosen automatically by Xgboost (according to the objective) +#' The evaluation metric is chosen automatically by XGBoost (according to the objective) #' when the \code{eval_metric} parameter is not provided. #' User may set one or several \code{eval_metric} parameters. #' Note that when using a customized metric, only this single metric can be used. -#' The following is the list of built-in metrics for which Xgboost provides optimized implementation: +#' The following is the list of built-in metrics for which XGBoost provides optimized implementation: #' \itemize{ #' \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} #' \item \code{logloss} negative log-likelihood. 
\url{https://en.wikipedia.org/wiki/Log-likelihood} diff --git a/R-package/demo/00Index b/R-package/demo/00Index index 5c949d0a7922..13ffdc6b64fa 100644 --- a/R-package/demo/00Index +++ b/R-package/demo/00Index @@ -1,6 +1,6 @@ basic_walkthrough Basic feature walkthrough caret_wrapper Use xgboost to train in caret library -custom_objective Cutomize loss function, and evaluation metric +custom_objective Customize loss function, and evaluation metric boost_from_prediction Boosting from existing prediction predict_first_ntree Predicting using first n trees generalized_linear_model Generalized Linear Model @@ -8,8 +8,8 @@ cross_validation Cross validation create_sparse_matrix Create Sparse Matrix predict_leaf_indices Predicting the corresponding leaves early_stopping Early Stop in training -poisson_regression Poisson Regression on count data -tweedie_regression Tweddie Regression +poisson_regression Poisson regression on count data +tweedie_regression Tweedie regression gpu_accelerated GPU-accelerated tree building algorithms interaction_constraints Interaction constraints among features diff --git a/R-package/demo/README.md b/R-package/demo/README.md index e9a840cb7997..0a07a7426741 100644 --- a/R-package/demo/README.md +++ b/R-package/demo/README.md @@ -2,7 +2,7 @@ XGBoost R Feature Walkthrough ==== * [Basic walkthrough of wrappers](basic_walkthrough.R) * [Train a xgboost model from caret library](caret_wrapper.R) -* [Cutomize loss function, and evaluation metric](custom_objective.R) +* [Customize loss function, and evaluation metric](custom_objective.R) * [Boosting from existing prediction](boost_from_prediction.R) * [Predicting using first n trees](predict_first_ntree.R) * [Generalized Linear Model](generalized_linear_model.R) diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R index 445a19aeeefd..6c7f79a03f26 100644 --- a/R-package/demo/basic_walkthrough.R +++ b/R-package/demo/basic_walkthrough.R @@ -40,7 +40,7 @@ print("Train xgboost with verbose 2, also print information about tree") bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2, objective = "binary:logistic", verbose = 2) -# you can also specify data as file path to a LibSVM format input +# you can also specify data as file path to a LIBSVM format input # since we do not have this file with us, the following line is just for illustration # bst <- xgboost(data = 'agaricus.train.svm', max_depth = 2, eta = 1, nrounds = 2,objective = "binary:logistic") diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index 8de81afc990d..b935a8066cd8 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -2,17 +2,17 @@ require(xgboost) require(Matrix) require(data.table) if (!require(vcd)) { - install.packages('vcd') #Available in Cran. Used for its dataset with categorical values. + install.packages('vcd') #Available in CRAN. Used for its dataset with categorical values. require(vcd) } -# According to its documentation, Xgboost works only on numbers. +# According to its documentation, XGBoost works only on numbers. # Sometimes the dataset we have to work on have categorical data. # A categorical variable is one which have a fixed number of values. By example, if for each observation a variable called "Colour" can have only "red", "blue" or "green" as value, it is a categorical variable. # # In R, categorical variable is called Factor. # Type ?factor in console for more information. 
# -# In this demo we will see how to transform a dense dataframe with categorical variables to a sparse matrix before analyzing it in Xgboost. +# In this demo we will see how to transform a dense dataframe with categorical variables to a sparse matrix before analyzing it in XGBoost. # The method we are going to see is usually called "one hot encoding". #load Arthritis dataset in memory. @@ -25,13 +25,13 @@ df <- data.table(Arthritis, keep.rownames = FALSE) cat("Print the dataset\n") print(df) -# 2 columns have factor type, one has ordinal type (ordinal variable is a categorical variable with values wich can be ordered, here: None > Some > Marked). +# 2 columns have factor type, one has ordinal type (ordinal variable is a categorical variable with values which can be ordered, here: None > Some > Marked). cat("Structure of the dataset\n") str(df) # Let's add some new categorical features to see if it helps. Of course these feature are highly correlated to the Age feature. Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, even in case of highly correlated features. -# For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independant values. +# For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independent values. df[, AgeDiscret := as.factor(round(Age / 10, 0))] # Here is an even stronger simplification of the real age with an arbitrary split at 30 years old. I choose this value based on nothing. We will see later if simplifying the information based on arbitrary values is a good strategy (I am sure you already have an idea of how well it will work!). 
diff --git a/R-package/demo/cross_validation.R b/R-package/demo/cross_validation.R index e55ff3915cd4..639533a2cc64 100644 --- a/R-package/demo/cross_validation.R +++ b/R-package/demo/cross_validation.R @@ -22,10 +22,10 @@ xgb.cv(param, dtrain, nrounds, nfold = 5, metrics = 'error', showsd = FALSE) ### -# you can also do cross validation with cutomized loss function +# you can also do cross validation with customized loss function # See custom_objective.R ## -print ('running cross validation, with cutomsized loss function') +print ('running cross validation, with customized loss function') logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R index d251f40059d2..2d0914ab50be 100644 --- a/R-package/demo/custom_objective.R +++ b/R-package/demo/custom_objective.R @@ -12,7 +12,7 @@ watchlist <- list(eval = dtest, train = dtrain) num_round <- 2 # user define objective function, given prediction, return gradient and second order gradient -# this is loglikelihood loss +# this is log likelihood loss logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") preds <- 1 / (1 + exp(-preds)) @@ -23,9 +23,9 @@ logregobj <- function(preds, dtrain) { # user defined evaluation function, return a pair metric_name, result # NOTE: when you do customized loss function, the default prediction value is margin -# this may make buildin evalution metric not function properly +# this may make builtin evaluation metric not function properly # for example, we are doing logistic loss, the prediction is score before logistic transformation -# the buildin evaluation error assumes input is after logistic transformation +# the builtin evaluation error assumes input is after logistic transformation # Take this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R index 65210a5d366f..f733dce8d7bb 100644 --- a/R-package/demo/early_stopping.R +++ b/R-package/demo/early_stopping.R @@ -11,7 +11,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0) watchlist <- list(eval = dtest) num_round <- 20 # user define objective function, given prediction, return gradient and second order gradient -# this is loglikelihood loss +# this is log likelihood loss logregobj <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") preds <- 1 / (1 + exp(-preds)) @@ -21,9 +21,9 @@ logregobj <- function(preds, dtrain) { } # user defined evaluation function, return a pair metric_name, result # NOTE: when you do customized loss function, the default prediction value is margin -# this may make buildin evalution metric not function properly +# this may make builtin evaluation metric not function properly # for example, we are doing logistic loss, the prediction is score before logistic transformation -# the buildin evaluation error assumes input is after logistic transformation +# the builtin evaluation error assumes input is after logistic transformation # Take this in mind when you use the customization, and maybe you need write customized evaluation function evalerror <- function(preds, dtrain) { labels <- getinfo(dtrain, "label") diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 1ae722091a9b..246860bbd3f4 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -23,9 +23,9 @@ Get 
information of an xgb.DMatrix object The \code{name} field can be one of the following: \itemize{ - \item \code{label}: label Xgboost learn from ; + \item \code{label}: label XGBoost learn from ; \item \code{weight}: to do a weight rescale ; - \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; + \item \code{base_margin}: base margin is the base prediction XGBoost will boost from ; \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. } diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index a1ad6fc05705..696a04c2ddea 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -25,9 +25,9 @@ Set information of an xgb.DMatrix object The \code{name} field can be one of the following: \itemize{ - \item \code{label}: label Xgboost learn from ; + \item \code{label}: label XGBoost learn from ; \item \code{weight}: to do a weight rescale ; - \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; + \item \code{base_margin}: base margin is the base prediction XGBoost will boost from ; \item \code{group}: number of rows in each group (to use with \code{rank:pairwise} objective). } } diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 000efe6ca06f..32bd150660c1 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -22,7 +22,7 @@ It is useful when a 0 or some other extreme value represents missing values in d } \description{ Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. -Supported input file formats are either a libsvm text file or a binary file that was created previously by +Supported input file formats are either a LIBSVM text file or a binary file that was created previously by \code{\link{xgb.DMatrix.save}}). } \examples{ diff --git a/R-package/man/xgb.plot.shap.Rd b/R-package/man/xgb.plot.shap.Rd index abb21ce1957a..a55a551de286 100644 --- a/R-package/man/xgb.plot.shap.Rd +++ b/R-package/man/xgb.plot.shap.Rd @@ -87,7 +87,7 @@ more than 5 distinct values.} \item{which}{whether to do univariate or bivariate plotting. NOTE: only 1D is implemented so far.} -\item{plot}{whether a plot should be drawn. If FALSE, only a lits of matrices is returned.} +\item{plot}{whether a plot should be drawn. If FALSE, only a list of matrices is returned.} \item{...}{other parameters passed to \code{plot}.} } diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 53b7c42fb351..848fddca477d 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -65,7 +65,7 @@ xgboost( \item \code{colsample_bytree} subsample ratio of columns when constructing each tree. Default: 1 \item \code{lambda} L2 regularization term on weights. Default: 1 \item \code{alpha} L1 regularization term on weights. (there is no L1 reg on bias because it is not important). Default: 0 - \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through Xgboost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1 + \item \code{num_parallel_tree} Experimental parameter. number of trees to grow per round. Useful to test Random Forest through XGBoost (set \code{colsample_bytree < 1}, \code{subsample < 1} and \code{round = 1}) accordingly. Default: 1 \item \code{monotone_constraints} A numerical vector consists of \code{1}, \code{0} and \code{-1} with its length equals to the number of features in the training data. 
\code{1} is increasing, \code{-1} is decreasing and \code{0} is no constraint. \item \code{interaction_constraints} A list of vectors specifying feature indices of permitted interactions. Each item of the list represents one permitted interaction where specified features are allowed to interact with each other. Feature index values should start from \code{0} (\code{0} references the first column). Leave argument unspecified for no interaction constraints. } @@ -90,10 +90,10 @@ xgboost( \item \code{binary:logistic} logistic regression for binary classification. Output probability. \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation. \item \code{binary:hinge}: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. - \item \code{count:poisson}: poisson regression for count data, output mean of poisson distribution. \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization). + \item \code{count:poisson}: Poisson regression for count data, output mean of Poisson distribution. \code{max_delta_step} is set to 0.7 by default in poisson regression (used to safeguard optimization). \item \code{survival:cox}: Cox regression for right censored survival time data (negative values are considered right censored). Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function \code{h(t) = h0(t) * HR)}. \item \code{survival:aft}: Accelerated failure time model for censored survival time data. See \href{https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html}{Survival Analysis with Accelerated Failure Time} for details. - \item \code{aft_loss_distribution}: Probabilty Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. + \item \code{aft_loss_distribution}: Probability Density Function used by \code{survival:aft} and \code{aft-nloglik} metric. \item \code{multi:softmax} set xgboost to do multiclass classification using the softmax objective. Class is represented by a number and should be from 0 to \code{num_class - 1}. \item \code{multi:softprob} same as softmax, but prediction outputs a vector of ndata * nclass elements, which can be further reshaped to ndata, nclass matrix. The result contains predicted probabilities of each data point belonging to each class. \item \code{rank:pairwise} set xgboost to do ranking task by minimizing the pairwise loss. @@ -211,11 +211,11 @@ than the \code{xgboost} interface. Parallelization is automatically enabled if \code{OpenMP} is present. Number of threads can also be manually specified via \code{nthread} parameter. -The evaluation metric is chosen automatically by Xgboost (according to the objective) +The evaluation metric is chosen automatically by XGBoost (according to the objective) when the \code{eval_metric} parameter is not provided. User may set one or several \code{eval_metric} parameters. Note that when using a customized metric, only this single metric can be used. -The following is the list of built-in metrics for which Xgboost provides optimized implementation: +The following is the list of built-in metrics for which XGBoost provides optimized implementation: \itemize{ \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} \item \code{logloss} negative log-likelihood. 
\url{https://en.wikipedia.org/wiki/Log-likelihood} diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 3583b4f177d9..aeb8e6d30dcd 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -3,7 +3,7 @@ PKGROOT=./ ENABLE_STD_THREAD=0 # _*_ mode: Makefile; _*_ -# This file is only used for windows compilation from github +# This file is only used for Windows compilation from GitHub # It will be replaced with Makevars.in for the CRAN version .PHONY: all xgblib all: $(SHLIB) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index ddf2c4318854..5e2e3ee6cb96 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -331,7 +331,7 @@ test_that("train and predict with non-strict classes", { expect_error(pr <- predict(bst, train_dense), regexp = NA) expect_equal(pr0, pr) - # when someone inhertis from xgb.Booster, it should still be possible to use it as xgb.Booster + # when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster class(bst) <- c('super.Booster', 'xgb.Booster') expect_error(pr <- predict(bst, train_dense), regexp = NA) expect_equal(pr0, pr) @@ -346,7 +346,7 @@ test_that("max_delta_step works", { bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1) # model with restricted max_delta_step bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1) - # the no-restriction model is expected to have consistently lower loss during the initial interations + # the no-restriction model is expected to have consistently lower loss during the initial iterations expect_true(all(bst1$evaluation_log$train_logloss < bst2$evaluation_log$train_logloss)) expect_lt(mean(bst1$evaluation_log$train_logloss) / mean(bst2$evaluation_log$train_logloss), 0.8) }) diff --git a/R-package/tests/testthat/test_monotone.R b/R-package/tests/testthat/test_monotone.R index 54070cd619bf..756863061d6d 100644 --- a/R-package/tests/testthat/test_monotone.R +++ b/R-package/tests/testthat/test_monotone.R @@ -19,5 +19,5 @@ test_that("monotone constraints for regression", { pred.ord <- pred[ind] expect_true({ !any(diff(pred.ord) > 0) - }, "Monotone Contraint Satisfied") + }, "Monotone constraint satisfied") }) diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R index a731dc23a017..4f3527cdb31f 100644 --- a/R-package/tests/testthat/test_poisson_regression.R +++ b/R-package/tests/testthat/test_poisson_regression.R @@ -1,9 +1,9 @@ -context('Test poisson regression model') +context('Test Poisson regression model') require(xgboost) set.seed(1994) -test_that("poisson regression works", { +test_that("Poisson regression works", { data(mtcars) bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11], objective = 'count:poisson', nrounds = 10, verbose = 0) diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index c41f4f125def..7587956dab70 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -1,5 +1,5 @@ --- -title: "Understand your dataset with Xgboost" +title: "Understand your dataset with XGBoost" output: rmarkdown::html_vignette: css: vignette.css @@ -18,9 +18,9 @@ Understand your dataset with XGBoost Introduction ------------ -The purpose of this vignette is to show you how to use **Xgboost** to discover and understand your own dataset better. 
+The purpose of this vignette is to show you how to use **XGBoost** to discover and understand your own dataset better. -This vignette is not about predicting anything (see [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). We will explain how to use **Xgboost** to highlight the *link* between the *features* of your data and the *outcome*. +This vignette is not about predicting anything (see [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). We will explain how to use **XGBoost** to highlight the *link* between the *features* of your data and the *outcome*. Package loading: @@ -39,7 +39,7 @@ Preparation of the dataset ### Numeric v.s. categorical variables -**Xgboost** manages only `numeric` vectors. +**XGBoost** manages only `numeric` vectors. What to do when you have *categorical* data? @@ -66,7 +66,7 @@ data(Arthritis) df <- data.table(Arthritis, keep.rownames = FALSE) ``` -> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. +> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **XGBoost** **R** package use `data.table`. The first thing we want to do is to have a look to the first few lines of the `data.table`: @@ -166,7 +166,7 @@ output_vector = df[,Improved] == "Marked" Build the model --------------- -The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). +The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). ```{r} bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4, @@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4, You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better. -A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). +A small value for training error may be a symptom of [overfitting](https://en.wikipedia.org/wiki/Overfitting), meaning the model will not accurately predict the future values. > Here you can see the numbers decrease until line 7 and then increase. > @@ -304,19 +304,19 @@ Linear model may not be that smart in this scenario. Special Note: What about Random Forests™? 
----------------------------------------- -As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family. +As you may know, [Random Forests](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family. -Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). +Both trains several decision trees for one dataset. The *main* difference is that in Random Forests, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). This difference have an impact on a corner case in feature importance analysis: the *correlated features*. -Imagine two features perfectly correlated, feature `A` and feature `B`. For one specific tree, if the algorithm needs one of them, it will choose randomly (true in both boosting and Random Forests™). +Imagine two features perfectly correlated, feature `A` and feature `B`. For one specific tree, if the algorithm needs one of them, it will choose randomly (true in both boosting and Random Forests). -However, in Random Forests™ this random choice will be done for each tree, because each tree is independent from the others. Therefore, approximatively, depending of your parameters, 50% of the trees will choose feature `A` and the other 50% will choose feature `B`. So the *importance* of the information contained in `A` and `B` (which is the same, because they are perfectly correlated) is diluted in `A` and `B`. So you won't easily know this information is important to predict what you want to predict! It is even worse when you have 10 correlated features... +However, in Random Forests this random choice will be done for each tree, because each tree is independent from the others. Therefore, approximatively, depending of your parameters, 50% of the trees will choose feature `A` and the other 50% will choose feature `B`. So the *importance* of the information contained in `A` and `B` (which is the same, because they are perfectly correlated) is diluted in `A` and `B`. So you won't easily know this information is important to predict what you want to predict! It is even worse when you have 10 correlated features... In boosting, when a specific link between feature and outcome have been learned by the algorithm, it will try to not refocus on it (in theory it is what happens, reality is not always that simple). Therefore, all the importance will be on feature `A` or on feature `B` (but not both). You will know that one feature have an important role in the link between the observations and the label. It is still up to you to search for the correlated features to the one detected as important if you need to know all of them. -If you want to try Random Forests™ algorithm, you can tweak Xgboost parameters! +If you want to try Random Forests algorithm, you can tweak XGBoost parameters! 
For instance, to compute a model with 1000 trees, with a 0.5 factor on sampling rows and columns: @@ -326,7 +326,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -#Random Forest™ - 1000 trees +#Random Forest - 1000 trees bst <- xgboost(data = train$data, label = train$label, max_depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nrounds = 1, objective = "binary:logistic") #Boosting - 3 rounds @@ -335,4 +335,4 @@ bst <- xgboost(data = train$data, label = train$label, max_depth = 4, nrounds = > Note that the parameter `round` is set to `1`. -> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software. +> [**Random Forests**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software. diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index ab72c677938e..218b12eeb103 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -1,5 +1,5 @@ --- -title: "Xgboost presentation" +title: "XGBoost presentation" output: rmarkdown::html_vignette: css: vignette.css @@ -8,7 +8,7 @@ output: bibliography: xgboost.bib author: Tianqi Chen, Tong He, Michaël Benesty vignette: > - %\VignetteIndexEntry{Xgboost presentation} + %\VignetteIndexEntry{XGBoost presentation} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- @@ -19,9 +19,9 @@ XGBoost R Tutorial ## Introduction -**Xgboost** is short for e**X**treme **G**radient **Boost**ing package. +**XGBoost** is short for e**X**treme **G**radient **Boost**ing package. -The purpose of this Vignette is to show you how to use **Xgboost** to build a model and make predictions. +The purpose of this Vignette is to show you how to use **XGBoost** to build a model and make predictions. It is an efficient and scalable implementation of gradient boosting framework by @friedman2000additive and @friedman2001greedy. Two solvers are included: @@ -46,10 +46,10 @@ It has several features: ## Installation -### Github version +### GitHub version -For weekly updated version (highly recommended), install from *Github*: +For weekly updated version (highly recommended), install from *GitHub*: ```{r installGithub, eval=FALSE} install.packages("drat", repos="https://cran.rstudio.com") @@ -82,7 +82,7 @@ require(xgboost) ### Dataset presentation -In this example, we are aiming to predict whether a mushroom can be eaten or not (like in many tutorials, example data are the the same as you will use on in your every day life :-). +In this example, we are aiming to predict whether a mushroom can be eaten or not (like in many tutorials, example data are the same as you will use on in your every day life :-). Mushroom data is cited from UCI Machine Learning Repository. @Bache+Lichman:2013. 
@@ -148,7 +148,7 @@ We will train decision tree model using the following parameters: * `objective = "binary:logistic"`: we will train a binary classification model ; * `max_depth = 2`: the trees won't be deep, because our case is very simple ; -* `nthread = 2`: the number of cpu threads we are going to use; +* `nthread = 2`: the number of CPU threads we are going to use; * `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction. ```{r trainingSparse, message=F, warning=F} @@ -180,7 +180,7 @@ bstDMatrix <- xgboost(data = dtrain, max_depth = 2, eta = 1, nthread = 2, nround **XGBoost** has several features to help you to view how the learning progress internally. The purpose is to help you to set the best parameters, which is the key of your model quality. -One of the simplest way to see the training progress is to set the `verbose` option (see below for more advanced technics). +One of the simplest way to see the training progress is to set the `verbose` option (see below for more advanced techniques). ```{r trainingVerbose0, message=T, warning=F} # verbose = 0, no message @@ -253,7 +253,7 @@ The most important thing to remember is that **to do a classification, you just *Multiclass* classification works in a similar way. -This metric is **`r round(err, 2)`** and is pretty low: our yummly mushroom model works well! +This metric is **`r round(err, 2)`** and is pretty low: our yummy mushroom model works well! ## Advanced features diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd index 492f3a77ba1a..544186830d11 100644 --- a/R-package/vignettes/xgboostfromJSON.Rmd +++ b/R-package/vignettes/xgboostfromJSON.Rmd @@ -16,7 +16,7 @@ XGBoost from JSON ## Introduction -The purpose of this Vignette is to show you how to correctly load and work with an **Xgboost** model that has been dumped to JSON. **Xgboost** internally converts all data to [32-bit floats](https://en.wikipedia.org/wiki/Single-precision_floating-point_format), and the values dumped to JSON are decimal representations of these values. When working with a model that has been parsed from a JSON file, care must be taken to correctly treat: +The purpose of this Vignette is to show you how to correctly load and work with an **XGBoost** model that has been dumped to JSON. **XGBoost** internally converts all data to [32-bit floats](https://en.wikipedia.org/wiki/Single-precision_floating-point_format), and the values dumped to JSON are decimal representations of these values. When working with a model that has been parsed from a JSON file, care must be taken to correctly treat: - the input data, which should be converted to 32-bit floats - any 32-bit floats that were stored in JSON as decimal representations @@ -172,9 +172,9 @@ bst_from_json_preds <- ifelse(fl(data$dates) `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. 
+> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **XGBoost** **R** package use `data.table`. The first thing we want to do is to have a look to the first lines of the `data.table`: @@ -217,7 +217,7 @@ output_vector = df[,Improved] == "Marked" Build the model --------------- -The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [Xgboost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). +The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)). ```r @@ -422,19 +422,19 @@ Linear models may not be that smart in this scenario. Special Note: What about Random Forests™? ----------------------------------------- -As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family. +As you may know, [Random Forests](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family. -Both train several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). +Both train several decision trees for one dataset. The *main* difference is that in Random Forests, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). This difference have an impact on a corner case in feature importance analysis: the *correlated features*. -Imagine two features perfectly correlated, feature `A` and feature `B`. For one specific tree, if the algorithm needs one of them, it will choose randomly (true in both boosting and Random Forests™). +Imagine two features perfectly correlated, feature `A` and feature `B`. For one specific tree, if the algorithm needs one of them, it will choose randomly (true in both boosting and Random Forests). -However, in Random Forests™ this random choice will be done for each tree, because each tree is independent from the others. Therefore, approximatively, depending of your parameters, 50% of the trees will choose feature `A` and the other 50% will choose feature `B`. So the *importance* of the information contained in `A` and `B` (which is the same, because they are perfectly correlated) is diluted in `A` and `B`. So you won't easily know this information is important to predict what you want to predict! It is even worse when you have 10 correlated features... +However, in Random Forests this random choice will be done for each tree, because each tree is independent from the others. Therefore, approximatively, depending of your parameters, 50% of the trees will choose feature `A` and the other 50% will choose feature `B`. 
So the *importance* of the information contained in `A` and `B` (which is the same, because they are perfectly correlated) is diluted in `A` and `B`. So you won't easily know this information is important to predict what you want to predict! It is even worse when you have 10 correlated features... In boosting, when a specific link between feature and outcome have been learned by the algorithm, it will try to not refocus on it (in theory it is what happens, reality is not always that simple). Therefore, all the importance will be on feature `A` or on feature `B` (but not both). You will know that one feature have an important role in the link between the observations and the label. It is still up to you to search for the correlated features to the one detected as important if you need to know all of them. -If you want to try Random Forests™ algorithm, you can tweak Xgboost parameters! +If you want to try Random Forests algorithm, you can tweak XGBoost parameters! **Warning**: this is still an experimental parameter. @@ -447,7 +447,7 @@ data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -#Random Forest™ - 1000 trees +#Random Forest - 1000 trees bst <- xgboost(data = train$data, label = train$label, max.depth = 4, num_parallel_tree = 1000, subsample = 0.5, colsample_bytree =0.5, nrounds = 1, objective = "binary:logistic") ``` @@ -468,4 +468,4 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 4, nrounds = > Note that the parameter `round` is set to `1`. -> [**Random Forests™**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software. +> [**Random Forests**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software. diff --git a/doc/R-package/xgboostPresentation.md b/doc/R-package/xgboostPresentation.md index 1e01b099faf2..9fe4787eb80c 100644 --- a/doc/R-package/xgboostPresentation.md +++ b/doc/R-package/xgboostPresentation.md @@ -5,9 +5,9 @@ XGBoost R Tutorial ## Introduction -**Xgboost** is short for e**X**treme **G**radient **Boost**ing package. +**XGBoost** is short for e**X**treme **G**radient **Boost**ing package. -The purpose of this Vignette is to show you how to use **Xgboost** to build a model and make predictions. +The purpose of this Vignette is to show you how to use **XGBoost** to build a model and make predictions. It is an efficient and scalable implementation of gradient boosting framework by @friedman2000additive and @friedman2001greedy. 
Two solvers are included: @@ -32,10 +32,10 @@ It has several features: ## Installation -### Github version +### GitHub version -For weekly updated version (highly recommended), install from *Github*: +For weekly updated version (highly recommended), install from *GitHub*: ```r @@ -177,7 +177,7 @@ We will train decision tree model using the following parameters: * `objective = "binary:logistic"`: we will train a binary classification model ; * `max.depth = 2`: the trees won't be deep, because our case is very simple ; -* `nthread = 2`: the number of cpu threads we are going to use; +* `nthread = 2`: the number of CPU threads we are going to use; * `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction. diff --git a/doc/contrib/coding_guide.rst b/doc/contrib/coding_guide.rst index 9eff456354c8..6d407ba129e9 100644 --- a/doc/contrib/coding_guide.rst +++ b/doc/contrib/coding_guide.rst @@ -23,7 +23,7 @@ C++ Coding Guideline *********************** Python Coding Guideline *********************** -- Follow `PEP 8: Style Guide for Python Code `_. We use PyLint to automatically enforce PEP 8 style across our Python codebase. Before submitting your pull request, you are encouraged to run PyLint on your machine. See :ref:`running_checks_locally`. +- Follow `PEP 8: Style Guide for Python Code `_. We use Pylint to automatically enforce PEP 8 style across our Python codebase. Before submitting your pull request, you are encouraged to run Pylint on your machine. See :ref:`running_checks_locally`. - Docstrings should be in `NumPy docstring format `_. .. _running_checks_locally: diff --git a/doc/contrib/docs.rst b/doc/contrib/docs.rst index 4990f0e0b7b5..538428c0a811 100644 --- a/doc/contrib/docs.rst +++ b/doc/contrib/docs.rst @@ -25,6 +25,6 @@ inside the ``doc/`` directory. Examples ******** * Use cases and examples will be in `demo `_. -* We are super excited to hear about your story, if you have blogposts, - tutorials code solutions using XGBoost, please tell us and we will add +* We are super excited to hear about your story. If you have blog posts, + tutorials, or code solutions using XGBoost, please tell us, and we will add a link in the example pages. diff --git a/doc/contrib/donate.rst b/doc/contrib/donate.rst index ba8697434777..6571fef5febd 100644 --- a/doc/contrib/donate.rst +++ b/doc/contrib/donate.rst @@ -15,7 +15,7 @@ A robust and efficient **continuous integration (CI)** infrastructure is one of There are several CI services available free to open source projects, such as Travis CI and AppVeyor. The XGBoost project already utilizes Travis and AppVeyor. However, the XGBoost project has needs that these free services do not adequately address. In particular, the limited usage quota of resources such as CPU and memory leaves XGBoost developers unable to bring "too-intensive" tests. In addition, they do not offer test machines with GPUs for testing XGBoost-GPU code base which has been attracting more and more interest across many organizations. Consequently, the XGBoost project self-hosts a cloud server with Jenkins software installed: https://xgboost-ci.net/. -The self-hosted Jenkins CI server has recurring operating expenses. It utilizes a leading cloud provider (AWS) to accommodate variable workload. The master node serving the web interface is available 24/7, to accomodate contributions from people around the globe. 
In addition, the master node launches slave nodes on demand, to run the test suite on incoming contributions. To save cost, the slave nodes are terminated when they are no longer needed. +The self-hosted Jenkins CI server has recurring operating expenses. It utilizes a leading cloud provider (AWS) to accommodate variable workload. The master node serving the web interface is available 24/7, to accommodate contributions from people around the globe. In addition, the master node launches slave nodes on demand, to run the test suite on incoming contributions. To save cost, the slave nodes are terminated when they are no longer needed. To help defray the hosting cost, the XGBoost project seeks donations from third parties. @@ -25,7 +25,7 @@ Donors may choose to make one-time donations or recurring donations on monthly o Fiscal host: Open Source Collective 501(c)(6) --------------------------------------------- -The Project Management Committee (PMC) of the XGBoost project appointed `Open Source Collective `_ as their **fiscal host**. The platform is a 501(c)(6) registered entity and will manage the funds on the behalf of the PMC so that PMC members will not have to manage the funds directly. The platform currently hosts several well-known Javascript frameworks such as Babel, Vue, and Webpack. +The Project Management Committee (PMC) of the XGBoost project appointed `Open Source Collective `_ as their **fiscal host**. The platform is a 501(c)(6) registered entity and will manage the funds on the behalf of the PMC so that PMC members will not have to manage the funds directly. The platform currently hosts several well-known JavaScript frameworks such as Babel, Vue, and Webpack. All expenses incurred for hosting CI will be submitted to the fiscal host with receipts. Only the expenses in the following categories will be approved for reimbursement: diff --git a/doc/contrib/release.rst b/doc/contrib/release.rst index ae3e378b9aeb..ca0d1515f817 100644 --- a/doc/contrib/release.rst +++ b/doc/contrib/release.rst @@ -8,7 +8,7 @@ Versioning Policy Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE] -* MAJOR: We gurantee the API compatibility across releases with the same major version number. We expect to have a 1+ years development period for a new MAJOR release version. +* MAJOR: We guarantee the API compatibility across releases with the same major version number. We expect to have a 1+ years development period for a new MAJOR release version. * FEATURE: We ship new features, improvements and bug fixes through feature releases. The cycle length of a feature is decided by the size of feature roadmap. The roadmap is decided right after the previous release. * MAINTENANCE: Maintenance version only contains bug fixes. This type of release only occurs when we found significant correctness and/or performance bugs and barrier for users to upgrade to a new version of XGBoost smoothly. @@ -21,10 +21,10 @@ Making a Release 1. Modify ``CMakeLists.txt`` source tree, run CMake. 2. Modify ``DESCRIPTION`` in R-package. 3. Run ``change_version.sh`` in ``jvm-packages/dev`` -3. Commit the change, create a PR on github on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``. -4. Create a tag on release branch, either on github or locally. -5. Make a release on github tag page, which might be done with previous step if the tag is created on github. -6. Submit pip, cran and maven packages. 
- - pip package is maintained by [Hyunsu Cho](http://hyunsu-cho.io/) and [Jiaming Yuan](https://github.com/trivialfis). There's a helper script for downloading pre-built wheels on ``xgboost/dev/release-pypi.py`` along with simple instructions for using ``twine``. - - cran package is maintained by [Tong He](https://github.com/hetong007). - - maven packageis maintained by [Nan Zhu](https://github.com/CodingCat). +4. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``. +5. Create a tag on release branch, either on GitHub or locally. +6. Make a release on GitHub tag page, which might be done with previous step if the tag is created on GitHub. +7. Submit pip, CRAN, and Maven packages. + - The pip package is maintained by [Hyunsu Cho](http://hyunsu-cho.io/) and [Jiaming Yuan](https://github.com/trivialfis). There's a helper script for downloading pre-built wheels on ``xgboost/dev/release-pypi.py`` along with simple instructions for using ``twine``. + - The CRAN package is maintained by [Tong He](https://github.com/hetong007). + - The Maven package is maintained by [Nan Zhu](https://github.com/CodingCat). diff --git a/doc/gpu/index.rst index 6cd6bb3fb3d6..29c8ba590bbf 100644 --- a/doc/gpu/index.rst +++ b/doc/gpu/index.rst @@ -212,7 +212,7 @@ You can run benchmarks on synthetic data for binary classification: python tests/benchmark/benchmark_tree.py --tree_method=gpu_hist python tests/benchmark/benchmark_tree.py --tree_method=hist -Training time on 1,000,000 rows x 50 columns of random data with 500 boosting iterations and 0.25/0.75 test/train split with AMD Ryzen 7 2700 8 core @3.20GHz and Nvidia 1080ti yields the following results: +Training time on 1,000,000 rows x 50 columns of random data with 500 boosting iterations and 0.25/0.75 test/train split with AMD Ryzen 7 2700 8 core @3.20GHz and NVIDIA 1080ti yields the following results: +--------------+----------+ | tree_method | Time (s) | @@ -242,14 +242,14 @@ If you are getting out-of-memory errors on a big dataset, try the `external memo Developer notes =============== -The application may be profiled with annotations by specifying USE_NTVX to cmake and providing the path to the stand-alone nvtx header via NVTX_HEADER_DIR. Regions covered by the 'Monitor' class in cuda code will automatically appear in the nsight profiler. +The application may be profiled with annotations by specifying USE_NVTX to cmake and providing the path to the stand-alone NVTX header via NVTX_HEADER_DIR. Regions covered by the 'Monitor' class in CUDA code will automatically appear in the nsight profiler. ********** References ********** `Mitchell R, Frank E. (2017) Accelerating the XGBoost algorithm using GPU computing. PeerJ Computer Science 3:e127 https://doi.org/10.7717/peerj-cs.127 `_ -`Nvidia Parallel Forall: Gradient Boosting, Decision Trees and XGBoost with CUDA `_ +`NVIDIA Parallel Forall: Gradient Boosting, Decision Trees and XGBoost with CUDA `_ `Out-of-Core GPU Gradient Boosting `_ diff --git a/doc/jvm/xgboost4j_spark_tutorial.rst index 81f7386e88e8..2a3bd1c7aba3 100644 --- a/doc/jvm/xgboost4j_spark_tutorial.rst +++ b/doc/jvm/xgboost4j_spark_tutorial.rst @@ -58,7 +58,7 @@ In this section, we use `Iris `_ d showcase how we use Spark to transform raw dataset and make it fit to the data interface of XGBoost. Iris dataset is shipped in CSV format.
Each instance contains 4 features, "sepal length", "sepal width", -"petal length" and "petal width". In addition, it contains the "class" columnm, which is essentially the label with three possible values: "Iris Setosa", "Iris Versicolour" and "Iris Virginica". +"petal length" and "petal width". In addition, it contains the "class" column, which is essentially the label with three possible values: "Iris Setosa", "Iris Versicolour" and "Iris Virginica". Read Dataset with Spark's Built-In Reader ----------------------------------------- @@ -562,7 +562,7 @@ Checkpoint During Training Transient failures are also commonly seen in production environment. To simplify the design of XGBoost, we stop training if any of the distributed workers fail. However, if the training fails after having been through a long time, it would be a great waste of resources. -We support creating checkpoint during training to facilitate more efficient recovery from failture. To enable this feature, you can set how many iterations we build each checkpoint with ``setCheckpointInterval`` and the location of checkpoints with ``setCheckpointPath``: +We support creating checkpoint during training to facilitate more efficient recovery from failure. To enable this feature, you can set how many iterations we build each checkpoint with ``setCheckpointInterval`` and the location of checkpoints with ``setCheckpointPath``: .. code-block:: scala diff --git a/doc/parameter.rst b/doc/parameter.rst index ad45f31ec832..523023b51c4f 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -191,7 +191,7 @@ Parameters for Tree Booster - Choices: ``default``, ``update`` - ``default``: The normal boosting process which creates new trees. - - ``update``: Starts from an existing model and only updates its trees. In each boosting iteration, a tree from the initial model is taken, a specified sequence of updaters is run for that tree, and a modified tree is added to the new model. The new model would have either the same or smaller number of trees, depending on the number of boosting iteratons performed. Currently, the following built-in updaters could be meaningfully used with this process type: ``refresh``, ``prune``. With ``process_type=update``, one cannot use updaters that create new trees. + - ``update``: Starts from an existing model and only updates its trees. In each boosting iteration, a tree from the initial model is taken, a specified sequence of updaters is run for that tree, and a modified tree is added to the new model. The new model would have either the same or smaller number of trees, depending on the number of boosting iterations performed. Currently, the following built-in updaters could be meaningfully used with this process type: ``refresh``, ``prune``. With ``process_type=update``, one cannot use updaters that create new trees. * ``grow_policy`` [default= ``depthwise``] @@ -362,15 +362,15 @@ Specify the learning task and the corresponding learning objective. The objectiv - ``binary:logistic``: logistic regression for binary classification, output probability - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation - ``binary:hinge``: hinge loss for binary classification. This makes predictions of 0 or 1, rather than producing probabilities. 
- - ``count:poisson`` --poisson regression for count data, output mean of poisson distribution + - ``count:poisson`` --poisson regression for count data, output mean of Poisson distribution - - ``max_delta_step`` is set to 0.7 by default in poisson regression (used to safeguard optimization) + - ``max_delta_step`` is set to 0.7 by default in Poisson regression (used to safeguard optimization) - ``survival:cox``: Cox regression for right censored survival time data (negative values are considered right censored). Note that predictions are returned on the hazard ratio scale (i.e., as HR = exp(marginal_prediction) in the proportional hazard function ``h(t) = h0(t) * HR``). - ``survival:aft``: Accelerated failure time model for censored survival time data. See :doc:`/tutorials/aft_survival_analysis` for details. - - ``aft_loss_distribution``: Probabilty Density Function used by ``survival:aft`` objective and ``aft-nloglik`` metric. + - ``aft_loss_distribution``: Probability Density Function used by ``survival:aft`` objective and ``aft-nloglik`` metric. - ``multi:softmax``: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes) - ``multi:softprob``: same as softmax, but output a vector of ``ndata * nclass``, which can be further reshaped to ``ndata * nclass`` matrix. The result contains predicted probability of each data point belonging to each class. - ``rank:pairwise``: Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized diff --git a/doc/python/callbacks.rst b/doc/python/callbacks.rst index 943df4d511b8..b3302d7f7304 100644 --- a/doc/python/callbacks.rst +++ b/doc/python/callbacks.rst @@ -4,7 +4,7 @@ Callback Functions This document gives a basic walkthrough of callback function used in XGBoost Python package. In XGBoost 1.3, a new callback interface is designed for Python package, which -provides the flexiblity of designing various extension for training. Also, XGBoost has a +provides the flexibility of designing various extension for training. Also, XGBoost has a number of pre-defined callbacks for supporting early stopping, checkpoints etc. diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index e57e218173a9..2d0b1ed9f960 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -1,6 +1,6 @@ Python API Reference ==================== -This page gives the Python API reference of xgboost, please also refer to Python Package Introduction for more information about python package. +This page gives the Python API reference of xgboost, please also refer to Python Package Introduction for more information about the Python package. .. contents:: :backlinks: none diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst index 0f5df09f9425..0b0fccab87bc 100644 --- a/doc/python/python_intro.rst +++ b/doc/python/python_intro.rst @@ -1,7 +1,7 @@ ########################### Python Package Introduction ########################### -This document gives a basic walkthrough of xgboost python package. +This document gives a basic walkthrough of the xgboost package for Python. **List of other Helpful Links** @@ -24,7 +24,7 @@ Data Interface -------------- The XGBoost python module is able to load data from: -- LibSVM text format file +- LIBSVM text format file - Comma-separated values (CSV) file - NumPy 2D array - SciPy 2D sparse array @@ -36,7 +36,7 @@ The XGBoost python module is able to load data from: The data is stored in a :py:class:`DMatrix ` object. 
-* To load a libsvm text file or a XGBoost binary file into :py:class:`DMatrix `: +* To load a LIBSVM text file or a XGBoost binary file into :py:class:`DMatrix `: .. code-block:: python diff --git a/doc/treemethod.rst b/doc/treemethod.rst index 97ebeb2e5371..beb703145e80 100644 --- a/doc/treemethod.rst +++ b/doc/treemethod.rst @@ -80,7 +80,7 @@ Other Updaters 1. ``Pruner``: It prunes the built tree by ``gamma`` parameter. ``pruner`` is usually used as part of other tree methods. -2. ``Refresh``: Refresh the statistic of bulilt trees on a new training dataset. +2. ``Refresh``: Refresh the statistic of built trees on a new training dataset. 3. ``Sync``: Synchronize the tree among workers when running distributed training. **************** @@ -90,12 +90,12 @@ Removed Updaters 2 Updaters were removed during development due to maintainability. We describe them here solely for the interest of documentation. First one is distributed colmaker, which was a distributed version of exact tree method. It required specialization for column based -spliting strategy and a different prediction procedure. As the exact tree method is slow +splitting strategy and a different prediction procedure. As the exact tree method is slow by itself and scaling is even less efficient, we removed it entirely. Second one is ``skmaker``. Per-node weighted sketching employed by ``grow_local_histmaker`` is slow, the ``skmaker`` was unmaintained and seems to be a workaround trying to eliminate the histogram creation step and uses sketching values directly during split evaluation. It was never tested and contained some unknown bugs, we decided to remove it and focus our resources on more promising algorithms instead. For accuracy, most of the time -``approx``, ``hist`` and ``gpu_hist`` are enough with some parameters tunning, so removing +``approx``, ``hist`` and ``gpu_hist`` are enough with some parameters tuning, so removing them don't have any real practical impact. diff --git a/doc/tutorials/aft_survival_analysis.rst b/doc/tutorials/aft_survival_analysis.rst index ca99ceb9c0db..adce5c3d0404 100644 --- a/doc/tutorials/aft_survival_analysis.rst +++ b/doc/tutorials/aft_survival_analysis.rst @@ -158,7 +158,7 @@ The parameter ``aft_loss_distribution`` corresponds to the distribution of the : Currently, you can choose from three probability distributions for ``aft_loss_distribution``: ========================= =========================================== -``aft_loss_distribution`` Probabilty Density Function (PDF) +``aft_loss_distribution`` Probability Density Function (PDF) ========================= =========================================== ``normal`` :math:`\dfrac{\exp{(-z^2/2)}}{\sqrt{2\pi}}` ``logistic`` :math:`\dfrac{e^z}{(1+e^z)^2}` diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst index 98849a3717b5..d23d8053d6f3 100644 --- a/doc/tutorials/c_api_tutorial.rst +++ b/doc/tutorials/c_api_tutorial.rst @@ -2,7 +2,7 @@ C API Tutorial ############################## -In this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. Later on, we will see some usefull tips for using C API and code snippets as examples to use various functions available in C API to perform basic task like loading, training model & predicting on test dataset. +In this tutorial, we are going to install XGBoost library & configure the CMakeLists.txt file of our C/C++ application to link XGBoost library with our application. 
Later on, we will see some useful tips for using C API and code snippets as examples to use various functions available in C API to perform basic tasks like loading, training a model & predicting on a test dataset. .. contents:: :backlinks: none @@ -68,11 +68,11 @@ To ensure that CMake can locate the XGBoost library, supply ``-DCMAKE_PREFIX_PAT Usefull Tips To Remember ************************ -Below are some usefull tips while using C API: +Below are some useful tips while using C API: 1. Error handling: Always check the return value of the C API functions. -a. In a C application: Use the following macro to guard all calls to XGBoost's C API functions. The macro prints all the error/ exception occured: +a. In a C application: Use the following macro to guard all calls to XGBoost's C API functions. The macro prints all the errors/exceptions that occurred: .. highlight:: c :linenothreshold: 5 diff --git a/doc/tutorials/custom_metric_obj.rst index 5633ae20abe5..eeb7e728a9ad 100644 --- a/doc/tutorials/custom_metric_obj.rst +++ b/doc/tutorials/custom_metric_obj.rst @@ -143,6 +143,6 @@ For fully reproducible source code and comparison plots, see `custom_rmsle.py `_ for details. diff --git a/doc/tutorials/dask.rst index e90889da294c..440e595c079e 100644 --- a/doc/tutorials/dask.rst +++ b/doc/tutorials/dask.rst @@ -127,7 +127,7 @@ In previous example we used ``DaskDMatrix`` as input to ``predict`` function. In practice, it's also possible to call ``predict`` function directly on dask collections like ``Array`` and ``DataFrame`` and might have better prediction performance. When ``DataFrame`` is used as prediction input, the result is a dask ``Series`` instead of -array. Also, there's inplace predict support on dask interface, which can help reducing +array. Also, there's in-place predict support on dask interface, which can help reducing both memory usage and prediction time. .. code-block:: python @@ -479,7 +479,7 @@ Here are some pratices on reducing memory usage with dask and xgboost. ``xgboost.dask.DaskDeviceQuantileDMatrix`` as a drop in replacement for ``DaskDMatrix`` to reduce overall memory usage. See ``demo/dask/gpu_training.py`` for an example. -- Use inplace prediction when possible. +- Use in-place prediction when possible. References: diff --git a/doc/tutorials/external_memory.rst index 1cdbe2486d9c..4dc22571bf3c 100644 --- a/doc/tutorials/external_memory.rst +++ b/doc/tutorials/external_memory.rst @@ -10,7 +10,7 @@ The external memory version takes in the following `URI `_. If -you have a dataset stored in a file similar to ``agaricus.txt.train`` with libSVM format, the external memory support can be enabled by: +you have a dataset stored in a file similar to ``agaricus.txt.train`` with LIBSVM format, the external memory support can be enabled by: .. code-block:: python diff --git a/doc/tutorials/input_format.rst index f0cb69c2ce21..923a82650b63 100644 --- a/doc/tutorials/input_format.rst +++ b/doc/tutorials/input_format.rst @@ -5,7 +5,7 @@ Text Input Format of DMatrix ****************** Basic Input Format ****************** -XGBoost currently supports two text formats for ingesting data: LibSVM and CSV. The rest of this document will describe the LibSVM format. (See `this Wikipedia article `_ for a description of the CSV format.).
Please be careful that, XGBoost does **not** understand file extensions, nor try to guess the file format, as there is no universal agreement upon file extension of LibSVM or CSV. Instead it employs `URI `_ format for specifying the precise input file type. For example if you provide a `csv` file ``./data.train.csv`` as input, XGBoost will blindly use the default libsvm parser to digest it and generate a parser error. Instead, users need to provide an uri in the form of ``train.csv?format=csv``. For external memory input, the uri should of a form similar to ``train.csv?format=csv#dtrain.cache``. See :ref:`python_data_interface` and :doc:`/tutorials/external_memory` also. +XGBoost currently supports two text formats for ingesting data: LIBSVM and CSV. The rest of this document will describe the LIBSVM format. (See `this Wikipedia article `_ for a description of the CSV format.). Please be careful that XGBoost does **not** understand file extensions, nor does it try to guess the file format, as there is no universal agreement upon file extension of LIBSVM or CSV. Instead it employs `URI `_ format for specifying the precise input file type. For example if you provide a `csv` file ``./data.train.csv`` as input, XGBoost will blindly use the default LIBSVM parser to digest it and generate a parser error. Instead, users need to provide a URI in the form of ``train.csv?format=csv``. For external memory input, the URI should be of a form similar to ``train.csv?format=csv#dtrain.cache``. See :ref:`python_data_interface` and :doc:`/tutorials/external_memory` also. For training or predicting, XGBoost takes an instance file with the format as below: @@ -23,7 +23,7 @@ Each line represent a single instance, and in the first line '1' is the instance ****************************************** Auxiliary Files for Additional Information ****************************************** -**Note: all information below is applicable only to single-node version of the package.** If you'd like to perform distributed training with multiple nodes, skip to the section `Embedding additional information inside LibSVM file`_. +**Note: all information below is applicable only to single-node version of the package.** If you'd like to perform distributed training with multiple nodes, skip to the section `Embedding additional information inside LIBSVM file`_. Group Input Format ================== @@ -72,13 +72,13 @@ XGBoost supports providing each instance an initial margin prediction. For examp XGBoost will take these values as initial margin prediction and boost from that. An important note about base_margin is that it should be margin prediction before transformation, so if you are doing logistic loss, you will need to put in value before logistic transformation. If you are using XGBoost predictor, use ``pred_margin=1`` to output margin values. *************************************************** -Embedding additional information inside LibSVM file +Embedding additional information inside LIBSVM file *************************************************** **This section is applicable to both single- and multiple-node settings.** Query ID Columns ================ -This is most useful for `ranking task `_, where the instances are grouped into query groups. You may embed query group ID for each instance in the LibSVM file by adding a token of form ``qid:xx`` in each row: +This is most useful for `ranking task `_, where the instances are grouped into query groups.
You may embed query group ID for each instance in the LIBSVM file by adding a token of form ``qid:xx`` in each row: .. code-block:: none :caption: ``train.txt`` @@ -98,7 +98,7 @@ Keep in mind the following restrictions: Instance weights ================ -You may specify instance weights in the LibSVM file by appending each instance label with the corresponding weight in the form of ``[label]:[weight]``, as shown by the following example: +You may specify instance weights in the LIBSVM file by appending each instance label with the corresponding weight in the form of ``[label]:[weight]``, as shown by the following example: .. code-block:: none :caption: ``train.txt`` diff --git a/doc/tutorials/rf.rst b/doc/tutorials/rf.rst index 97dfb1eb5bce..808dd38504c9 100644 --- a/doc/tutorials/rf.rst +++ b/doc/tutorials/rf.rst @@ -1,9 +1,9 @@ ######################### -Random Forests in XGBoost +Random Forests(TM) in XGBoost ######################### XGBoost is normally used to train gradient-boosted decision trees and other gradient -boosted models. Random forests use the same model representation and inference, as +boosted models. Random Forests use the same model representation and inference, as gradient-boosted decision trees, but a different training algorithm. One can use XGBoost to train a standalone random forest or use random forest as a base model for gradient boosting. Here we focus on training standalone random forest. diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst index d5b373d8ec4d..dfc05ac8cdc6 100644 --- a/doc/tutorials/saving_model.rst +++ b/doc/tutorials/saving_model.rst @@ -31,10 +31,10 @@ part of model, that's because objective controls transformation of global bias ( evaluation or continue the training with a different set of hyper-parameters etc. However, this is not the end of story. There are cases where we need to save something -more than just the model itself. For example, in distrbuted training, XGBoost performs +more than just the model itself. For example, in distributed training, XGBoost performs checkpointing operation. Or for some reasons, your favorite distributed computing framework decide to copy the model from one worker to another and continue the training in -there. In such cases, the serialisation output is required to contain enougth information +there. In such cases, the serialisation output is required to contain enough information to continue previous training without user providing any parameters again. We consider such scenario as **memory snapshot** (or memory based serialisation method) and distinguish it with normal model IO operation. Currently, memory snapshot is used in the following places: @@ -145,7 +145,7 @@ or in R: config <- xgb.config(bst) print(config) -Will print out something similiar to (not actual output as it's too long for demonstration): +Will print out something similar to (not actual output as it's too long for demonstration): .. code-block:: javascript diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index a8916b783bd7..f4d62439e84a 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -324,7 +324,7 @@ XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out); XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter); // NOLINT(*) /*! 
- * \brief Callback function prototype for reseting external iterator + * \brief Callback function prototype for resetting external iterator */ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLINT(*) @@ -333,8 +333,8 @@ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLIN * * \param iter A handle to external data iterator. * \param proxy A DMatrix proxy handle created by `XGProxyDMatrixCreate`. - * \param reset Callback function reseting the iterator state. - * \param next Callback function yieling the next batch of data. + * \param reset Callback function resetting the iterator state. + * \param next Callback function yielding the next batch of data. * \param missing Which value to represent missing value * \param nthread Number of threads to use, 0 for default. * \param max_bin Maximum number of bins for building histogram. @@ -461,7 +461,7 @@ XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, * - feature_type * * \param handle An instance of data matrix - * \param field Feild name + * \param field Field name * \param features Pointer to array of strings. * \param size Size of `features` pointer (number of strings passed in). * @@ -493,7 +493,7 @@ XGB_DLL int XGDMatrixSetStrFeatureInfo(DMatrixHandle handle, const char *field, * XGBoost. * * \param handle An instance of data matrix - * \param field Feild name + * \param field Field name * \param size Size of output pointer `features` (number of strings returned). * \param out_features Address of a pointer to array of strings. Result is stored in * thread local memory. @@ -533,7 +533,7 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, * - feature_weights * * \param handle An instance of data matrix - * \param field Feild name + * \param field Field name * \param data Pointer to consecutive memory storing data. * \param size Size of the data, this is relative to size of type. (Meaning NOT number * of bytes.) @@ -912,7 +912,7 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar( * * - Functions with the term "Config" handles save/loading configuration. It helps user * to study the internal of XGBoost. Also user can use the load method for specifying - * paramters in a structured way. These functions are introduced in 1.0.0, and are not + * parameters in a structured way. These functions are introduced in 1.0.0, and are not * yet stable. * * - Functions with the term "Serialization" are combined of above two. They are used in @@ -1145,7 +1145,7 @@ XGB_DLL int XGBoosterGetAttrNames(BoosterHandle handle, * - feature_type * * \param handle An instance of Booster - * \param field Feild name + * \param field Field name * \param features Pointer to array of strings. * \param size Size of `features` pointer (number of strings passed in). * @@ -1167,7 +1167,7 @@ XGB_DLL int XGBoosterSetStrFeatureInfo(BoosterHandle handle, const char *field, * function of XGBoost. * * \param handle An instance of Booster - * \param field Feild name + * \param field Field name * \param size Size of output pointer `features` (number of strings returned). * \param out_features Address of a pointer to array of strings. Result is stored in * thread local memory. diff --git a/include/xgboost/intrusive_ptr.h b/include/xgboost/intrusive_ptr.h index 879c0b48ac6f..9ebadb24bf8b 100644 --- a/include/xgboost/intrusive_ptr.h +++ b/include/xgboost/intrusive_ptr.h @@ -35,7 +35,7 @@ class IntrusivePtrCell { }; /*! - * \brief User defined function for returing embedded reference count. 
+ * \brief User defined function for returning embedded reference count. */ template IntrusivePtrCell &IntrusivePtrRefCount(T const *ptr) noexcept; diff --git a/include/xgboost/json.h b/include/xgboost/json.h index db464e052839..bec8927b3583 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -72,7 +72,7 @@ T* Cast(U* value) { } else { LOG(FATAL) << "Invalid cast, from " + value->TypeStr() + " to " + T().TypeStr(); } - return dynamic_cast(value); // supress compiler warning. + return dynamic_cast(value); // suppress compiler warning. } class JsonString : public Value { diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index dd19586d7005..b8f18022501f 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -52,9 +52,9 @@ struct XGBAPIThreadLocalEntry { std::vector ret_vec_float; /*! \brief temp variable of gradient pairs. */ std::vector tmp_gpair; - /*! \brief Temp variable for returing prediction result. */ + /*! \brief Temp variable for returning prediction result. */ PredictionCacheEntry prediction_entry; - /*! \brief Temp variable for returing prediction shape. */ + /*! \brief Temp variable for returning prediction shape. */ std::vector prediction_shape; }; diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h index bdc4dbe60152..dbaf4b8003ad 100644 --- a/include/xgboost/linalg.h +++ b/include/xgboost/linalg.h @@ -16,7 +16,7 @@ namespace xgboost { /*! - * \brief A veiw over a matrix on contigious storage. + * \brief A view over a matrix on contiguous storage. * * \tparam T data type of matrix */ @@ -35,7 +35,7 @@ template class MatrixView { /*! * \param vec storage. * \param strides Strides for matrix. - * \param shape Rows anc columns. + * \param shape Rows and columns. * \param device Where the data is stored in. */ MatrixView(HostDeviceVector *vec, std::array strides, diff --git a/include/xgboost/model.h b/include/xgboost/model.h index 3b661ae814b8..610c7a0f5c48 100644 --- a/include/xgboost/model.h +++ b/include/xgboost/model.h @@ -17,13 +17,13 @@ class Json; struct Model { virtual ~Model() = default; /*! - * \brief load the model from a json object - * \param in json object where to load the model from + * \brief load the model from a JSON object + * \param in JSON object where to load the model from */ virtual void LoadModel(Json const& in) = 0; /*! - * \brief saves the model config to a json object - * \param out json container where to save the model to + * \brief saves the model config to a JSON object + * \param out JSON container where to save the model to */ virtual void SaveModel(Json* out) const = 0; }; diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 4c8d57ae79bc..b8b35e644e39 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -5,7 +5,7 @@ * About NOLINTs in this file: * * If we want Span to work with std interface, like range for loop, the - * naming must be consistant with std, not XGBoost. Also, the interface also + * naming must be consistent with std, not XGBoost. Also, the interface also * conflicts with XGBoost coding style, specifically, the use of `explicit' * keyword. * @@ -51,8 +51,8 @@ * possible. * * There are other workarounds for MSVC, like _Unwrapped, _Verify_range ... - * Some of these are hiden magics of MSVC and I tried to avoid them. Should any - * of them become needed, please consult the source code of GSL, and possibily + * Some of these are hidden magics of MSVC and I tried to avoid them. 
Should any + * of them become needed, please consult the source code of GSL, and possibly * some explanations from this thread: * * https://github.com/Microsoft/GSL/pull/664 @@ -121,7 +121,7 @@ namespace detail { /*! * By default, XGBoost uses uint32_t for indexing data. int64_t covers all * values uint32_t can represent. Also, On x86-64 Linux, GCC uses long int to - * represent ptrdiff_t, which is just int64_t. So we make it determinstic + * represent ptrdiff_t, which is just int64_t. So we make it deterministic * here. */ using ptrdiff_t = typename std::conditional< // NOLINT @@ -354,7 +354,7 @@ XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1, * Interface might be slightly different, we stick with ISO. * * GSL uses C++14/17 features, which are not available here. - * GSL uses constexpr extensively, which is not possibile with limitation + * GSL uses constexpr extensively, which is not possible with limitation * of C++11. * GSL doesn't concern about CUDA. * @@ -371,7 +371,7 @@ XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1, * in CUDA. * Initializing from std::array is not supported. * - * ISO uses constexpr extensively, which is not possibile with limitation + * ISO uses constexpr extensively, which is not possible with limitation * of C++11. * ISO uses C++14/17 features, which is not available here. * ISO doesn't concern about CUDA. @@ -408,7 +408,7 @@ XGBOOST_DEVICE bool LexicographicalCompare(InputIt1 first1, InputIt1 last1, * beg++; // crash * \endcode * - * While hoding a pointer or reference should avoid the problem, its a + * While holding a pointer or reference should avoid the problem, it's a * compromise. Since we have subspan, it's acceptable not to support * passing iterator. */ diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h index 87c810d55a13..fd2d5cc1b2e0 100644 --- a/include/xgboost/tree_model.h +++ b/include/xgboost/tree_model.h @@ -396,7 +396,7 @@ class RegTree : public Model { * \brief Compares whether 2 trees are equal from a user's perspective. The equality * compares only non-deleted nodes. * - * \parm b The other tree. + * \param b The other tree. */ bool Equal(const RegTree& b) const; diff --git a/jvm-packages/README.md b/jvm-packages/README.md index 7185e951c0ca..c4c8898dd4ed 100644 --- a/jvm-packages/README.md +++ b/jvm-packages/README.md @@ -107,7 +107,7 @@ be found in the [examples package](https://github.com/dmlc/xgboost/tree/master/j There is an inconsistent issue between XGBoost4J-Spark and other language bindings of XGBoost. -When users use Spark to load trainingset/testset in LibSVM format with the following code snippet: +When users use Spark to load trainingset/testset in LIBSVM format with the following code snippet: ```scala spark.read.format("libsvm").load("trainingset_libsvm") diff --git a/rabit/doc/parameters.md b/rabit/doc/parameters.md index 37580d5a13a9..eca8d0f5df5d 100644 --- a/rabit/doc/parameters.md +++ b/rabit/doc/parameters.md @@ -11,7 +11,7 @@ and do not need to be manually configured. 
- The port of rabit tracker * rabit_task_id [automatically detected] - The unique identifier of computing process - - When running on hadoop, this is automatically extracted from enviroment variable + - When running on Hadoop, this is automatically extracted from environment variable * rabit_reduce_buffer [default = 256MB] - The memory buffer used to store intermediate result of reduction - Format "digits + unit", can be 128M, 1G diff --git a/rabit/include/rabit/c_api.h b/rabit/include/rabit/c_api.h index deb21d06c240..77d4b17fd35e 100644 --- a/rabit/include/rabit/c_api.h +++ b/rabit/include/rabit/c_api.h @@ -25,7 +25,7 @@ typedef unsigned long rbt_ulong; // NOLINT(*) /*! - * \brief intialize the rabit module, + * \brief initialize the rabit module, * call this once before using anything * The additional arguments is not necessary. * Usually rabit will detect settings @@ -87,7 +87,7 @@ RABIT_DLL void RabitGetProcessorName(char *out_name, * \brief broadcast an memory region to all others from root * * Example: int a = 1; Broadcast(&a, sizeof(a), root); - * \param sendrecv_data the pointer to send or recive buffer, + * \param sendrecv_data the pointer to send or receive buffer, * \param size the size of the data * \param root the root of process */ @@ -122,12 +122,12 @@ RABIT_DLL int RabitAllgather(void *sendrecvbuf, size_t total_size, * ... * Allreduce(&data[0], data.size()); * ... - * \param sendrecvbuf buffer for both sending and recving data + * \param sendrecvbuf buffer for both sending and receiving data * \param count number of elements to be reduced * \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include * \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) - * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_. * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \param prepare_arg argument used to passed into the lazy preprocessing function */ diff --git a/rabit/include/rabit/internal/rabit-inl.h b/rabit/include/rabit/internal/rabit-inl.h index 701f801e80ef..9289ea8806e6 100644 --- a/rabit/include/rabit/internal/rabit-inl.h +++ b/rabit/include/rabit/internal/rabit-inl.h @@ -102,7 +102,7 @@ inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype & } } // namespace op -// intialize the rabit engine +// initialize the rabit engine inline bool Init(int argc, char *argv[]) { return engine::Init(argc, argv); } diff --git a/rabit/include/rabit/internal/socket.h b/rabit/include/rabit/internal/socket.h index b4d77e15cb8b..bdd38fe8fbdf 100644 --- a/rabit/include/rabit/internal/socket.h +++ b/rabit/include/rabit/internal/socket.h @@ -615,7 +615,7 @@ struct PollHelper { } /*! 
- * \brief peform poll on the set defined, read, write, exception + * \brief perform poll on the set defined, read, write, exception * \param timeout specify timeout in milliseconds(ms) if negative, means poll will block * \return */ diff --git a/rabit/src/allreduce_base.cc b/rabit/src/allreduce_base.cc index 5c3bb363d216..ffed988efb98 100644 --- a/rabit/src/allreduce_base.cc +++ b/rabit/src/allreduce_base.cc @@ -40,7 +40,7 @@ AllreduceBase::AllreduceBase() { err_link = nullptr; dmlc_role = "worker"; this->SetParam("rabit_reduce_buffer", "256MB"); - // setup possible enviroment variable of interest + // setup possible environment variable of interest // include dmlc support direct variables env_vars.emplace_back("DMLC_TASK_ID"); env_vars.emplace_back("DMLC_ROLE"); @@ -52,7 +52,7 @@ AllreduceBase::AllreduceBase() { // initialization function bool AllreduceBase::Init(int argc, char* argv[]) { - // setup from enviroment variables + // setup from environment variables // handler to get variables from env for (auto & env_var : env_vars) { const char *value = getenv(env_var.c_str()); @@ -294,7 +294,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) { rank = newrank; if (rank == -1) { - LOG(FATAL) << "tracker got overwhelemed and not able to assign correct rank"; + LOG(FATAL) << "tracker got overwhelmed and not able to assign correct rank"; } LOG(CONSOLE) << "task " << task_id << " got new rank " << rank; @@ -455,7 +455,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) { * It only means the current node get the correct result of Allreduce. * However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -477,7 +477,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, * \brief perform in-place allreduce, on sendrecvbuf, * this function implements tree-shape reduction * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -513,7 +513,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, } links[i].ResetSize(); } - // if no childs, no need to reduce + // if no children, no need to reduce if (nlink == static_cast(parent_index != -1)) { size_up_reduce = total_size; } @@ -548,7 +548,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, } } } - // finish runing allreduce + // finish running allreduce if (finished) break; // select must return watcher.Poll(timeout_sec); @@ -566,7 +566,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, } } } - // this node have childs, peform reduce + // this node have children, perform reduce if (nlink > static_cast(parent_index != -1)) { size_t buffer_size = 0; // do upstream reduce @@ -584,16 +584,16 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, max_reduce = (max_reduce / type_nbytes * type_nbytes); // if max reduce is less than total size, we reduce multiple times of - // eachreduce size + // each reduce size if (max_reduce < total_size) { max_reduce = max_reduce - max_reduce % eachreduce; } - // peform reduce, can be at most two rounds + // perform reduce, can be at most two rounds while (size_up_reduce < max_reduce) 
{ // start position size_t start = size_up_reduce % buffer_size; - // peform read till end of buffer + // perform read till end of buffer size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); utils::Assert(nread % type_nbytes == 0, "Allreduce: size check"); @@ -659,7 +659,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, // this is root, can use reduce as most recent point size_down_in = size_up_out = size_up_reduce; } - // can pass message down to childs + // can pass message down to children for (int i = 0; i < nlink; ++i) { if (i != parent_index && links[i].size_write < size_down_in) { ReturnType ret = links[i].WriteFromArray(sendrecvbuf, size_down_in); @@ -673,7 +673,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_, } /*! * \brief broadcast data from root to all nodes, this function can fail,and will return the cause of failure - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param total_size the size of the data to be broadcasted * \param root the root worker id to broadcast the data * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details @@ -851,7 +851,7 @@ AllreduceBase::TryAllgatherRing(void *sendrecvbuf_, size_t total_size, * * Ring-based algorithm * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -952,7 +952,7 @@ AllreduceBase::TryReduceScatterRing(void *sendrecvbuf_, * \brief perform in-place allreduce, on sendrecvbuf * use a ring based algorithm * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function diff --git a/rabit/src/allreduce_base.h b/rabit/src/allreduce_base.h index 2f0268a77b5c..14d2b7db8e73 100644 --- a/rabit/src/allreduce_base.h +++ b/rabit/src/allreduce_base.h @@ -111,12 +111,12 @@ class AllreduceBase : public IEngine { /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) - * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_. * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \param prepare_arg argument used to passed into the lazy preprocessing function */ @@ -131,7 +131,7 @@ class AllreduceBase : public IEngine { } /*! 
* \brief broadcast data from root to all nodes - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param size the size of the data to be broadcasted * \param root the root worker id to broadcast the data * \param _file caller file name used to generate unique cache key @@ -146,7 +146,7 @@ /*! * \brief load latest check point * \param global_model pointer to the globally shared model/state - * when calling this function, the caller need to gauranttees that global_model + * when calling this function, the caller needs to guarantee that global_model * is the same in all nodes * \param local_model pointer to local model, that is specific to current node/rank * this can be NULL when no local model is needed @@ -174,7 +174,7 @@ * every time we call check point, there is a version number which will increase by one * * \param global_model pointer to the globally shared model/state - * when calling this function, the caller need to gauranttees that global_model + * when calling this function, the caller needs to guarantee that global_model * is the same in all nodes * \param local_model pointer to local model, that is specific to current node/rank * this can be NULL when no local state is needed @@ -191,11 +191,11 @@ } /*! * \brief This function can be used to replace CheckPoint for global_model only, - * when certain condition is met(see detailed expplaination). + * when a certain condition is met (see detailed explanation). * * This is a "lazy" checkpoint such that only the pointer to global_model is * remembered and no memory copy is taken. To use this function, the user MUST ensure that: - * The global_model must remain unchanged util last call of Allreduce/Broadcast in current version finishs. + * The global_model must remain unchanged until the last call of Allreduce/Broadcast in current version finishes. * In another words, global_model model can be changed only between last call of * Allreduce/Broadcast and LazyCheckPoint in current version * * If user can only changes global_model in code3, then LazyCheckPoint can be used to * improve efficiency of the program. * \param global_model pointer to the globally shared model/state - * when calling this function, the caller need to gauranttees that global_model + * when calling this function, the caller needs to guarantee that global_model * is the same in all nodes * \sa LoadCheckPoint, CheckPoint, VersionNumber */ @@ -405,7 +405,7 @@ class AllreduceBase : public IEngine { * It only means the current node get the correct result of Allreduce.
* However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -429,7 +429,7 @@ class AllreduceBase : public IEngine { * \brief perform in-place allreduce, on sendrecvbuf, * this function implements tree-shape reduction * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -465,7 +465,7 @@ class AllreduceBase : public IEngine { * the k-th segment is defined by [k * step, min((k + 1) * step,count) ) * where step = ceil(count / world_size) * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -480,7 +480,7 @@ class AllreduceBase : public IEngine { * \brief perform in-place allreduce, on sendrecvbuf * use a ring based algorithm, reduce-scatter + allgather * - * \param sendrecvbuf_ buffer for both sending and recving data + * \param sendrecvbuf_ buffer for both sending and receiving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function @@ -505,7 +505,7 @@ class AllreduceBase : public IEngine { int seq_counter{0}; // NOLINT // version number of model int version_number {0}; // NOLINT - // whether the job is running in hadoop + // whether the job is running in Hadoop bool hadoop_mode; // NOLINT //---- local data related to link ---- // index of parent link, can be -1, meaning this is root of the tree @@ -540,9 +540,9 @@ class AllreduceBase : public IEngine { size_t reduce_buffer_size; // NOLINT // reduction method int reduce_method; // NOLINT - // mininum count of cells to use ring based method + // minimum count of cells to use ring based method size_t reduce_ring_mincount; // NOLINT - // minimul block size per tree reduce + // minimum block size per tree reduce size_t tree_reduce_minsize; // NOLINT // current rank int rank; // NOLINT diff --git a/rabit/src/engine.cc b/rabit/src/engine.cc index 48cbc0e45e4c..36e28a1771c1 100644 --- a/rabit/src/engine.cc +++ b/rabit/src/engine.cc @@ -121,7 +121,7 @@ void ReduceHandle::Allreduce(void *sendrecvbuf, size_t type_nbytes, size_t count, IEngine::PreprocFunction prepare_fun, void *prepare_arg) { - utils::Assert(redfunc_ != nullptr, "must intialize handle to call AllReduce"); + utils::Assert(redfunc_ != nullptr, "must initialize handle to call AllReduce"); GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, redfunc_, prepare_fun, prepare_arg); } diff --git a/rabit/src/engine_mpi.cc b/rabit/src/engine_mpi.cc index 2b7bd78fd31a..c5811cb76a6c 100644 --- a/rabit/src/engine_mpi.cc +++ b/rabit/src/engine_mpi.cc @@ -223,7 +223,7 @@ void ReduceHandle::Allreduce(void *sendrecvbuf, size_t type_nbytes, size_t count, IEngine::PreprocFunction prepare_fun, void *prepare_arg) { - utils::Assert(handle_ != NULL, "must intialize handle to call AllReduce"); + utils::Assert(handle_ != NULL, "must initialize handle to call AllReduce"); 
MPI::Op *op = reinterpret_cast(handle_); MPI::Datatype *dtype = reinterpret_cast(htype_); if (created_type_nbytes_ != type_nbytes || dtype == NULL) { diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index b8c42eef8b2f..f9a9bc15e300 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -638,10 +638,10 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, bst_float const **out_result) { API_BEGIN(); if (handle == nullptr) { - LOG(FATAL) << "Booster has not been intialized or has already been disposed."; + LOG(FATAL) << "Booster has not been initialized or has already been disposed."; } if (dmat == nullptr) { - LOG(FATAL) << "DMatrix has not been intialized or has already been disposed."; + LOG(FATAL) << "DMatrix has not been initialized or has already been disposed."; } auto config = Json::Load(StringView{c_json_config}); diff --git a/src/c_api/c_api_error.h b/src/c_api/c_api_error.h index ffe0591d9162..cb2255dc3aae 100644 --- a/src/c_api/c_api_error.h +++ b/src/c_api/c_api_error.h @@ -36,7 +36,7 @@ return 0; // NOLINT(*) #define CHECK_HANDLE() if (handle == nullptr) \ - LOG(FATAL) << "DMatrix/Booster has not been intialized or has already been disposed."; + LOG(FATAL) << "DMatrix/Booster has not been initialized or has already been disposed."; /*! * \brief Set the last error message needed by C API diff --git a/src/common/charconv.cc b/src/common/charconv.cc index 05af54f9653d..8be2c0a810b4 100644 --- a/src/common/charconv.cc +++ b/src/common/charconv.cc @@ -1,7 +1,7 @@ /*! * Copyright 2020 by XGBoost Contributors * - * \brief An implemenation of Ryu algorithm: + * \brief An implementation of the Ryu algorithm: * * https://dl.acm.org/citation.cfm?id=3192369 * @@ -686,7 +686,7 @@ int32_t ToCharsFloatImpl(float f, char * const result) { // This is an implementation for base 10 inspired by the one in libstdc++v3. The general // scheme is by decomposing the value into multiple combination of base (which is 10) by -// mod, until the value is lesser than 10, then last char is just char '0' (ascii 48) plus +// mod, until the value is less than 10, then the last char is just char '0' (ASCII 48) plus // that value. Other popular implementations can be found in RapidJson and libc++ (in // llvm-project), which uses the same general work flow with the same look up table, but // probably with better performance as they are more complicated. diff --git a/src/common/common.h b/src/common/common.h index 1f9f23e9884a..d8d42f75ca6a 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -55,7 +55,7 @@ namespace xgboost { namespace common { /*! * \brief Split a string by delimiter - * \param s String to be splitted. + * \param s String to be split. * \param delim The delimiter. */ inline std::vector Split(const std::string& s, char delim) { diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index 6693b2258658..271f4ac36149 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -253,7 +253,7 @@ __global__ void LaunchNKernel(int device_idx, size_t begin, size_t end, * function as argument. Hence functions like `LaunchN` cannot use this wrapper. * * - With c++ initialization list `{}` syntax, you are forced to comply with the CUDA type - * spcification. + * specification.
*/ class LaunchKernel { size_t shmem_size_; @@ -930,7 +930,7 @@ class SegmentSorter { // Items sorted within the group caching_device_vector ditems_; - // Original position of the items before they are sorted descendingly within its groups + // Original position of the items before they are sorted in descending order within their groups caching_device_vector doriginal_pos_; // Segments within the original list that delineates the different groups diff --git a/src/common/hist_util.h b/src/common/hist_util.h index a4c9a0cc144a..abfbbebcd50f 100644 --- a/src/common/hist_util.h +++ b/src/common/hist_util.h @@ -81,7 +81,7 @@ class HistogramCuts { } // Getters. Cuts should be of no use after building histogram indices, but currently - // it's deeply linked with quantile_hist, gpu sketcher and gpu_hist. So we preserve + // they are deeply linked with quantile_hist, gpu sketcher and gpu_hist, so we preserve // these for now. std::vector const& Ptrs() const { return cut_ptrs_.ConstHostVector(); } std::vector const& Values() const { return cut_values_.ConstHostVector(); } @@ -247,7 +247,7 @@ struct GHistIndexMatrix { // Create a global histogram matrix, given cut void Init(DMatrix* p_fmat, int max_num_bins); - // specific method for sparse data as no posibility to reduce allocated memory + // specific method for sparse data as no possibility to reduce allocated memory template void SetIndexData(common::Span index_data_span, size_t batch_threads, const SparsePage &batch, diff --git a/src/common/json.cc b/src/common/json.cc index 9502073d7df6..18dc1c71ce20 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -394,7 +394,7 @@ Json JsonReader::Parse() { return ParseArray(); } else if ( c == '-' || std::isdigit(c) || c == 'N' || c == 'I') { - // For now we only accept `NaN`, not `nan` as the later violiates LR(1) with `null`. + // For now we only accept `NaN`, not `nan` as the latter violates LR(1) with `null`. return ParseNumber(); } else if ( c == '\"' ) { return ParseString(); diff --git a/src/common/math.h b/src/common/math.h index c189babee954..5a98ad329ce4 100644 --- a/src/common/math.h +++ b/src/common/math.h @@ -77,7 +77,7 @@ XGBOOST_DEVICE inline void Softmax(Iterator start, Iterator end) { /*! * \brief Find the maximum iterator within the iterators - * \param begin The begining iterator. + * \param begin The beginning iterator. * \param end The end iterator. * \return the iterator point to the maximum value. * \tparam Iterator The type of the iterator. @@ -107,7 +107,7 @@ inline float LogSum(float x, float y) { /*! * \brief perform numerically safe logsum - * \param begin The begining iterator. + * \param begin The beginning iterator. * \param end The end iterator. * \return the iterator point to the maximum value. * \tparam Iterator The type of the iterator. @@ -135,7 +135,7 @@ inline static bool CmpSecond(const std::pair &a, return a.second > b.second; } -// Redefined here to workaround a VC bug that doesn't support overloadng for integer +// Redefined here to work around a VC bug that doesn't support overloading for integer // types.
template XGBOOST_DEVICE typename std::enable_if< diff --git a/src/common/quantile.cc b/src/common/quantile.cc index 3352069935d6..e67d7daec115 100644 --- a/src/common/quantile.cc +++ b/src/common/quantile.cc @@ -55,7 +55,7 @@ HostSketchContainer::CalcColumnSize(SparsePage const &batch, std::vector HostSketchContainer::LoadBalance( SparsePage const &batch, bst_feature_t n_columns, size_t const nthreads) { /* Some sparse datasets have their mass concentrating on small number of features. To - * avoid wating for a few threads running forever, we here distirbute different number + * avoid waiting for a few threads running forever, here we distribute different numbers * of columns to different threads according to number of entries. */ auto page = batch.GetView(); diff --git a/src/common/quantile.cu b/src/common/quantile.cu index 6c5f162c33c1..33892d589bfc 100644 --- a/src/common/quantile.cu +++ b/src/common/quantile.cu @@ -184,9 +184,9 @@ common::Span> MergePath( }); // Compute the index for both x and y (which of the element in a and b are used in each - // comparison) by scaning the binary merge path. Take output [(x_0, y_0), (x_0, y_1), + // comparison) by scanning the binary merge path. Take output [(x_0, y_0), (x_0, y_1), // ...] as an example, the comparison between (x_0, y_0) adds 1 step in the merge path. - // Asumming y_0 is less than x_0 so this step is torward the end of y. After the + // Assuming y_0 is less than x_0 so this step is toward the end of y. After the // comparison, index of y is incremented by 1 from y_0 to y_1, and at the same time, y_0 // is landed into output as the first element in merge result. The scan result is the // subscript of x and y. @@ -367,7 +367,7 @@ void SketchContainer::Push(Span entries, Span columns_ptr, size_t SketchContainer::ScanInput(Span entries, Span d_columns_ptr_in) { /* There are 2 types of duplication. First is duplicated feature values, which comes * from user input data. Second is duplicated sketching entries, which is generated by - * prunning or merging. We preserve the first type and remove the second type. + * pruning or merging. We preserve the first type and remove the second type. */ timer_.Start(__func__); dh::safe_cuda(cudaSetDevice(device_)); diff --git a/src/common/survival_util.h b/src/common/survival_util.h index 1a384e8de232..e891edb5428c 100644 --- a/src/common/survival_util.h +++ b/src/common/survival_util.h @@ -44,7 +44,7 @@ constexpr double kMaxGradient = 15.0; constexpr double kMinHessian = 1e-16; // Ensure that no data point gets zero hessian constexpr double kMaxHessian = 15.0; -constexpr double kEps = 1e-12; // A denomitor in a fraction should not be too small +constexpr double kEps = 1e-12; // A denominator in a fraction should not be too small // Clip (limit) x to fit range [x_min, x_max]. // If x < x_min, return x_min; if x > x_max, return x_max; if x_min <= x <= x_max, return x. diff --git a/src/common/transform.h b/src/common/transform.h index 85f1c0c43245..79c97391e730 100644 --- a/src/common/transform.h +++ b/src/common/transform.h @@ -52,7 +52,7 @@ __global__ void LaunchCUDAKernel(Functor _func, Range _range, * * If you use it in a function that can be compiled by both nvcc and host * compiler, the behaviour is un-defined! Because your function is NOT - * duplicated by `CompiledWithCuda`. At link time, cuda compiler resolution + * duplicated by `CompiledWithCuda`. At link time, CUDA compiler resolution * will merge functions with same signature.
*/ template @@ -155,7 +155,7 @@ class Transform { _func, shard_range, UnpackHDVOnDevice(_vectors)...); } #else - /*! \brief Dummy funtion defined when compiling for CPU. */ + /*! \brief Dummy function defined when compiling for CPU. */ template ::type* = nullptr, typename... HDV> void LaunchCUDA(Functor _func, HDV*...) const { diff --git a/src/common/version.cc b/src/common/version.cc index 6f1af9fa6475..d5407e04b1b4 100644 --- a/src/common/version.cc +++ b/src/common/version.cc @@ -36,7 +36,7 @@ Version::TripletT Version::Load(Json const& in) { Version::TripletT Version::Load(dmlc::Stream* fi) { XGBoostVersionT major{0}, minor{0}, patch{0}; - // This is only used in DMatrix serialization, so doesn't break model compability. + // This is only used in DMatrix serialization, so doesn't break model compatibility. std::string msg { "Incorrect version format found in binary file. " "Binary file from XGBoost < 1.0.0 is no longer supported. " "Please generate it again." }; diff --git a/src/common/version.h b/src/common/version.h index f143ed36afd6..cf562abd98c5 100644 --- a/src/common/version.h +++ b/src/common/version.h @@ -17,7 +17,7 @@ struct Version { using TripletT = std::tuple; static const TripletT kInvalid; - // Save/Load version info to Json document + // Save/Load version info to JSON document static TripletT Load(Json const& in); static void Save(Json* out); diff --git a/src/data/adapter.h b/src/data/adapter.h index 92b4a8b9b21f..80c14e27273c 100644 --- a/src/data/adapter.h +++ b/src/data/adapter.h @@ -42,7 +42,7 @@ namespace data { * This abstraction allows us to read through different sparse matrix formats * using the same interface. In particular we can write a DMatrix constructor * that uses the same code to construct itself from a CSR matrix, CSC matrix, - * dense matrix, csv, libsvm file, or potentially other formats. To see why this + * dense matrix, CSV, LIBSVM file, or potentially other formats. To see why this * is necessary, imagine we have 5 external matrix formats and 5 internal * DMatrix types where each DMatrix needs a custom constructor for each possible * input. The number of constructors is 5*5=25. Using an abstraction over the @@ -736,7 +736,7 @@ class IteratorAdapter : public dmlc::DataIter { size_t columns_; size_t row_offset_; - // at the beinning. + // at the beginning. bool at_first_; // handle to the iterator, DataIterHandle data_handle_; diff --git a/src/data/ellpack_page.cu b/src/data/ellpack_page.cu index 7cc49a601221..59eee0e6ff99 100644 --- a/src/data/ellpack_page.cu +++ b/src/data/ellpack_page.cu @@ -187,7 +187,7 @@ template void CopyDataToEllpack(const AdapterBatchT& batch, EllpackPageImpl* dst, int device_idx, float missing) { // Some witchcraft happens here - // The goal is to copy valid elements out of the input to an ellpack matrix + // The goal is to copy valid elements out of the input to an ELLPACK matrix // with a given row stride, using no extra working memory Standard stream // compaction needs to be modified to do this, so we manually define a // segmented stream compaction via operators on an inclusive scan. The output diff --git a/src/data/ellpack_page.cuh b/src/data/ellpack_page.cuh index 128e8a7ee9d1..ddee683ed51e 100644 --- a/src/data/ellpack_page.cuh +++ b/src/data/ellpack_page.cuh @@ -13,13 +13,13 @@ #include namespace xgboost { -/** \brief Struct for accessing and manipulating an ellpack matrix on the +/** \brief Struct for accessing and manipulating an ELLPACK matrix on the * device. 
Does not own underlying memory and may be trivially copied into * kernels.*/ struct EllpackDeviceAccessor { /*! \brief Whether or not if the matrix is dense. */ bool is_dense; - /*! \brief Row length for ELLPack, equal to number of features. */ + /*! \brief Row length for ELLPACK, equal to number of features. */ size_t row_stride; size_t base_rowid{}; size_t n_rows{}; @@ -197,11 +197,11 @@ class EllpackPageImpl { public: /*! \brief Whether or not if the matrix is dense. */ bool is_dense; - /*! \brief Row length for ELLPack. */ + /*! \brief Row length for ELLPACK. */ size_t row_stride; size_t base_rowid{0}; size_t n_rows{}; - /*! \brief global index of histogram, which is stored in ELLPack format. */ + /*! \brief global index of histogram, which is stored in ELLPACK format. */ HostDeviceVector gidx_buffer; private: diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index cc7e1d9d62e4..50d5b963b03d 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -563,7 +563,7 @@ GBTree::GetPredictor(HostDeviceVector const *out_pred, // GPU_Hist by default has prediction cache calculated from quantile values, // so GPU Predictor is not used for training dataset. But when XGBoost // performs continue training with an existing model, the prediction cache is - // not availbale and number of trees doesn't equal zero, the whole training + // not available and number of trees doesn't equal zero, the whole training // dataset got copied into GPU for precise prediction. This condition tries // to avoid such copy by calling CPU Predictor instead. if ((out_pred && out_pred->Size() == 0) && (model_.param.num_trees != 0) && @@ -831,7 +831,7 @@ class Dart : public GBTree { #pragma omp parallel for for (omp_ulong ridx = 0; ridx < n_rows; ++ridx) { const size_t offset = ridx * n_groups + group; - // Need to remove the base margin from indiviual tree. + // Need to remove the base margin from individual tree. h_out_predts[offset] += (h_predts[offset] - model_.learner_model_param->base_score) * w; } diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc index bc690dea9baa..193938c0f8e6 100644 --- a/src/metric/rank_metric.cc +++ b/src/metric/rank_metric.cc @@ -5,9 +5,9 @@ // possible for a valid device ordinal to be present for non GPU builds. However, it is possible // for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute // the metrics on CPU. To accommodate these scenarios, the following is done for the metrics -// accelarated on the GPU. +// accelerated on the GPU. 
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file) -// - An instance of the appropriate gpu metric type is created when a device ordinal is present +// - An instance of the appropriate GPU metric type is created when a device ordinal is present // - If the creation is successful, the metric computation is done on the device // - else, it falls back on the CPU // - The GPU metric types are *only* registered when xgboost is built for GPUs @@ -561,7 +561,7 @@ XGBOOST_REGISTER_METRIC(MAP, "map") .set_body([](const char* param) { return new EvalMAP("map", param); }); XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik") -.describe("Negative log partial likelihood of Cox proportioanl hazards model.") +.describe("Negative log partial likelihood of Cox proportional hazards model.") .set_body([](const char*) { return new EvalCox(); }); } // namespace metric } // namespace xgboost diff --git a/src/objective/regression_obj.cu b/src/objective/regression_obj.cu index 89bbb5081af3..cb5e0f48e5f7 100644 --- a/src/objective/regression_obj.cu +++ b/src/objective/regression_obj.cu @@ -271,7 +271,7 @@ class PoissonRegression : public ObjFunction { DMLC_REGISTER_PARAMETER(PoissonRegressionParam); XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson") -.describe("Possion regression for count data.") +.describe("Poisson regression for count data.") .set_body([]() { return new PoissonRegression(); }); diff --git a/src/tree/constraints.h b/src/tree/constraints.h index facc5ebeea34..580576a5889d 100644 --- a/src/tree/constraints.h +++ b/src/tree/constraints.h @@ -17,7 +17,7 @@ namespace xgboost { /*! * \brief Feature interaction constraint implementation for CPU tree updaters. * - * The interface is similiar to the one for GPU Hist. + * The interface is similar to the one for GPU Hist. */ class FeatureInteractionConstraintHost { protected: diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index 7d7edcb55e36..308d7fde6fad 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -125,7 +125,7 @@ struct UpdateNumeric { EvaluateSplitInputs const &inputs, DeviceSplitCandidate *best_split) { // Use pointer from cut to indicate begin and end of bins for each feature. - uint32_t gidx_begin = inputs.feature_segments[fidx]; // begining bin + uint32_t gidx_begin = inputs.feature_segments[fidx]; // beginning bin int split_gidx = (scan_begin + threadIdx.x) - 1; float fvalue; if (split_gidx < static_cast(gidx_begin)) { @@ -152,7 +152,7 @@ __device__ void EvaluateFeature( TempStorageT* temp_storage // temp memory for cub operations ) { // Use pointer from cut to indicate begin and end of bins for each feature. - uint32_t gidx_begin = inputs.feature_segments[fidx]; // begining bin + uint32_t gidx_begin = inputs.feature_segments[fidx]; // beginning bin uint32_t gidx_end = inputs.feature_segments[fidx + 1]; // end bin for i^th feature auto feature_hist = inputs.gradient_histogram.subspan(gidx_begin, gidx_end - gidx_begin); diff --git a/src/tree/gpu_hist/gradient_based_sampler.cuh b/src/tree/gpu_hist/gradient_based_sampler.cuh index 7394804d195f..3208e85a2d92 100644 --- a/src/tree/gpu_hist/gradient_based_sampler.cuh +++ b/src/tree/gpu_hist/gradient_based_sampler.cuh @@ -124,7 +124,7 @@ class ExternalMemoryGradientBasedSampling : public SamplingStrategy { * Processing Systems (pp. 3146-3154). * \see Zhu, R. (2016). Gradient-based sampling: An adaptive importance sampling for least-squares. 
* In Advances in Neural Information Processing Systems (pp. 406-414). - * \see Ohlsson, E. (1998). Sequential poisson sampling. Journal of official Statistics, 14(2), 149. + * \see Ohlsson, E. (1998). Sequential Poisson sampling. Journal of official Statistics, 14(2), 149. */ class GradientBasedSampler { public: diff --git a/src/tree/gpu_hist/histogram.cu b/src/tree/gpu_hist/histogram.cu index 1c03034eaaf1..aae2fbc04da7 100644 --- a/src/tree/gpu_hist/histogram.cu +++ b/src/tree/gpu_hist/histogram.cu @@ -17,7 +17,7 @@ namespace xgboost { namespace tree { -// Following 2 functions are slightly modifed version of fbcuda. +// Following 2 functions are slightly modified version of fbcuda. /* \brief Constructs a rounding factor used to truncate elements in a sum such that the sum of the truncated elements is the same no matter what the order of the sum is. diff --git a/src/tree/param.h b/src/tree/param.h index 2cae7686e6e1..0367dd2deec4 100644 --- a/src/tree/param.h +++ b/src/tree/param.h @@ -76,7 +76,7 @@ struct TrainParam : public XGBoostParameter { // the criteria to use for ranking splits std::string split_evaluator; - // ------ From cpu quantile histogram -------. + // ------ From CPU quantile histogram -------. // percentage threshold for treating a feature as sparse // e.g. 0.2 indicates a feature with fewer than 20% nonzeros is considered sparse double sparse_threshold; @@ -316,7 +316,7 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, StatT stat) { return CalcGain(p, stat.GetGrad(), stat.GetHess()); } -// Used in gpu code where GradientPair is used for gradient sum, not GradStats. +// Used in GPU code where GradientPair is used for gradient sum, not GradStats. template XGBOOST_DEVICE inline float CalcWeight(const TrainingParams &p, GpairT sum_grad) { return CalcWeight(p, sum_grad.GetGrad(), sum_grad.GetHess()); @@ -484,7 +484,7 @@ using SplitEntry = SplitEntryContainer; /* * \brief Parse the interaction constraints from string. - * \param constraint_str String storing the interfaction constraints: + * \param constraint_str String storing the interaction constraints: * * Example input string: * diff --git a/src/tree/tree_model.cc b/src/tree/tree_model.cc index 1d02278de5c2..06354bbf5e1e 100644 --- a/src/tree/tree_model.cc +++ b/src/tree/tree_model.cc @@ -157,7 +157,7 @@ TreeGenerator* TreeGenerator::Create(std::string const& attrs, FeatureMap const& if (pos != std::string::npos) { name = attrs.substr(0, pos); params = attrs.substr(pos+1, attrs.length() - pos - 1); - // Eliminate all occurances of single quote string. + // Eliminate all occurrences of single quote string. 
size_t pos = std::string::npos; while ((pos = params.find('\'')) != std::string::npos) { params.replace(pos, 1, "\""); @@ -1069,7 +1069,7 @@ void RegTree::CalculateContributionsApprox(const RegTree::FVec &feat, // Used by TreeShap // data we keep about our decision path // note that pweight is included for convenience and is not tied with the other attributes -// the pweight of the i'th path element is the permuation weight of paths with i-1 ones in them +// the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them struct PathElement { int feature_index; bst_float zero_fraction; @@ -1123,7 +1123,7 @@ void UnwindPath(PathElement *unique_path, unsigned unique_depth, } } -// determine what the total permuation weight would be if +// determine what the total permutation weight would be if // we unwound a previous extension in the decision path bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_depth, unsigned path_index) { diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 860ff60d71af..6a605ef04a21 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -196,8 +196,8 @@ class BaseMaker: public TreeUpdater { } } /*! - * \brief this is helper function uses column based data structure, - * reset the positions to the lastest one + * \brief This is a helper function that uses a column-based data structure + * and resets the positions to the latest one * \param nodes the set of nodes that contains the split to be used * \param p_fmat feature matrix needed for tree construction * \param tree the regression tree structure diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 6ac8a92f3749..9b24a453076b 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -549,7 +549,7 @@ struct GPUHistMakerDevice { bst_float weight = evaluator.CalcWeight( pos, param_d, GradStats{d_node_sum_gradients[pos]}); static_assert(!std::is_const::value, ""); - auto v_predt = out_preds_d; // for some reaon out_preds_d is const by both nvcc and clang. + auto v_predt = out_preds_d; // for some reason out_preds_d is const by both nvcc and clang. v_predt[d_ridx[local_idx]] += weight * param_d.learning_rate; }); row_partitioner.reset(); diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 353b365f2ebf..1c086b69a8e9 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -401,7 +401,7 @@ class CQHistMaker: public HistMaker { for (auto& sketch : sketchs_) { sketch.Init(info.num_row_, this->param_.sketch_eps); } - // intitialize the summary array + // initialize the summary array summary_array_.resize(sketchs_.size()); // setup maximum size unsigned max_size = this->param_.MaxSketchSize(); @@ -409,7 +409,7 @@ class CQHistMaker: public HistMaker { summary_array_[i].Reserve(max_size); } { - // get smmary + // get summary thread_sketch_.resize(omp_get_max_threads()); // TWOPASS: use the real set + split set in the column iteration.
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h index 7d6c5db975f2..5146df7ab129 100644 --- a/src/tree/updater_quantile_hist.h +++ b/src/tree/updater_quantile_hist.h @@ -441,7 +441,7 @@ class QuantileHistMaker: public TreeUpdater { std::unique_ptr qexpand_loss_guided_; std::vector qexpand_depth_wise_; // key is the node id which should be calculated by Subtraction Trick, value is the node which - // provides the evidence for substracts + // provides the evidence for subtraction std::vector nodes_for_subtraction_trick_; // list of nodes whose histograms would be built explicitly. std::vector nodes_for_explicit_hist_build_; diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 0d553638db44..968cf0320994 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -123,7 +123,7 @@ class TreeRefresher: public TreeUpdater { // start from groups that belongs to current data auto pid = 0; gstats[pid].Add(gpair[ridx]); - // tranverse tree + // traverse tree while (!tree[pid].IsLeaf()) { unsigned split_index = tree[pid].SplitIndex(); pid = tree.GetNext(pid, feat.GetFvalue(split_index), feat.IsMissing(split_index)); diff --git a/tests/README.md b/tests/README.md index 2dca9304801b..d360ac251f48 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,4 +1,4 @@ -This folder contains testcases for XGBoost c++ core, Python package and some other CI +This folder contains test cases for XGBoost c++ core, Python package and some other CI facilities. # Directories @@ -9,7 +9,7 @@ facilities. * python: Tests for Python package, demonstrations and CLI. For how to setup the dependencies for tests, see conda files in `ci_build`. * python-gpu: Similar to python tests, but for GPU. - * travis: CI facilities for travis. + * travis: CI facilities for Travis. * distributed: Legacy tests for distributed system. Most of the distributed tests are in Python tests using `dask` and jvm package using `spark`. * benchmark: Legacy benchmark code. There are a number of benchmark projects for @@ -17,4 +17,4 @@ facilities. # Others * pytest.ini: Describes the `pytest` marker for python tests, some markers are generated - by `conftest.py` file. \ No newline at end of file + by `conftest.py` file. diff --git a/tests/benchmark/generate_libsvm.py b/tests/benchmark/generate_libsvm.py index b0ec27318579..be152df39af4 100644 --- a/tests/benchmark/generate_libsvm.py +++ b/tests/benchmark/generate_libsvm.py @@ -1,4 +1,4 @@ -"""Generate synthetic data in LibSVM format.""" +"""Generate synthetic data in LIBSVM format.""" import argparse import io