From f0b06518a810301366fd1344ada556a6d94912c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs?= Date: Fri, 8 Dec 2023 22:45:44 +0100 Subject: [PATCH] Update linters --- .lintr | 31 +--- R/evaluate.R | 244 +++++++++++++++---------------- R/follow_up.R | 2 +- R/indexing.R | 1 - R/optimum.R | 2 +- experDesign.Rproj | 1 - tests/testthat/test-replicates.R | 2 +- 7 files changed, 126 insertions(+), 157 deletions(-) diff --git a/.lintr b/.lintr index f726dab..193ec6c 100644 --- a/.lintr +++ b/.lintr @@ -1,30 +1 @@ -linters: with_defaults( - absolute_path_linter, - assignment_linter, - closed_curly_linter = NULL, - commas_linter, - extraction_operator_linter, - implicit_integer_linter, - infix_spaces_linter, - line_length_linter(80), - nonportable_path_linter, - no_tab_linter, - object_length_linter(40), - object_name_linter("lowerCamelCase"), - object_usage_linter = NULL, - open_curly_linter = NULL, - semicolon_terminator_linter, - pipe_continuation_linter, - single_quotes_linter, - spaces_inside_linter, - spaces_left_parentheses_linter, - seq_linter, - todo_comment_linter, - trailing_blank_lines_linter, - trailing_whitespace_linter, - T_and_F_symbol_linter, - undesirable_function_linter, - undesirable_operator_linter, - unneeded_concatenation_linter, -default = default_linters) - +linters: linters_with_defaults() diff --git a/R/evaluate.R b/R/evaluate.R index bfe812a..d247256 100644 --- a/R/evaluate.R +++ b/R/evaluate.R @@ -1,122 +1,122 @@ -# A function to calculate the difference between a matrix and the original -# dataset -evaluate_helper <- function(x, original_x){ - stopifnot(ncol(x) == length(original_x)) - out <- sweep(x, 2, original_x, "-") - colMeans(abs(out), na.rm = TRUE) -} - -# To insert a vector or a matrix inside another matrix -insert <- function(matrix, vector, name = NULL) { - if (is.matrix(vector)) { - nam <- colnames(vector) - name <- rownames(vector) - } else { - nam <- names(vector) - } - matrix[name, nam] <- vector - matrix -} - -#' Evaluate each variable provided -#' -#' Measure some summary statistics of the whole cohort of samples -#' @param pheno Data.frame with information about the samples -#' @return A matrix with the mean, standard deviation, MAD values of the -#' numeric variables, the entropy of the categorical, and the amount of -#' `NA` per variable. -#' @family functions to evaluate samples -#' @export -#' @examples -#' data(survey, package = "MASS") -#' evaluate_orig(survey[, c("Sex", "Age", "Smoke")]) -evaluate_orig <- function(pheno) { - if (!.check_data(pheno)) { - warning("There might be some problems with the data use check_data().") - } - num <- is_num(pheno) - .evaluate_orig(pheno, num) -} - -.evaluate_orig <- function(pheno, num) { - stopifnot(!is.null(colnames(pheno))) - original <- empty_res(pheno, num) - - ev_subset(x = seq_len(nrow(pheno)), pheno = pheno, numeric = num, diff = original) -} - -#' Evaluates a data.frame -#' -#' Measures several indicators per group -#' @param i Index -#' @inheritParams evaluate_orig -#' @return An array of three dimensions with the mean, standard deviation -#' ([sd()]), and median absolute deviation ([mad()]) of the numeric variables, the -#' entropy of the categorical and the number of `NA` by each subgroup. -#' @family functions to evaluate samples -#' @seealso If you have already an index you can use [use_index()]. -#' @export -#' @examples -#' data(survey, package = "MASS") -#' index <- create_subset(nrow(survey), 50, 5) -#' ev_index <- evaluate_index(index, survey[, c("Sex", "Smoke")]) -#' ev_index["entropy", , ] -evaluate_index <- function(i, pheno) { - if (!.check_data(pheno)) { - warning("There might be some problems with the data use check_data().", call. = FALSE) - } - .evaluate_index(i, pheno, is_num(pheno)) - -} - - -.evaluate_index <- function(i, pheno, num) { - d <- empty_res(pheno, num) - if (sum(!num) > 1) { - variables <- c(colnames(pheno), "mix_cat") - } else { - variables <- colnames(pheno) - } - - out <- sapply(i, ev_subset, pheno = pheno, numeric = num, diff = d, simplify = "array") - - dimnames(out) <- list("stat" = rownames(d), - "variables" = variables, - "subgroups" = names(i)) - out - -} - -ev_subset <- function(x, pheno, numeric, diff){ - - subset_na <- na_orig <- colSums(is.na(pheno[x, , drop = FALSE])) - subset_na <- t(as.matrix(subset_na)) - rownames(subset_na) <- "na" - diff1 <- insert(diff, subset_na, "na") - - # Change the defaults of evaluations if any change happens on numeric - # or categorical tests (there are more tests or less) than - # 4 and 3 for categorical and numerical respectively. - if (sum(numeric) >= 1) { - # - pheno_num <- pheno[x, numeric, drop = FALSE] - subset_num <- apply(pheno_num, 2, function(y) { - c("sd" = sd(y, na.rm = TRUE), - "mean" = mean(y, na.rm = TRUE), - "mad" = mad(y, na.rm = TRUE)) - }) - diff1 <- insert(diff1, subset_num) - } - - if (sum(!numeric) >= 1) { - pheno_cat <- droplevels(pheno[x, !numeric, drop = FALSE]) - if (sum(!numeric) > 1) { - pheno_cat$mix_cat <- apply(pheno_cat, 1, paste0, collapse = "") - } - - subset_entropy <- apply(pheno_cat, 2, entropy) - diff1 <- insert(diff1, subset_entropy, "entropy") - } - - diff1 -} +# A function to calculate the difference between a matrix and the original +# dataset +evaluate_helper <- function(x, original_x){ + stopifnot(ncol(x) == length(original_x)) + out <- sweep(x, 2, original_x, "-") + colMeans(abs(out), na.rm = TRUE) +} + +# To insert a vector or a matrix inside another matrix +insert <- function(matrix, vector, name = NULL) { + if (is.matrix(vector)) { + nam <- colnames(vector) + name <- rownames(vector) + } else { + nam <- names(vector) + } + matrix[name, nam] <- vector + matrix +} + +#' Evaluate each variable provided +#' +#' Measure some summary statistics of the whole cohort of samples +#' @param pheno Data.frame with information about the samples +#' @return A matrix with the mean, standard deviation, MAD values of the +#' numeric variables, the entropy of the categorical, and the amount of +#' `NA` per variable. +#' @family functions to evaluate samples +#' @export +#' @examples +#' data(survey, package = "MASS") +#' evaluate_orig(survey[, c("Sex", "Age", "Smoke")]) +evaluate_orig <- function(pheno) { + if (!.check_data(pheno)) { + warning("There might be some problems with the data use check_data().") + } + num <- is_num(pheno) + .evaluate_orig(pheno, num) +} + +.evaluate_orig <- function(pheno, num) { + stopifnot(!is.null(colnames(pheno))) + original <- empty_res(pheno, num) + + ev_subset(x = seq_len(nrow(pheno)), pheno = pheno, numeric = num, diff = original) +} + +#' Evaluates a data.frame +#' +#' Measures several indicators per group +#' @param i Index +#' @inheritParams evaluate_orig +#' @return An array of three dimensions with the mean, standard deviation +#' ([sd()]), and median absolute deviation ([mad()]) of the numeric variables, the +#' entropy of the categorical and the number of `NA` by each subgroup. +#' @family functions to evaluate samples +#' @seealso If you have already an index you can use [use_index()]. +#' @export +#' @examples +#' data(survey, package = "MASS") +#' index <- create_subset(nrow(survey), 50, 5) +#' ev_index <- evaluate_index(index, survey[, c("Sex", "Smoke")]) +#' ev_index["entropy", , ] +evaluate_index <- function(i, pheno) { + if (!.check_data(pheno)) { + warning("There might be some problems with the data use check_data().", call. = FALSE) + } + .evaluate_index(i, pheno, is_num(pheno)) + +} + + +.evaluate_index <- function(i, pheno, num) { + d <- empty_res(pheno, num) + if (sum(!num) > 1) { + variables <- c(colnames(pheno), "mix_cat") + } else { + variables <- colnames(pheno) + } + + out <- sapply(i, ev_subset, pheno = pheno, numeric = num, diff = d, simplify = "array") + + dimnames(out) <- list("stat" = rownames(d), + "variables" = variables, + "subgroups" = names(i)) + out + +} + +ev_subset <- function(x, pheno, numeric, diff){ + + subset_na <- colSums(is.na(pheno[x, , drop = FALSE])) + subset_na <- t(as.matrix(subset_na)) + rownames(subset_na) <- "na" + diff1 <- insert(diff, subset_na, "na") + + # Change the defaults of evaluations if any change happens on numeric + # or categorical tests (there are more tests or less) than + # 4 and 3 for categorical and numerical respectively. + if (sum(numeric) >= 1) { + # + pheno_num <- pheno[x, numeric, drop = FALSE] + subset_num <- apply(pheno_num, 2, function(y) { + c("sd" = sd(y, na.rm = TRUE), + "mean" = mean(y, na.rm = TRUE), + "mad" = mad(y, na.rm = TRUE)) + }) + diff1 <- insert(diff1, subset_num) + } + + if (sum(!numeric) >= 1) { + pheno_cat <- droplevels(pheno[x, !numeric, drop = FALSE]) + if (sum(!numeric) > 1) { + pheno_cat$mix_cat <- apply(pheno_cat, 1, paste0, collapse = "") + } + + subset_entropy <- apply(pheno_cat, 2, entropy) + diff1 <- insert(diff1, subset_entropy, "entropy") + } + + diff1 +} diff --git a/R/follow_up.R b/R/follow_up.R index d20444a..bfa64a3 100644 --- a/R/follow_up.R +++ b/R/follow_up.R @@ -213,7 +213,7 @@ valid_followup <- function(old_data = NULL, new_data = NULL, all_data = NULL, check_old <- .check_data(old_data, verbose = FALSE) ok <- TRUE if (!check_all) { - if (verbose){ + if (verbose) { warning("There are some problems with the data.", call. = FALSE) } ok <- FALSE diff --git a/R/indexing.R b/R/indexing.R index b60d5f6..2933868 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -221,4 +221,3 @@ apply_index <- function(pheno, index, name = "old_rows") { pheno <- pheno[old_rows, , drop = FALSE] add_column(pheno, old_rows, name) } - diff --git a/R/optimum.R b/R/optimum.R index 1f24ce4..031c316 100644 --- a/R/optimum.R +++ b/R/optimum.R @@ -86,7 +86,7 @@ internal_batches <- function(size_data, size_subset, batches) { if (size_subset*batches == size_data) { return(rep(size_subset, times = batches)) } - # If not all samples can be allocated return the maximum number of samples + # If not all samples can be allocated return the maximum number of samples max_batch_size <- optimum_subset(size_data, batches) if (max_batch_size > size_subset) { return(rep(size_subset, times = batches)) diff --git a/experDesign.Rproj b/experDesign.Rproj index 92e5d55..79a59c8 100644 --- a/experDesign.Rproj +++ b/experDesign.Rproj @@ -12,7 +12,6 @@ Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX -AutoAppendNewline: Yes StripTrailingWhitespace: Yes BuildType: Package diff --git a/tests/testthat/test-replicates.R b/tests/testthat/test-replicates.R index f01a236..657416a 100644 --- a/tests/testthat/test-replicates.R +++ b/tests/testthat/test-replicates.R @@ -14,5 +14,5 @@ test_that("Setting 0 replicates is an error", { set.seed(4568) nas <- c(137, 70) expect_error(replicates(survey[-nas, c("Sex", "Smoke", "Age")], size_subset = 50, - iterations = 25, controls = 0)) + iterations = 25, controls = 0)) })