From 60589b56604d680b6bdf57d86c53f54fa5064d7f Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Wed, 21 Feb 2024 09:14:07 -0500 Subject: [PATCH 01/11] Add website to DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fec79b17..3703f40a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,7 @@ Authors@R: ) Description: A traceability focused tool created to simplify the data manipulation necessary to create clinical summaries. License: MIT + file LICENSE -URL: https://github.com/atorus-research/Tplyr +URL: https://atorus-research.github.io/Tplyr/, https://github.com/atorus-research/Tplyr BugReports: https://github.com/atorus-research/Tplyr/issues Encoding: UTF-8 Depends: R (>= 3.5.0) From b1cdb28eeb6574eee5e10aeddb4a5bf746813b64 Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Tue, 2 Sep 2025 16:57:42 -0400 Subject: [PATCH 02/11] #182 patch issue with NAs in two way matrix --- R/riskdiff.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/riskdiff.R b/R/riskdiff.R index be17c0b8..4569c40a 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -207,7 +207,8 @@ prep_two_way <- function(comp) { # Pivot out to give the var names n_ref, n_comp, total_ref, total_comp for two way pivot_wider(id_cols = c(match_exact(c(by, cols, head(target_var, -1))), 'summary_var'), names_from=!!treat_var, - values_from = c('n', 'total')) + values_from = c('n', 'total')) %>% + fill(total_comp, total_ref, .direction="downup") }, envir=caller_env()) From 206f99b12acfecc24fa2a4d3e68906a17a9613fb Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Wed, 3 Sep 2025 12:15:07 -0400 Subject: [PATCH 03/11] More robust that compensates for grouping and introduce test case --- R/riskdiff.R | 20 ++++- tests/testthat/_snaps/riskdiff.md | 119 ++++++++++++++++++++++++++++++ tests/testthat/test-riskdiff.R | 23 ++++++ 3 files changed, 158 insertions(+), 4 deletions(-) 
diff --git a/R/riskdiff.R b/R/riskdiff.R index 4569c40a..a6af81fb 100644 --- a/R/riskdiff.R +++ b/R/riskdiff.R @@ -176,7 +176,21 @@ prep_two_way <- function(comp) { msg = paste0("There are no records for the following groups within the variable ", as_name(treat_var), ": ", paste(invalid_groups, collapse=", "))) - two_way <- numeric_data + # create the merge columns + mrg <- as_label(pop_treat_var) + names(mrg) <- as_label(treat_var) + mrg_cols <- append(mrg, map_chr(cols, as_label)) + + two_way <- numeric_data %>% + left_join( + select(header_n, everything(), tot_fill = n), + by = mrg_cols + ) %>% + mutate( + distinct_total = if_else(is.na(distinct_total), tot_fill, distinct_total) + ) + + rm(mrg, mrg_cols) # Nested layers need to plug the NAs left over - needs revision in the future if (is_built_nest && quo_is_symbol(by[[1]])) { @@ -188,7 +202,6 @@ prep_two_way <- function(comp) { ) } - # If distinct is set and distinct values are there, use them if (comp_distinct && !is.null(distinct_by)) { two_way <- two_way %>% @@ -207,8 +220,7 @@ prep_two_way <- function(comp) { # Pivot out to give the var names n_ref, n_comp, total_ref, total_comp for two way pivot_wider(id_cols = c(match_exact(c(by, cols, head(target_var, -1))), 'summary_var'), names_from=!!treat_var, - values_from = c('n', 'total')) %>% - fill(total_comp, total_ref, .direction="downup") + values_from = c('n', 'total')) }, envir=caller_env()) diff --git a/tests/testthat/_snaps/riskdiff.md b/tests/testthat/_snaps/riskdiff.md index 7a376ad0..b39e111e 100644 --- a/tests/testthat/_snaps/riskdiff.md +++ b/tests/testthat/_snaps/riskdiff.md @@ -22,3 +22,122 @@ Comparison {4, 4} has duplicated values. 
Comparisons must not be duplicates +# Missing counts don't cause error in comparisons + + Code + head(as.data.frame(build(t))) + Condition + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + 
Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Warning in `prop.test()`: + Chi-squared approximation may be incorrect + Output + row_label1 row_label2 + 1 SKIN AND SUBCUTANEOUS TISSUE DISORDERS SKIN AND SUBCUTANEOUS TISSUE DISORDERS + 2 SKIN AND SUBCUTANEOUS TISSUE DISORDERS ALOPECIA + 3 SKIN AND SUBCUTANEOUS TISSUE DISORDERS BLISTER + 4 SKIN AND SUBCUTANEOUS TISSUE DISORDERS COLD SWEAT + 5 SKIN AND SUBCUTANEOUS TISSUE DISORDERS DERMATITIS ATOPIC + 6 SKIN AND SUBCUTANEOUS TISSUE DISORDERS DERMATITIS CONTACT + var1_Placebo_F var1_Placebo_M var1_Xanomeline High Dose_F + 1 13 ( 24.5%) 8 ( 24.2%) 0 ( 0.0%) + 2 1 ( 1.9%) 0 ( 0.0%) 0 ( 0.0%) + 3 0 ( 0.0%) 0 ( 0.0%) 0 ( 0.0%) + 4 0 ( 0.0%) 1 ( 3.0%) 0 ( 0.0%) + 5 0 ( 0.0%) 1 ( 3.0%) 0 ( 0.0%) + 6 0 ( 0.0%) 0 ( 0.0%) 0 ( 0.0%) + var1_Xanomeline High Dose_M var1_Xanomeline Low Dose_F + 1 0 ( 0.0%) 24 ( 48.0%) + 2 0 ( 0.0%) 0 ( 0.0%) + 3 0 ( 0.0%) 2 ( 4.0%) + 4 0 ( 0.0%) 0 ( 0.0%) + 5 0 ( 0.0%) 0 ( 0.0%) + 6 0 ( 0.0%) 0 ( 0.0%) + var1_Xanomeline Low Dose_M ord_layer_index + 1 18 ( 52.9%) 1 + 2 0 ( 0.0%) 1 + 3 3 ( 8.8%) 1 + 4 0 ( 0.0%) 1 + 5 0 ( 0.0%) 1 + 6 1 ( 2.9%) 1 + rdiff_Xanomeline High Dose_Placebo_F rdiff_Xanomeline High Dose_Placebo_M + 1 -0.245 (-0.383, -0.108) -0.242 (-0.415, -0.070) + 2 -0.019 (-0.074, 0.037) 0.000 ( 0.000, 0.000) + 3 0.000 ( 0.000, 0.000) 0.000 ( 0.000, 0.000) + 4 0.000 ( 0.000, 0.000) -0.030 (-0.115, 0.055) + 5 0.000 ( 
0.000, 0.000) -0.030 (-0.115, 0.055) + 6 0.000 ( 0.000, 0.000) 0.000 ( 0.000, 0.000) + ord_layer_1 ord_layer_2 + 1 1 Inf + 2 1 1 + 3 1 2 + 4 1 3 + 5 1 4 + 6 1 5 + diff --git a/tests/testthat/test-riskdiff.R b/tests/testthat/test-riskdiff.R index 644ffec0..5735edbe 100644 --- a/tests/testthat/test-riskdiff.R +++ b/tests/testthat/test-riskdiff.R @@ -278,3 +278,26 @@ test_that("Error generates when duplicating riskdiff comparison values", { ) }) + +test_that("Missing counts don't cause error in comparisons", { + + +adae <- filter(tplyr_adae, TRTA != "Xanomeline High Dose" & AEDECOD != "ACTINIC KERATOSIS") + +# Create table +t <- tplyr_table(adae, TRTA, cols=SEX) %>% + # Set population + set_pop_data(tplyr_adsl) %>% + set_pop_treat_var(TRT01A) %>% + # Layer 1: Organ System and OCMQ (Narrow) Count Layer + add_layer( + group_count(vars(AEBODSYS, AEDECOD)) %>% + # Set distinct counts per subject + set_distinct_by(USUBJID) %>% + # Add risk differences + add_risk_diff(c("Xanomeline High Dose", "Placebo")) + ) + + # Build the table + expect_snapshot(head(as.data.frame(build(t)))) +}) \ No newline at end of file From 1b6a1dfb0969198584dac3b13688541a7809cdb5 Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Wed, 3 Sep 2025 12:39:13 -0400 Subject: [PATCH 04/11] workflow updates. --- .github/workflows/R-CMD-check.yaml | 136 ++++++++++++----------------- .github/workflows/rhub.yaml | 95 ++++++++++++++++++++ 2 files changed, 150 insertions(+), 81 deletions(-) create mode 100644 .github/workflows/rhub.yaml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 1e93f191..6e7598fe 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,85 +1,59 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - push: - branches: - - main - - master - - devel - pull_request: - branches: - - main - - master - - devel + push: + branches: + - main + - master + - devel + pull_request: + branches: + - main + - master + - devel -name: R-CMD-check +name: R-CMD-check.yaml -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - - - uses: r-lib/actions/setup-pandoc@v2 +permissions: read-all - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 
'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - - - name: Upload check results - if: failure() - uses: actions/upload-artifact@main - with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: ubuntu-22.04, r: 'release', rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"} + - {os: ubuntu-22.04, r: 'devel', rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' \ No newline at end of file diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml new file mode 100644 index 00000000..bdfab195 --- /dev/null +++ b/.github/workflows/rhub.yaml @@ -0,0 +1,95 @@ +# R-hub's generic GitHub Actions workflow file. 
It's canonical location is at +# https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml +# You can update this file to a newer version using the rhub2 package: +# +# rhub::rhub_setup() +# +# It is unlikely that you need to modify this file manually. + +name: R-hub +run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" + +on: + workflow_dispatch: + inputs: + config: + description: 'A comma separated list of R-hub platforms to use.' + type: string + default: 'linux,windows,macos' + name: + description: 'Run name. You can leave this empty now.' + type: string + id: + description: 'Unique ID. You can leave this empty now.' + type: string + +jobs: + + setup: + runs-on: ubuntu-latest + outputs: + containers: ${{ steps.rhub-setup.outputs.containers }} + platforms: ${{ steps.rhub-setup.outputs.platforms }} + + steps: + # NO NEED TO CHECKOUT HERE + - uses: r-hub/actions/setup@v1 + with: + config: ${{ github.event.inputs.config }} + id: rhub-setup + + linux-containers: + needs: setup + if: ${{ needs.setup.outputs.containers != '[]' }} + runs-on: ubuntu-latest + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.containers) }} + container: + image: ${{ matrix.config.container }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/run-check@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + + other-platforms: + needs: setup + if: ${{ needs.setup.outputs.platforms != '[]' }} + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ 
fromJson(needs.setup.outputs.platforms) }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/setup-r@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/run-check@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} \ No newline at end of file From 4def9a1cef86ac9fe101f0156237ebb81a7add43 Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Fri, 5 Sep 2025 10:59:52 -0400 Subject: [PATCH 05/11] Resolve #193 --- R/regex.R | 19 +++++++++++++++++-- R/str_extractors.R | 18 +++++++++--------- tests/testthat/test-str_extractors.R | 28 +++++++++++++++++----------- 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/R/regex.R b/R/regex.R index d259212f..46105580 100644 --- a/R/regex.R +++ b/R/regex.R @@ -26,13 +26,14 @@ #' #' get_tplyr_regex('format_group') #' -get_tplyr_regex <- function(rx=c("format_string", "format_group")) { +get_tplyr_regex <- function(rx=c("format_string", "format_group", "number_group")) { rx <- match.arg(rx) switch( rx, 'format_string' = get_format_string_regex(), - 'format_group' = get_format_group_regex() + 'format_group' = get_format_group_regex(), + 'number_group' = get_numeric_group_regex() ) } @@ -110,3 +111,17 @@ get_format_group_regex <- function() { regex(paste0(nwsd, ws, num, nws)) } + +#' Return the regex for identifying numbers within an output string +#' +#' This regex targets the individual numbers within the string +#' +#' @return A regular expression +#' @noRd +get_numeric_group_regex <- function() { + #`-?` - Matches an optional negative sign + # `(?:\d*\.\d+|\d+)` - A non-capturing group with two alternatives: + # `\d*\.\d+` - Matches decimals 
like `.75`, `0.56`, or `123.45` + # `\d+` - Matches integers like `1`, `523`, `56` + regex("-?(?:\\d*\\.\\d+|\\d+)") +} diff --git a/R/str_extractors.R b/R/str_extractors.R index cdb73535..f3a9b46e 100644 --- a/R/str_extractors.R +++ b/R/str_extractors.R @@ -22,7 +22,7 @@ #' #' @examples #' -#' string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)") +#' string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)", "-1 (-.56, .75) -523%, 56 | -34") #' #' str_extract_fmt_group(string, 2) #' @@ -31,11 +31,11 @@ str_extract_fmt_group <- function(string, format_group) { if (!inherits(string, "character")) { - stop("Paramter `string` must be a character vector", call.=FALSE) + stop("Parameter `string` must be a character vector", call.=FALSE) } - if (!inherits(format_group, "numeric") || (inherits(format_group, "numeric") && format_group %% 1 != 0)) { - stop("Paramter `format_group` must be an integer", call.=FALSE) + if (!inherits(format_group, c("integer", "numeric")) || (inherits(format_group, "numeric") && format_group %% 1 != 0)) { + stop("Parameter `format_group` must be an integer", call.=FALSE) } # Pull out regex to drive the work @@ -57,15 +57,15 @@ str_extract_fmt_group <- function(string, format_group) { str_extract_num <- function(string, format_group) { if (!inherits(string, "character")) { - stop("Paramter `string` must be a character vector", call.=FALSE) + stop("Parameter `string` must be a character vector", call.=FALSE) } - if (!inherits(format_group, "numeric") || (inherits(format_group, "numeric") && format_group %% 1 != 0)) { - stop("Paramter `format_group` must be an integer", call.=FALSE) + if (!inherits(format_group, c("integer", "numeric")) || (inherits(format_group, "numeric") && format_group %% 1 != 0)) { + stop("Parameter `format_group` must be an integer", call.=FALSE) } # Pull out regex to drive the work - f_grp_rx <- get_format_group_regex() + f_grp_rx <- get_numeric_group_regex() # Pull out all the match groups and then get the numeric for the 
conditional number match_groups <- str_match_all(string, f_grp_rx) @@ -73,6 +73,6 @@ str_extract_num <- function(string, format_group) { # Get the number upon which the condition will be evaluated map_dbl( match_groups, - ~ if (nrow(.) < format_group) {NA_real_} else {as.double(.[format_group, 2])} + ~ if (nrow(.) < format_group) {NA_real_} else {as.double(.[format_group, 1])} ) } diff --git a/tests/testthat/test-str_extractors.R b/tests/testthat/test-str_extractors.R index 393a2d67..207f968f 100644 --- a/tests/testthat/test-str_extractors.R +++ b/tests/testthat/test-str_extractors.R @@ -1,57 +1,63 @@ -string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)") +string <- c(" 0 (0.0%)", " 8 (9.3%)", "78 (90.7%)", "-1 (-0.56, -.75) -523%") test_that("String extractor errors generate properly", { expect_error( str_extract_fmt_group(c(1), 1), - "Paramter `string`" + "Parameter `string`" ) expect_error( str_extract_fmt_group(string, "hi"), - "Paramter `format_group`" + "Parameter `format_group`" ) expect_error( str_extract_num(c(1), 1), - "Paramter `string`" + "Parameter `string`" ) expect_error( str_extract_num(string, "hi"), - "Paramter `format_group`" + "Parameter `format_group`" ) }) test_that("Format groups can be extracted", { expect_equal( str_extract_fmt_group(string, 1), - c(' 0', ' 8', '78') + c(' 0', ' 8', '78', "-1") ) expect_equal( str_extract_fmt_group(string, 2), - c("(0.0%)", "(9.3%)", "(90.7%)") + c("(0.0%)", "(9.3%)", "(90.7%)", "(-0.56,") ) expect_equal( str_extract_fmt_group(string, 3), - rep(NA_character_, 3) + c(rep(NA_character_, 3), "-.75)") ) }) test_that("Numbers from format groups can be extracted", { expect_equal( str_extract_num(string, 1), - c(0, 8, 78) + c(0, 8, 78, -1) ) expect_equal( str_extract_num(string, 2), - c(0.0, 9.3, 90.7) + c(0.0, 9.3, 90.7, -.56) ) expect_equal( str_extract_num(string, 3), - rep(NA_real_, 3) + c(rep(NA_real_, 3), -.75) ) + + expect_equal( + str_extract_num(string, 4), + c(rep(NA_real_, 3), -523) + ) + }) From 
a129f1e087178e7524675018d550176b76e047b8 Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Fri, 5 Sep 2025 11:02:21 -0400 Subject: [PATCH 06/11] Documentation --- DESCRIPTION | 2 +- R/regex.R | 7 +++++-- man/Tplyr.Rd | 2 +- man/get_tplyr_regex.Rd | 9 ++++++--- man/str_extractors.Rd | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fec79b17..4c636c24 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -71,7 +71,7 @@ Suggests: pharmaRTF, withr VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 RdMacros: lifecycle Config/testthat/edition: 3 LazyData: true diff --git a/R/regex.R b/R/regex.R index 46105580..786cca11 100644 --- a/R/regex.R +++ b/R/regex.R @@ -3,7 +3,7 @@ #' This function allows you to extract important regular expressions used inside #' Tplyr. #' -#' There are two important regular expressions used within Tplyr. The +#' There are three important regular expressions used within Tplyr. The #' format_string expression is the expression to parse format strings. This is #' what is used to make sense out of strings like 'xx (XX.x%)' or 'a+1 (A.a+2)' #' by inferring what the user is specifying about number formatting. @@ -11,7 +11,8 @@ #' The 'format_group' regex is the opposite of this, and when given a string of #' numbers, such as ' 5 (34%) \[9]' will return the separate segments of numbers #' broken into their format groups, which in this example would be ' 5', -#' '(34%)', and '\[9]'. +#' '(34%)', and '\[9]'. 
Lastly, the 'number_group' regex has a similar application +#' to the 'format_group' regex, but targets only numbers #' #' @param rx A character string with either the value 'format_string' or #' 'format_group' @@ -25,6 +26,8 @@ #' get_tplyr_regex('format_string') #' #' get_tplyr_regex('format_group') +#' +#' get_tplyr_regex('number_group') #' get_tplyr_regex <- function(rx=c("format_string", "format_group", "number_group")) { rx <- match.arg(rx) diff --git a/man/Tplyr.Rd b/man/Tplyr.Rd index 06706f33..330856e3 100644 --- a/man/Tplyr.Rd +++ b/man/Tplyr.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/zzz.R \docType{package} \name{Tplyr} -\alias{Tplyr} \alias{Tplyr-package} +\alias{Tplyr} \title{A grammar of summary data for clinical reports} \description{ `r lifecycle::badge("experimental")` diff --git a/man/get_tplyr_regex.Rd b/man/get_tplyr_regex.Rd index 448057a1..eca594b7 100644 --- a/man/get_tplyr_regex.Rd +++ b/man/get_tplyr_regex.Rd @@ -4,7 +4,7 @@ \alias{get_tplyr_regex} \title{Retrieve one of Tplyr's regular expressions} \usage{ -get_tplyr_regex(rx = c("format_string", "format_group")) +get_tplyr_regex(rx = c("format_string", "format_group", "number_group")) } \arguments{ \item{rx}{A character string with either the value 'format_string' or @@ -18,7 +18,7 @@ This function allows you to extract important regular expressions used inside Tplyr. } \details{ -There are two important regular expressions used within Tplyr. The +There are three important regular expressions used within Tplyr. The format_string expression is the expression to parse format strings. This is what is used to make sense out of strings like 'xx (XX.x\%)' or 'a+1 (A.a+2)' by inferring what the user is specifying about number formatting. @@ -26,7 +26,8 @@ by inferring what the user is specifying about number formatting. 
The 'format_group' regex is the opposite of this, and when given a string of numbers, such as ' 5 (34\%) [9]' will return the separate segments of numbers broken into their format groups, which in this example would be ' 5', -'(34\%)', and '[9]'. +'(34\%)', and '[9]'. Lastly, the 'number_group' regex has a similar application +to the 'format_group' regex, but targets only numbers } \examples{ @@ -34,4 +35,6 @@ get_tplyr_regex('format_string') get_tplyr_regex('format_group') +get_tplyr_regex('number_group') + } diff --git a/man/str_extractors.Rd b/man/str_extractors.Rd index b8fe8b9b..be09ac4d 100644 --- a/man/str_extractors.Rd +++ b/man/str_extractors.Rd @@ -31,7 +31,7 @@ are ' 5', '(34.4\%)', and '[9]'. } \examples{ -string <- c(" 0 (0.0\%)", " 8 (9.3\%)", "78 (90.7\%)") +string <- c(" 0 (0.0\%)", " 8 (9.3\%)", "78 (90.7\%)", "-1 (-.56, .75) -523\%, 56 | -34") str_extract_fmt_group(string, 2) From 280eaba9b66d11c1bccd763017cd6f0ba52c7efe Mon Sep 17 00:00:00 2001 From: "mike.stackhouse" Date: Mon, 8 Sep 2025 09:42:24 -0400 Subject: [PATCH 07/11] dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b5a8d71d..3f128f73 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Tplyr Title: A Traceability Focused Grammar of Clinical Data Summary -Version: 1.2.1 +Version: 1.2.1.9000 Authors@R: c( person(given = "Eli", From 57909de67e1ac196dacc88000f2636da0faeeab0 Mon Sep 17 00:00:00 2001 From: lanmino Date: Tue, 11 Nov 2025 09:37:30 -0600 Subject: [PATCH 08/11] Update shift.Rmd Typos and grammar; added one sentence line 68 --- vignettes/shift.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/shift.Rmd b/vignettes/shift.Rmd index 43a30e8d..d4af9376 100644 --- a/vignettes/shift.Rmd +++ b/vignettes/shift.Rmd @@ -24,14 +24,14 @@ library(knitr) Shift tables are a special kind of frequency table - but what they count are changes in state. 
This is most common when looking at laboratory ranges, where you may be interested in seeing how a subject's results related to normal ranges. The 'change in state' would refer to how that subject's results were at baseline versus different points of measure. Shift tables allow you to see the distribution of how subjects move between normal ranges, and if the population is improving or worsening as the study progresses. -While shift tables are very similar to a normal frequency table, there's more nuance here, and thus we decided to create `group_shift()`. This function is largely an abstraction of a count layer, and in fact re-uses a good deal of the same underlying code. But we handle some of the complexity for you to make the interface easy to use and the behavior similar to that of the `group_count()` and `group_desc()` APIs. Given that shift tables are built on count layers, many of functions that work with count layers behave in the same way when using shift layers. However, the following cannot be used in shift layers: +While shift tables are very similar to a normal frequency table, there's more nuance here, and thus we decided to create `group_shift()`. This function is largely an abstraction of a count layer, and in fact re-uses a good deal of the same underlying code. But we handle some of the complexity for you to make the interface easy to use and the behavior similar to that of the `group_count()` and `group_desc()` APIs. Given that shift tables are built on count layers, many functions that work with count layers behave in the same way when used on shift layers. 
However, the following cannot be used in shift layers: - Functions related to nested counts, including `set_nest_count()`, `set_outer_sort_position()` - Functions related to total rows and missing rows, including `set_missing_count()`, `add_total_row()`, `set_total_row_label()` - Risk difference, including `add_risk_diff()` -- and finally, result based sorting methods, including `set_order_count_method()`, `set_ordering_cols()`, `set_result_order_var()` +- and finally, result-based sorting methods, including `set_order_count_method()`, `set_ordering_cols()`, `set_result_order_var()` -One thing to note - the `group_shift()` API is intended to be used on shift tables where one group is presented in rows and the other group in columns. Occasionally, shift tables will have a row based approach that shows "Low to High", "Normal to High", etc. For those situations, `group_count()` will do just fine. +One thing to note - the `group_shift()` API is intended to be used on shift tables where one group is presented in rows and the other group in columns. Occasionally, shift tables will have a row-based approach that shows "Low to High", "Normal to High", etc. For those situations, `group_count()` will do just fine. ## A Basic Example @@ -49,7 +49,7 @@ tplyr_table(tplyr_adlb, TRTA, where=PARAMCD == "CK") %>% First, let's look at the differences in the shift API. Shift layers *must* take a row and a column variable, as the layer is designed to create a box for you that explains the changes in state. The row variable will typically be your "from" variable, and the column variable will typically be your "to" variable. Behind the scenes, **Tplyr** breaks this down for you to properly count and present the data. -For the most part, the last example gets us where we want to go - but there's still some that's left to be desired. It doesn’t look like there are any 'L' values for BNRIND in the dataset so we are not getting and rows containing 'L'. 
Let’s see if we can fix that by dummying in the possible values. +For the most part, the last example gets us where we want to go - but there's still some that's left to be desired. It doesn’t look like there are any 'L' values for BNRIND in the dataset so we are not getting any rows containing 'L'. Let’s see if we can fix that by dummying in the possible values. ## Filling Missing Groups Using Factors @@ -65,7 +65,7 @@ tplyr_table(tplyr_adlb, TRTA, where=PARAMCD == "CK") %>% kable() ``` -There we go. This is another situation where using factors in R let's us dummy values within the dataset. Furthermore, since factors are ordered, it automatically corrected the sort order of the row labels too. +There we go. This is another situation where using factors in R enables us to dummy values within the dataset. Furthermore, since factors are ordered, Tplyr automatically corrected the sort order of the row labels too. Now, instead of alphabetically (H then L then N), our rows are sorted by factor levels (L then N then H). ## Where to go from here From b481b748c4ace07114d7fe05adea37b7dcf879f3 Mon Sep 17 00:00:00 2001 From: lanmino Date: Tue, 11 Nov 2025 09:46:19 -0600 Subject: [PATCH 09/11] Update layer_templates.Rmd Typos and grammar --- vignettes/layer_templates.Rmd | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vignettes/layer_templates.Rmd b/vignettes/layer_templates.Rmd index 021fc0b6..ad21cbb7 100644 --- a/vignettes/layer_templates.Rmd +++ b/vignettes/layer_templates.Rmd @@ -21,11 +21,11 @@ library(knitr) There are several scenarios where a layer template may be useful. Some tables, like demographics tables, may have many layers that will all essentially look the same. Categorical variables will have the same count layer settings, and continuous variables will have the same desc layer settings. A template allows a user to build those settings once per layer, then reference the template when the **Tplyr** table is actually built. 
Another scenario might be building a set of company layer templates that are built for standard tables to reduce the footprint of code across analyses. In either of these cases, the idea is the reduce the amount of redundant code necessary to create a table. -Tplyr has already has a couple of mechanisms to reduce redundant application of formats. For example, `vignettes('tplyr_options')` shows how the options `tplyr.count_layer_default_formats`, `tplyr.desc_layer_default_formats`, and `tplyr.shift_layer_default_formats` can be used to create default format string settings. Additionally, you can set formats table wide using `set_count_layer_formats()`, `set_desc_layer_formats()`, or `set_shift_layer_formats()`. But what these functions and options _don't_ allow you to do is pre-set and reuse the settings for an entire layer, so all of the additional potential layer modifying functions are ignored. This is where layer templates come in. +Tplyr already has mechanisms to reduce redundant application of formats. For example, `vignettes('tplyr_options')` shows how the options `tplyr.count_layer_default_formats`, `tplyr.desc_layer_default_formats`, and `tplyr.shift_layer_default_formats` can be used to create default format string settings. Additionally, you can set formats table-wide using `set_count_layer_formats()`, `set_desc_layer_formats()`, or `set_shift_layer_formats()`. But what these functions and options _don't_ allow you to do is pre-set and reuse the settings for an entire layer, so all of the additional potential layer-modifying functions are ignored. This is where layer templates come in. # Basic Templates -The functions `new_layer_template()` and `use_template()` allow a user to create and use layer templates. Layer templates allow a user to pre-build and reuse an entire layer configuration, from the layer constructor down to all modifying functions. Furthermore, users can specify parameters they may want to be interchangeable.
Additionally, layer templates are extensible, so a template can be use and then further extended with additional layer modifying functions. +The functions `new_layer_template()` and `use_template()` allow a user to create and use layer templates. Layer templates allow a user to pre-build and reuse an entire layer configuration, from the layer constructor down to all modifying functions. Furthermore, users can specify parameters they may want to be interchangeable. Additionally, layer templates are extensible, so a template can be used and then further extended with additional layer-modifying functions. Consider the following example: @@ -37,7 +37,7 @@ new_layer_template( ) ``` -In this example, we've created a basic layer template. The template is named "example_template", and this is the name we'll use to reference the template when we want to use it. When the template is created, we start with the function `group_count(...)`. Note the use of the ellipsis (i.e. `...`). This is a required part of a layer template. Templates must start with a **Tplyr** layer constructor, which is one of the function `group_count()`, `group_desc()`, or `group_shift()`. The ellipsis is necessary because when the template is used, we are able to pass arguments directly into the layer constructor. For example: +In this example, we've created a basic layer template. The template is named "example_template", and this is the name we'll use to reference the template when we want to use it. When the template is created, we start with the function `group_count(...)`. Note the use of the ellipsis (i.e. `...`). This is a required part of a layer template. Templates must start with a **Tplyr** layer constructor, which is one of the functions `group_count()`, `group_desc()`, or `group_shift()`. The ellipsis is necessary because when the template is used, we are able to pass arguments directly into the layer constructor. 
For example: ```{r using a template} tplyr_table(tplyr_adsl, TRT01P) %>% @@ -48,7 +48,7 @@ tplyr_table(tplyr_adsl, TRT01P) %>% kable() ``` -Within `use_template()`, the first parameter is the template name. After that, we supply arguments as we normally would into `group_count()`, `group_desc()`, or `group_shift()`. Additionally, note that our formats have been applied just as they would be if we used `set_format_strings()` as specified in the template. Our template was applied, the table built with all of the settings appropriately. +Within `use_template()`, the first parameter is the template name. After that, we supply arguments as we normally would into `group_count()`, `group_desc()`, or `group_shift()`. Additionally, note that our formats have been applied just as they would be if we used `set_format_strings()` as specified in the template. Our template was applied, and the table built with all of the settings appropriately. An additional feature of layer templates is that they act just as any other function would in a **Tplyr** layer. This means that they're also extensible and can be expanded on directly within a **Tplyr** table. For example: @@ -62,7 +62,7 @@ tplyr_table(tplyr_adsl, TRT01P) %>% kable() ``` -Here we show two things - first, that the we called the template without the by variable argument from the previous example. This allows a template to have some flexibility depending on the context of its usage. Furthermore, we added the additional modifier function `add_total_row()`. In this example, we took the layer as constructed by the template and then modified that layer further. This may be useful if most but not all of a layer is reusable. The reusable portions can be put in a template, and the rest added using normal **Tplyr** syntax. +Here we show two things - first, that we called the template without the *by* variable argument from the previous example. This allows a template to have some flexibility depending on the context of its usage. 
Furthermore, we added the additional modifier function `add_total_row()`. In this example, we took the layer as constructed by the template and then modified that layer further. This may be useful if most but not all of a layer is reusable. The reusable portions can be put in a template, and the rest added using normal **Tplyr** syntax. ## Templates With Parameters From 968ee38b8aa5547a2600390662af2b7c061c0120 Mon Sep 17 00:00:00 2001 From: lanmino Date: Tue, 11 Nov 2025 09:54:05 -0600 Subject: [PATCH 10/11] Update layer_templates.Rmd typo --- vignettes/layer_templates.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/layer_templates.Rmd b/vignettes/layer_templates.Rmd index ad21cbb7..c166905b 100644 --- a/vignettes/layer_templates.Rmd +++ b/vignettes/layer_templates.Rmd @@ -19,7 +19,7 @@ library(Tplyr) library(knitr) ``` -There are several scenarios where a layer template may be useful. Some tables, like demographics tables, may have many layers that will all essentially look the same. Categorical variables will have the same count layer settings, and continuous variables will have the same desc layer settings. A template allows a user to build those settings once per layer, then reference the template when the **Tplyr** table is actually built. Another scenario might be building a set of company layer templates that are built for standard tables to reduce the footprint of code across analyses. In either of these cases, the idea is the reduce the amount of redundant code necessary to create a table. +There are several scenarios where a layer template may be useful. Some tables, like demographics tables, may have many layers that will all essentially look the same. Categorical variables will have the same count layer settings, and continuous variables will have the same desc layer settings. A template allows a user to build those settings once per layer, then reference the template when the **Tplyr** table is actually built. 
Another scenario might be building a set of company layer templates that are built for standard tables to reduce the footprint of code across analyses. In either of these cases, the idea is to reduce the amount of redundant code necessary to create a table. Tplyr already has mechanisms to reduce redundant application of formats. For example, `vignettes('tplyr_options')` shows how the options `tplyr.count_layer_default_formats`, `tplyr.desc_layer_default_formats`, and `tplyr.shift_layer_default_formats` can be used to create default format string settings. Additionally, you can set formats table-wide using `set_count_layer_formats()`, `set_desc_layer_formats()`, or `set_shift_layer_formats()`. But what these functions and options _don't_ allow you to do is pre-set and reuse the settings for an entire layer, so all of the additional potential layer-modifying functions are ignored. This is where layer templates come in. From d18f58f6df8ab165a8ee53b93003a22c3df6e9f6 Mon Sep 17 00:00:00 2001 From: lanmino Date: Tue, 11 Nov 2025 10:11:32 -0600 Subject: [PATCH 11/11] Update denom.Rmd typos, grammar, one sentence rewrite --- vignettes/denom.Rmd | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vignettes/denom.Rmd b/vignettes/denom.Rmd index cb657c4e..e99d97a4 100644 --- a/vignettes/denom.Rmd +++ b/vignettes/denom.Rmd @@ -30,7 +30,7 @@ Make sure you have a good understand of count and shift layers before you review ## Population Data in the Denominator -What do you do when your target dataset doesn't _have_ the information necessary to create your denominator? For example - when you create an adverse event table, the adverse event dataset likely only contains records for subjects who experienced an adverse event. But subjects who did _not_ have an adverse event are still part of the study population and must be considered in the denominator.
+What do you do when your target dataset doesn't _have_ the information necessary to create your denominator? For example, when you create an adverse event table, the adverse event dataset likely only contains records for subjects who experienced an adverse event. But subjects who did _not_ have an adverse event are still part of the study population and must be considered in the denominator. For this reason, **Tplyr** lets you set a separate population dataset - but there are a couple things you need to do to trigger **Tplyr** to use the population data as your denominator. @@ -74,11 +74,11 @@ Fortunately, denominators are much simpler when they're kept within a single dat ## Denominator Grouping -When you're looking within a single dataset, there are a couple factors that you need to consider for a denominator. The first is which grouping variables create those denominators. Let's look at this from two perspectives - count layers and shift layers. +When you're looking within a single dataset, there are a couple factors that you need to consider for a denominator. Firstly, which grouping variables create those denominators? Let's look at this from two perspectives: count layers and shift layers. ### Count layers -Most of the complexity of denominators comes from nuanced situations. A solid 80% of the time, defaults will work. For example, in a frequency table, you will typically want data within a column to sum to 100%. For example: +Most of the complexity of denominators comes from nuanced situations. Tplyr is designed with practical defaults that suit most clinical summaries. For example, in a frequency table, you will typically want data within a column to sum to 100%, like so: ```{r} tplyr_adsl <- tplyr_adsl %>% @@ -180,9 +180,9 @@ There are some circumstances that you'll encounter where the filter used for a d Yeah we know - there are a lot of different places that filtering can happen... So let's take the example shown below.
The first layer has no layer level filtering applied, so the table level `where` is the only filter applied. The second layer has a layer level filter applied, so the denominators will be based on that layer level filter. Notice how in this case, the percentages in the second layer add up to 100%. This is because the denominator only includes values used in that layer. +So let's take the example shown below. The first layer has no layer-level filtering applied, so the table-level `where` is the only filter applied. The second layer has a layer-level filter applied, so the denominators will be based on that layer-level filter. Notice how in this case, the percentages in the second layer add up to 100%. This is because the denominator only includes values used in that layer. -The third layer has a layer level filter applied, but additionally uses `set_denom_where()`. The `set_denom_where()` in this example is actually *removing* the layer level filter for the denominators. This is because in R, when you filter using `TRUE`, the filter returns all records. So by using `TRUE` in `set_denom_where()`, the layer level filter is effectively removed. This causes the denominator to include all values available from the table and not just those selected for that layer - so for this layer, the percentages will *not add up to 100%*. This is important - this allows the percentages from Layer 3 to sum to the total percentage of "DISCONTINUED" from Layer 1. +The third layer has a layer-level filter applied, but additionally uses `set_denom_where()`. The `set_denom_where()` in this example is actually *removing* the layer-level filter for the denominators. This is because in R, when you filter using `TRUE`, the filter returns all records. So by using `TRUE` in `set_denom_where()`, the layer-level filter is effectively removed. 
This causes the denominator to include all values available from the table and not just those selected for that layer - so for this layer, the percentages will *not add up to 100%*. This is important - this allows the percentages from Layer 3 to sum to the total percentage of "DISCONTINUED" from Layer 1. ```{r} tplyr_adsl2 <- tplyr_adsl %>% @@ -210,9 +210,9 @@ t %>% Missing counts are a tricky area for frequency tables, and they play directly in with denominators as well. These values raise a number of questions. For example, do you want to format the missing counts the same way as the event counts? Do you want to present missing counts with percentages? Do missing counts belong in the denominator? -The `set_missing_count()` function can take a new `f_str()` object to set the display of missing values. If not specified, the associated count layer's format will be used. Using the `...` parameter, you are able to specify the row label desired for missing values and values that you determine to be considered 'missing'. For example, you may have NA values in the target variable, and then values like "Not Collected" that you also wish to consider "missing". `set_missing_count()` allows you to group those together. Actually - you're able to establish as many different "missing" groups as you want - even though that scenario is fairly unlikely. +The `set_missing_count()` function can take a new `f_str()` object to set the display of missing values. If not specified, the associated count layer's format will be used. Using the `...` parameter, you are able to specify the row label desired for missing values and values that you determine to be considered 'missing'. For example, you may have NA values in the target variable, and then values like "Not Collected" that you also wish to consider "missing". `set_missing_count()` allows you to group those together. 
Actually, you're able to establish as many different "missing" groups as you want - even though that scenario is fairly unlikely. -In the example below 50 random values are removed and NA is specified as the missing string. This leads us to another parameter - `denom_ignore`. By default, if you specify missing values they will still be considered within the denominator, but when you have missing counts, you may wish to exclude them from the totals being summarized. By setting `denom_ignore` to TRUE, your denominators will ignore any groups of missing values that you've specified. +In the example below, 50 random values are removed and NA is specified as the missing string. This leads us to another parameter: `denom_ignore`. By default, Tplyr will include missing values within the denominator, but you may wish to exclude them from the totals being summarized. By setting `denom_ignore` to TRUE, your denominators will ignore any groups of missing values that you've specified. ```{r} tplyr_adae2 <- tplyr_adae @@ -231,11 +231,11 @@ t %>% kable() ``` -We did one more other thing worth explaining in the example above - gave the missing count its own sort value. If you leave this field null, it will simply be the maximum value in the order layer plus 1, to put the Missing counts at the bottom during an ascending sort. But tables can be sorted a lot of different ways, as you'll see in the sort vignette. So instead of trying to come up with novel ways for you to control where the missing row goes - we decided to just let you specify your own value. +We did one other thing worth explaining in the example above - we gave the missing count its own sort value. If you leave this field null, it will simply be the maximum value in the order layer plus 1, to put the Missing counts at the bottom during an ascending sort. But tables can be sorted a lot of different ways, as you'll see in the sort vignette.
So instead of trying to come up with novel ways for you to control where the missing row goes, we decided to just let you specify your own value. ## Missing Subjects -Missing counts and counting missing subjects work two different ways within Tplyr. Missing counts, as described above, will examine the records present in the data and collect and missing values. But for these results to be counted, they need to first be provided within the input data itself. On the other hand, missing subjects are calculated by looking at the difference between the potential number of subjects within the column (i.e. the combination of the treatment variables and column variables) and the number of subjects actually present. Consider this example: +Missing counts and counting missing subjects work two different ways within Tplyr. Missing counts, as described above, will examine the records present in the data and collect any missing values. But for these results to be counted, they need to first be provided within the input data itself. On the other hand, missing subjects are calculated by looking at the difference between the *potential* number of subjects within the column (i.e. the combination of the treatment variables and column variables) and the number of subjects *actually* present. Consider this example: ```{r missing_subs1} missing_subs <- tplyr_table(tplyr_adae, TRTA) %>% @@ -255,7 +255,7 @@ Missing counts and counting missing subjects work two different ways within Tply kable() ``` -In the example above, we produce a nested count layer. The function `add_missing_subjects_row()` triggers the addition of the new result row for which the missing subjects are calculated. The row label applied for this can be configured using `set_missing_subjects_row_label()`, and the row label itself will default to 'Missing'. Depending on your sorting needs, a `sort_value` can be applied to whatever numeric value you provide. 
Lastly, you can provide an `f_str()` to format the missing subjects row separately from the rest of the layer, but whatever format is applied to the layer will apply otherwise. +In the example above, we produce a nested count layer. The function `add_missing_subjects_row()` triggers the addition of the new result row for which the missing subjects are calculated. The row label applied for this can be configured using `set_missing_subjects_row_label()`, and the row label itself will default to 'Missing'. Depending on your sorting needs, a `sort_value` can be applied to whatever numeric value you provide. You can also provide an `f_str()` to format the missing subjects row separately from the rest of the layer. Note that in nested count layers, missing subject rows will generate for each independent group within the outer layer. Outer layers cannot have missing subject rows calculated individually. This would best be done in an independent layer itself, as the result would apply to the whole input target dataset. @@ -306,7 +306,7 @@ tplyr_table(tplyr_adsl2, TRT01P) %>% kable() ``` -Now the table is more intuitive. We used `set_missing_count()` to update our denominators, so missing have been excluded. Now, the total row intuitively matches the denominators used within each group, and we can see how many missing records were excluded. +Now the table is more intuitive. We used `set_missing_count()` to update our denominators, so missings have been excluded. Now, the total row intuitively matches the denominators used within each group, and we can see how many missing records were excluded. _You may have stumbled upon this portion of the vignette while searching for how to create a total column. **Tplyr** allows you to do this as well with the function `add_total_group()` and read more in `vignette("table")`._