diff --git a/.github/workflows/link_check.yml b/.github/workflows/link_check.yml new file mode 100644 index 0000000..6f64dc4 --- /dev/null +++ b/.github/workflows/link_check.yml @@ -0,0 +1,29 @@ +name: Links (Fail Fast) + +on: + pull_request: + branches: + - main + +jobs: + linkChecker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up R # Install R from CRAN + uses: r-lib/actions/setup-r@v2 + with: + r-version: '4.4.3' # You can specify a different R version if needed + + - name: Install R packages + run: | + Rscript -e 'install.packages("fs")' + shell: bash + + - name: Link Checker + uses: lycheeverse/lychee-action@v1.8.0 + with: + fail: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml new file mode 100644 index 0000000..f0e9706 --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,42 @@ +name: Spellcheck +on: + pull_request: + branches: ['main'] +jobs: + Spelling: + runs-on: ubuntu-latest + container: + image: "rocker/tidyverse:4.3.1" + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Install packages + run: | + if (!require("roxygen2")) install.packages("roxygen2") + if (!require("spelling")) install.packages("spelling") + if (!require("testthat")) install.packages("testthat") + shell: Rscript {0} + + - name: Check spelling + run: | + library(spelling) + + # Check the entire package (R files, documentation, vignettes, etc.) + words <- spelling::spell_check_package() + + print(words) + + testthat::test_that(desc = "Check spelling", code = { + testthat::expect_equal( + object = nrow(words), + expected = 0, + info = if (nrow(words) > 0) { + paste("Spelling errors found:\n", + paste(capture.output(print(words)), collapse = "\n")) + } else { + "No spelling errors" + } + ) + }) + shell: Rscript {0} \ No newline at end of file diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml new file mode 100644 index 0000000..80576a8 --- /dev/null +++ b/.github/workflows/style_check.yml @@ -0,0 +1,20 @@ +name: Style +on: + pull_request: {branches: ['main']} + +jobs: + Style: + runs-on: ubuntu-latest + container: {image: "rocker/tidyverse:4.2.1"} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Install styler + run: install.packages("styler") + shell: Rscript {0} + - name: styler version + run: packageVersion("styler") + shell: Rscript {0} + - name: Run styler + run: styler::style_dir(dry = "fail") + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index 1c19185..737abbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,4 +46,4 @@ VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE, r6 = FALSE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/R/DatasetMeta.R b/R/DatasetMeta.R index 009a4cf..e229edc 100644 --- a/R/DatasetMeta.R +++ b/R/DatasetMeta.R @@ -10,64 +10,64 @@ DatasetMeta <- R6::R6Class("DatasetMeta", inherit = MetaCore, private = list( - .name = NA, - .label = NA, - .num_vars = NA, - .key_vars = NA, - - .greet = function(quiet) { - cli_par() - cli_alert_success("{private$.name} dataset successfully selected") - if (quiet) { - cli_inform(c( - "i" = col_red("Dataset metadata specification subsetted with suppressed warnings"))) - } - cli_end() - } + .name = NA, + .label = NA, + .num_vars = NA, + .key_vars = NA, + .greet = function(quiet) { + cli_par() + cli_alert_success("{private$.name} dataset successfully selected") + if (quiet) { + cli_inform(c( + "i" = col_red("Dataset metadata specification subsetted with suppressed warnings") + )) + } + cli_end() + } ), - public = list( - initialize = function(metacore, quiet = FALSE) { - super$initialize( - ds_spec = metacore$ds_spec, - ds_vars = metacore$ds_vars, - var_spec = metacore$var_spec, - value_spec = metacore$value_spec, - derivations = metacore$derivations, - codelist = metacore$codelist, - supp = metacore$supp - ) - private$.name = metacore$ds_spec$dataset[1] - private$.label = metacore$ds_spec$label[1] - private$.num_vars = metacore$ds_vars |> nrow() - private$.key_vars = metacore$ds_vars |> - filter(!is.na(key_seq)) |> - pull(variable) - - private$.greet(quiet) - }, + initialize = function(metacore, quiet = FALSE) { + super$initialize( + ds_spec = metacore$ds_spec, + ds_vars = metacore$ds_vars, + var_spec = metacore$var_spec, + value_spec = metacore$value_spec, + derivations = metacore$derivations, + codelist = metacore$codelist, + supp = metacore$supp + ) + private$.name <- metacore$ds_spec$dataset[1] + private$.label <- metacore$ds_spec$label[1] + private$.num_vars <- metacore$ds_vars |> nrow() + private$.key_vars <- metacore$ds_vars |> + filter(!is.na(key_seq)) |> + pull(variable) - print = function() { - tables <- ls(envir = self) + private$.greet(quiet) + }, + print = function() { + tables <- ls(envir = self) - cli_par() - cli_rule(left = "Dataset specification object for {private$.name} ({private$.label})") - cli_text("The dataset contains {private$.num_vars} variable{?s}") - cli_text("Dataset key{?s}: {ansi_collapse(private$.key_vars, last = ', ')}") - cli_end() + cli_par() + cli_rule(left = "Dataset specification object for {private$.name} ({private$.label})") + cli_text("The dataset contains {private$.num_vars} variable{?s}") + cli_text("Dataset key{?s}: {ansi_collapse(private$.key_vars, last = ', ')}") + cli_end() - cli_par() - cli_text("The structure of the specification object is:") - for (table in tables) { - obj <- get(table, envir = self) - if (!is.list(obj)) { next } - cli_bullets(c(">" = "{table}: {typeof(table)} [{dim(obj)[1]} x {dim(obj)[2]}] {ansi_collapse(names(obj), last = ', ')}")) + cli_par() + cli_text("The structure of the specification object is:") + for (table in tables) { + obj <- get(table, envir = self) + if (!is.list(obj)) { + next } - cli_end() + cli_bullets(c(">" = "{table}: {typeof(table)} [{dim(obj)[1]} x {dim(obj)[2]}] {ansi_collapse(names(obj), last = ', ')}")) + } + cli_end() - cli_div() - cli_text("To inspect the specification object use {.fn View} in the console.") - cli_end() - } + cli_div() + cli_text("To inspect the specification object use {.fn View} in the console.") + cli_end() + } ) ) diff --git a/R/checks.R b/R/checks.R index c7807ab..fe79859 100644 --- a/R/checks.R +++ b/R/checks.R @@ -29,20 +29,20 @@ #' #' check_inconsistent_types(metacore) #' @rdname checks -check_inconsistent_labels <- function(metacore){ - basic_check(label, metacore) +check_inconsistent_labels <- function(metacore) { + basic_check(label, metacore) } #' @export #' @rdname checks -check_inconsistent_types <- function(metacore){ - basic_check(type, metacore) +check_inconsistent_types <- function(metacore) { + basic_check(type, metacore) } #' @export #' @rdname checks -check_inconsistent_formats <- function(metacore){ - basic_check(format, metacore) +check_inconsistent_formats <- function(metacore) { + basic_check(format, metacore) } #' Base code for running cross variable checks @@ -55,32 +55,34 @@ check_inconsistent_formats <- function(metacore){ #' @noRd #' @importFrom stringr str_glue #' @importFrom dplyr across -basic_check <- function(col_to_check, metacore){ - if(!is_metacore(metacore)){ - cli_abort("Expects a metacore object", call. = FALSE) - } +basic_check <- function(col_to_check, metacore) { + if (!is_metacore(metacore)) { + cli_abort("Expects a metacore object", call. = FALSE) + } - report_df <- metacore$var_spec %>% - mutate(var1 = str_remove(variable, "[[:alnum:]]+\\.")) %>% - group_by(var1) %>% - mutate(n_lab = n_distinct({{col_to_check}})) %>% - filter(n_lab > 1) %>% - mutate(across(everything(), remove_label)) %>% - group_by(var1, {{col_to_check}}) %>% - summarise(n_vars = n(), - ls_of_vars = list(variable), - .groups = "drop") %>% - select(variable = var1, everything()) + report_df <- metacore$var_spec %>% + mutate(var1 = str_remove(variable, "[[:alnum:]]+\\.")) %>% + group_by(var1) %>% + mutate(n_lab = n_distinct({{ col_to_check }})) %>% + filter(n_lab > 1) %>% + mutate(across(everything(), remove_label)) %>% + group_by(var1, {{ col_to_check }}) %>% + summarise( + n_vars = n(), + ls_of_vars = list(variable), + .groups = "drop" + ) %>% + select(variable = var1, everything()) - if(nrow(report_df) > 0){ - cli_warn(str_glue("Mismatch {as_label(enexpr(col_to_check))}s detected")) - return(report_df) - } else { - cli_inform(str_glue("No mismatch {as_label(enexpr(col_to_check))}s detected")) - } + if (nrow(report_df) > 0) { + cli_warn(str_glue("Mismatch {as_label(enexpr(col_to_check))}s detected")) + return(report_df) + } else { + cli_inform(str_glue("No mismatch {as_label(enexpr(col_to_check))}s detected")) + } } remove_label <- function(x) { - attr(x, "label") <- NULL - x + attr(x, "label") <- NULL + x } diff --git a/R/metacore.R b/R/metacore.R index 7567206..094a7ce 100644 --- a/R/metacore.R +++ b/R/metacore.R @@ -18,77 +18,93 @@ #' @noRd #' #' @importFrom stringr str_to_lower -MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp, quiet = FALSE){ - - private$.ds_spec <- ds_spec %>% - add_labs(dataset = "Dataset Name", - structure = "Value Structure", - label = "Dataset Label") - - private$.ds_vars <- ds_vars %>% - add_labs(dataset = "Dataset Name", - variable = "Variable Name", - key_seq = "Sequence Key", - order = "Variable Order", - mandatory = "Mandatory (Boolean)", - core = "ADaM core (Expected, Required, Permissible)", - supp_flag = "Supplemental Flag") - - private$.var_spec <- var_spec %>% - add_labs(variable = "Variable Name", - length = "Variable Length", - label = "Variable Label", - type = "Variable Class", - common = "Common Across ADaM", - format = "Variable Format") - - private$.value_spec <- value_spec %>% - add_labs(type = "Value Type", - orgin = "Origin of Value", - code_id = "ID of the Code List", - dataset = "Dataset Name", - variable = "Variable Name", - where = "Value of the Variable", - derivation_id = "ID of Derivation") %>% - mutate(origin = str_to_lower(.data$origin)) - - - private$.derivations <- derivations %>% - add_labs(derivation_id = "ID of Derivation", - derivation = "Derivation") - - private$.codelist <- codelist %>% - add_labs(code_id = "ID of the Code List", - names = "Name of the Code List", - type = "Code List/Permitted Values/External Library", - codes = "List of Codes") - - private$.codelist <- codelist %>% - add_labs(code_id = "ID of the Code List", - names = "Name of the Code List", - type = "Code List/Permitted Values/External Library", - codes = "List of Codes") - - private$.supp <- supp %>% - add_labs(dataset = "Dataset Name", - variable = "Variable Name", - idvar = "Identifying Variable", - qeval = "Evaluator") - - private$.ds_len <- ds_spec %>% nrow() - - private$.ds_names <- ds_spec %>% pull(dataset) - - private$.ds_labels <- ds_spec %>% pull(label) - if(quiet){ - suppressWarnings(self$validate()) - } - else { - self$validate() - } - - - if (inherits_only(self, c("Metacore", "R6"))) { private$.greet(quiet) } +MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp, quiet = FALSE) { + private$.ds_spec <- ds_spec %>% + add_labs( + dataset = "Dataset Name", + structure = "Value Structure", + label = "Dataset Label" + ) + + private$.ds_vars <- ds_vars %>% + add_labs( + dataset = "Dataset Name", + variable = "Variable Name", + key_seq = "Sequence Key", + order = "Variable Order", + mandatory = "Mandatory (Boolean)", + core = "ADaM core (Expected, Required, Permissible)", + supp_flag = "Supplemental Flag" + ) + + private$.var_spec <- var_spec %>% + add_labs( + variable = "Variable Name", + length = "Variable Length", + label = "Variable Label", + type = "Variable Class", + common = "Common Across ADaM", + format = "Variable Format" + ) + + private$.value_spec <- value_spec %>% + add_labs( + type = "Value Type", + orgin = "Origin of Value", + code_id = "ID of the Code List", + dataset = "Dataset Name", + variable = "Variable Name", + where = "Value of the Variable", + derivation_id = "ID of Derivation" + ) %>% + mutate(origin = str_to_lower(.data$origin)) + + + private$.derivations <- derivations %>% + add_labs( + derivation_id = "ID of Derivation", + derivation = "Derivation" + ) + + private$.codelist <- codelist %>% + add_labs( + code_id = "ID of the Code List", + names = "Name of the Code List", + type = "Code List/Permitted Values/External Library", + codes = "List of Codes" + ) + + private$.codelist <- codelist %>% + add_labs( + code_id = "ID of the Code List", + names = "Name of the Code List", + type = "Code List/Permitted Values/External Library", + codes = "List of Codes" + ) + + private$.supp <- supp %>% + add_labs( + dataset = "Dataset Name", + variable = "Variable Name", + idvar = "Identifying Variable", + qeval = "Evaluator" + ) + + private$.ds_len <- ds_spec %>% nrow() + + private$.ds_names <- ds_spec %>% pull(dataset) + + private$.ds_labels <- ds_spec %>% pull(label) + if (quiet) { + suppressWarnings(self$validate()) + } else { + self$validate() + } + + + if (inherits_only(self, c("Metacore", "R6"))) { + private$.greet(quiet) + } } @@ -98,65 +114,63 @@ MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivati #' @family Metacore #' @noRd #' -MetaCore_print <- function(...){ - cli_par() - cli_rule("Metacore object contains metadata for {private$.ds_len} datasets") - for (i in 1:private$.ds_len) { - cli_bullets(c(">" = "{private$.ds_names[i]} ({private$.ds_labels[i]})")) - } - cli_end() +MetaCore_print <- function(...) { + cli_par() + cli_rule("Metacore object contains metadata for {private$.ds_len} datasets") + for (i in 1:private$.ds_len) { + cli_bullets(c(">" = "{private$.ds_names[i]} ({private$.ds_labels[i]})")) + } + cli_end() - cli_inform("To use the {.obj Metacore} object with {.pkg metatools} package, first subset a dataset using {.fn metacore::select_dataset}") + cli_inform("To use the {.obj Metacore} object with {.pkg metatools} package, first subset a dataset using {.fn metacore::select_dataset}") } - #' Metacore R6 object validation function #' #' This checks that the labels and lengths of ds_vars match var_spec #' @family Metacore #' @noRd #' -MetaCore_validate <- function() { - if(var_name_check(private)){ - - if(nrow(private$.ds_spec) == 0 & - nrow(private$.ds_vars) == 0 & - nrow(private$.var_spec) == 0 & - nrow(private$.value_spec) == 0 & - nrow(private$.derivations) == 0 & - nrow(private$.codelist) == 0 & - nrow(private$.supp) == 0 ){ - cli_warn("Other checks were not preformed, because all datasets are empty", - call. = FALSE) - } else { - check_columns(private$.ds_spec, - private$.ds_vars, - private$.var_spec, - private$.value_spec, - private$.derivations, - private$.codelist, - private$.supp - ) - - ds_vars_check(private$.ds_vars, private$.var_spec) - value_check(private$.ds_vars, private$.value_spec) - derivation_check(private$.value_spec, private$.derivations) - codelist_check(private$.value_spec, private$.codelist) - if(nrow(private$.supp) == 0){ - supp_check(private$.ds_vars, private$.supp) - } +MetaCore_validate <- function() { + if (var_name_check(private)) { + if (nrow(private$.ds_spec) == 0 & + nrow(private$.ds_vars) == 0 & + nrow(private$.var_spec) == 0 & + nrow(private$.value_spec) == 0 & + nrow(private$.derivations) == 0 & + nrow(private$.codelist) == 0 & + nrow(private$.supp) == 0) { + cli_warn("Other checks were not preformed, because all datasets are empty", + call. = FALSE + ) + } else { + check_columns( + private$.ds_spec, + private$.ds_vars, + private$.var_spec, + private$.value_spec, + private$.derivations, + private$.codelist, + private$.supp + ) + ds_vars_check(private$.ds_vars, private$.var_spec) + value_check(private$.ds_vars, private$.value_spec) + derivation_check(private$.value_spec, private$.derivations) + codelist_check(private$.value_spec, private$.codelist) + if (nrow(private$.supp) == 0) { + supp_check(private$.ds_vars, private$.supp) } - - } else { - cli_warn("Other checks were not preformed, because column names were incorrect", - call. = FALSE) - } + } + } else { + cli_warn("Other checks were not preformed, because column names were incorrect", + call. = FALSE + ) + } } - #' readonly function factory #' #' This function is used inside the R6 active method and allows us @@ -168,61 +182,63 @@ MetaCore_validate <- function() { #' @noRd #' readonly <- function(name) { - private <- NULL - inside <- function(value) { - name <- attr(sys.function(sys.parent()), "name") - if (missing(value)) { - private[[paste0(".", name)]] - } else { - cli_abort("{name} is read only", call. = FALSE) - } - } - attributes(inside) <- list(name = name) - inside + private <- NULL + inside <- function(value) { + name <- attr(sys.function(sys.parent()), "name") + if (missing(value)) { + private[[paste0(".", name)]] + } else { + cli_abort("{name} is read only", call. = FALSE) + } + } + attributes(inside) <- list(name = name) + inside } #' Select method to subset by a single dataframe #' @param value the dataframe to subset by #' MetaCore_filter <- function(value) { - - private$.ds_spec <- private$.ds_spec %>% filter(dataset == value) - if(nrow(private$.ds_spec) == 0){ - cli_abort("{value} is not a dataset in the metacore object", call. = FALSE) - } - private$.ds_vars <- private$.ds_vars %>% filter(dataset == value) - private$.value_spec <- private$.value_spec %>% filter(dataset == value) - - - # Need clarity on X.Y.Z situation: SUPPY8.QVAL - private$.var_spec <- private$.var_spec %>% - # variables have the dataset prefix so we make this into its own column - mutate(dataset = ifelse(str_detect(variable, "\\."), str_extract(variable, "^.*(?=\\.)"), ""), - variable = str_remove(variable, "^.*\\.") - ) %>% - # then keep the variables that occur once or in the dataset to filter - filter(dataset == "" | dataset == value) %>% - # remove the temporary column - select(-dataset) %>% - # right join - right_join(private$.ds_vars %>% select(variable), by="variable", - multiple = "all") %>% - distinct(variable, .keep_all = TRUE) # for when duplicates gett through and have different lables but the same name - - # Get values/variables that need derivations - val_deriv <- private$.value_spec %>% - distinct(.data$derivation_id) %>% - na.omit() - - private$.derivations <- private$.derivations %>% - right_join(val_deriv, by = "derivation_id", multiple = "all") - - private$.codelist <- private$.codelist %>% - right_join(private$.value_spec %>% - distinct(.data$code_id) %>% - na.omit(), by = "code_id", multiple = "all") - - private$.supp <- private$.supp %>% filter(dataset == value) + private$.ds_spec <- private$.ds_spec %>% filter(dataset == value) + if (nrow(private$.ds_spec) == 0) { + cli_abort("{value} is not a dataset in the metacore object", call. = FALSE) + } + private$.ds_vars <- private$.ds_vars %>% filter(dataset == value) + private$.value_spec <- private$.value_spec %>% filter(dataset == value) + + + # Need clarity on X.Y.Z situation: SUPPY8.QVAL + private$.var_spec <- private$.var_spec %>% + # variables have the dataset prefix so we make this into its own column + mutate( + dataset = ifelse(str_detect(variable, "\\."), str_extract(variable, "^.*(?=\\.)"), ""), + variable = str_remove(variable, "^.*\\.") + ) %>% + # then keep the variables that occur once or in the dataset to filter + filter(dataset == "" | dataset == value) %>% + # remove the temporary column + select(-dataset) %>% + # right join + right_join(private$.ds_vars %>% select(variable), + by = "variable", + multiple = "all" + ) %>% + distinct(variable, .keep_all = TRUE) # for when duplicates gett through and have different lables but the same name + + # Get values/variables that need derivations + val_deriv <- private$.value_spec %>% + distinct(.data$derivation_id) %>% + na.omit() + + private$.derivations <- private$.derivations %>% + right_join(val_deriv, by = "derivation_id", multiple = "all") + + private$.codelist <- private$.codelist %>% + right_join(private$.value_spec %>% + distinct(.data$code_id) %>% + na.omit(), by = "code_id", multiple = "all") + + private$.supp <- private$.supp %>% filter(dataset == value) } #' The Metacore R6 Class @@ -234,85 +250,82 @@ MetaCore_filter <- function(value) { #' @noRd # MetaCore <- R6::R6Class("Metacore", - public = list( - initialize = MetaCore_initialize, - print = MetaCore_print, - validate = MetaCore_validate, - metacore_filter = MetaCore_filter - ), - - private = list( - .ds_spec = tibble( - dataset = character(), - structure = character(), - label = character() - ), - .ds_vars = tibble( - dataset = character(), - variable = character(), - mandatory = logical(), - key_seq = integer(), - order = integer(), - core = character(), - supp_flag = logical() - ), - .var_spec = tibble( - variable = character(), - label = character(), - length = integer(), - type = character(), - common = character(), - format = character() - ), - .value_spec = tibble( - dataset = character(), - variable = character(), - where = character(), - type = character(), - sig_dig = integer(), - code_id = character(), - origin = character(), - derivation_id = integer() - ), - .derivations = tibble( - derivation_id = integer(), - derivation = character() - ), - # code_type == df | permitted_val | external_lib - .codelist = tibble( - code_id = character(), - name = character(), - type = character(), - codes = list() - ), - .supp = tibble( - dataset = character(), - variable = character(), - idvar = character(), - qeval = character() - ), - .ds_len = NA, - .ds_names = list(), - .ds_labels = list(), - - .greet = function(quiet = FALSE) { - cli_par() - cli_alert_success("Metadata successfully imported") - if (quiet) cli_inform(c("i" = col_red("Dataset metadata imported with suppressed warnings"))) - cli_inform(c("i" = "To use the {.obj Metacore} object with {.pkg metatools} package, first subset a dataset using {.fn metacore::select_dataset}")) - cli_end() - } - ), - - active = list( - ds_spec = readonly('ds_spec'), - ds_vars = readonly('ds_vars'), - var_spec = readonly('var_spec'), - value_spec = readonly('value_spec'), - derivations = readonly('derivations'), - codelist = readonly('codelist'), - supp = readonly('supp') - ) + public = list( + initialize = MetaCore_initialize, + print = MetaCore_print, + validate = MetaCore_validate, + metacore_filter = MetaCore_filter + ), + private = list( + .ds_spec = tibble( + dataset = character(), + structure = character(), + label = character() + ), + .ds_vars = tibble( + dataset = character(), + variable = character(), + mandatory = logical(), + key_seq = integer(), + order = integer(), + core = character(), + supp_flag = logical() + ), + .var_spec = tibble( + variable = character(), + label = character(), + length = integer(), + type = character(), + common = character(), + format = character() + ), + .value_spec = tibble( + dataset = character(), + variable = character(), + where = character(), + type = character(), + sig_dig = integer(), + code_id = character(), + origin = character(), + derivation_id = integer() + ), + .derivations = tibble( + derivation_id = integer(), + derivation = character() + ), + # code_type == df | permitted_val | external_lib + .codelist = tibble( + code_id = character(), + name = character(), + type = character(), + codes = list() + ), + .supp = tibble( + dataset = character(), + variable = character(), + idvar = character(), + qeval = character() + ), + .ds_len = NA, + .ds_names = list(), + .ds_labels = list(), + .greet = function(quiet = FALSE) { + cli_par() + cli_alert_success("Metadata successfully imported") + if (quiet) cli_inform(c("i" = col_red("Dataset metadata imported with suppressed warnings"))) + cli_inform(c("i" = "To use the {.obj Metacore} object with {.pkg metatools} package, first subset a dataset using {.fn metacore::select_dataset}")) + cli_end() + } + ), + active = list( + ds_spec = readonly("ds_spec"), + ds_vars = readonly("ds_vars"), + var_spec = readonly("var_spec"), + value_spec = readonly("value_spec"), + derivations = readonly("derivations"), + codelist = readonly("codelist"), + supp = readonly("supp") + ) ) @@ -332,74 +345,78 @@ MetaCore <- R6::R6Class("Metacore", #' #' @export #' -metacore <- function( - ds_spec = tibble(dataset = character(), structure = character(), label = character()), - ds_vars = tibble(dataset = character(), variable = character(), mandatory = logical(), +metacore <- function(ds_spec = tibble(dataset = character(), structure = character(), label = character()), + ds_vars = tibble( + dataset = character(), variable = character(), mandatory = logical(), key_seq = integer(), order = integer(), core = character(), - supp_flag = logical()), - var_spec = tibble(variable = character(), label = character(), length = integer(), - type = character(), common = character(), format = character()), - value_spec = tibble(dataset = character(), - variable = character(), - where = character(), - type = character(), - sig_dig = integer(), - code_id = character(), - origin = character(), - derivation_id = integer()), - derivations = tibble(derivation_id = integer(), derivation = character()), - codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()), - supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()), - quiet = FALSE -) { - - test <- quiet_if_true({ - + supp_flag = logical() + ), + var_spec = tibble( + variable = character(), label = character(), length = integer(), + type = character(), common = character(), format = character() + ), + value_spec = tibble( + dataset = character(), + variable = character(), + where = character(), + type = character(), + sig_dig = integer(), + code_id = character(), + origin = character(), + derivation_id = integer() + ), + derivations = tibble(derivation_id = integer(), derivation = character()), + codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()), + supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()), + quiet = FALSE) { + test <- quiet_if_true( + { is_empty_df <- as.list(environment()) %>% - keep(is.null) + keep(is.null) if (length(is_empty_df) > 0) { - to_replace <- all_message() %>% - mutate( - convert = map(.data$test, function(x) { - if (identical(x, .Primitive("is.numeric"))) { - numeric() - } else if (identical(x, .Primitive("is.logical"))) { - logical() - } else { - character() - } - }) - ) %>% - filter(.data$dataset %in% names(is_empty_df)) %>% - group_by(.data$dataset) %>% - group_split() - - replaced <- to_replace %>% - map(function(df) { - names(df$convert) <- df$var - df$convert %>% - as_tibble() + to_replace <- all_message() %>% + mutate( + convert = map(.data$test, function(x) { + if (identical(x, .Primitive("is.numeric"))) { + numeric() + } else if (identical(x, .Primitive("is.logical"))) { + logical() + } else { + character() + } }) - - names(replaced) <- to_replace %>% map_chr(~ unique(.x$dataset)) - list2env(replaced, environment()) + ) %>% + filter(.data$dataset %in% names(is_empty_df)) %>% + group_by(.data$dataset) %>% + group_split() + + replaced <- to_replace %>% + map(function(df) { + names(df$convert) <- df$var + df$convert %>% + as_tibble() + }) + + names(replaced) <- to_replace %>% map_chr(~ unique(.x$dataset)) + list2env(replaced, environment()) } MetaCore$new( - ds_spec = ds_spec, - ds_vars = ds_vars, - var_spec = var_spec, - value_spec = value_spec, - derivations = derivations, - codelist = codelist, - supp = supp, - quiet = quiet + ds_spec = ds_spec, + ds_vars = ds_vars, + var_spec = var_spec, + value_spec = value_spec, + derivations = derivations, + codelist = codelist, + supp = supp, + quiet = quiet ) + }, + quiet = quiet + ) - }, quiet = quiet) - - if (quiet) invisible(test) else test + if (quiet) invisible(test) else test } @@ -414,34 +431,32 @@ metacore <- function( #' @return a filtered subset of the metacore object #' @export select_dataset <- function(.data, dataset, simplify = FALSE, quiet = FALSE) { - - cl <- .data$clone() - cl$metacore_filter(dataset) - - if (simplify) { - - test <- quiet_if_true({ - list( - cl$ds_vars, - cl$var_spec, - cl$value_spec, - cl$derivations, - select(cl$codelist, code_id, codes), - cl$supp - ) %>% - reduce(left_join) - }, quiet = quiet) - - } else { - - test <- quiet_if_true( - DatasetMeta$new(metacore = cl, quiet = quiet), - quiet = quiet - ) - - } - - if (quiet) invisible(test) else test + cl <- .data$clone() + cl$metacore_filter(dataset) + + if (simplify) { + test <- quiet_if_true( + { + list( + cl$ds_vars, + cl$var_spec, + cl$value_spec, + cl$derivations, + select(cl$codelist, code_id, codes), + cl$supp + ) %>% + reduce(left_join) + }, + quiet = quiet + ) + } else { + test <- quiet_if_true( + DatasetMeta$new(metacore = cl, quiet = quiet), + quiet = quiet + ) + } + + if (quiet) invisible(test) else test } @@ -468,41 +483,43 @@ select_dataset <- function(.data, dataset, simplify = FALSE, quiet = FALSE) { #' get_control_term(meta_ex, QVAL, SUPPAE) #' get_control_term(meta_ex, "QVAL", "SUPPAE") #' } -get_control_term <- function(metacode, variable, dataset = NULL){ - var_str <- ifelse(str_detect(as_label(enexpr(variable)), "\""), - as_name(variable), as_label(enexpr(variable))) - dataset_val <- ifelse(str_detect(as_label(enexpr(dataset)), "\""), - as_name(dataset), as_label(enexpr(dataset))) # to make the filter more explicit - if(!var_str %in% metacode$value_spec$variable){ - cli_abort("{var_str} not found in the value_spec table. Please check the variable name") - } - if(dataset_val == "NULL"){ - var_code_id <- metacode$value_spec %>% - filter(variable == var_str) %>% - pull(code_id) %>% - unique() - } else { - subset_data <- metacode$value_spec %>% - filter(dataset == dataset_val) - if(nrow(subset_data) == 0){ - cli_abort("{dataset_val} not found in the value_spec table. Please check the dataset name") - } - var_code_id <- subset_data %>% - filter(variable == var_str) %>% - pull(code_id) %>% - unique() - } - if(length(var_code_id) > 1){ - cli_abort("{var_str} does not have a unique control term, consider spcificing a dataset") - } - ct <- metacode$codelist %>% - filter(code_id == var_code_id) %>% - pull(codes) - if(length(ct) == 0){ - cli_inform("{var_str} has no control terminology") - } else { - return(ct[[1]]) - } +get_control_term <- function(metacode, variable, dataset = NULL) { + var_str <- ifelse(str_detect(as_label(enexpr(variable)), "\""), + as_name(variable), as_label(enexpr(variable)) + ) + dataset_val <- ifelse(str_detect(as_label(enexpr(dataset)), "\""), + as_name(dataset), as_label(enexpr(dataset)) + ) # to make the filter more explicit + if (!var_str %in% metacode$value_spec$variable) { + cli_abort("{var_str} not found in the value_spec table. Please check the variable name") + } + if (dataset_val == "NULL") { + var_code_id <- metacode$value_spec %>% + filter(variable == var_str) %>% + pull(code_id) %>% + unique() + } else { + subset_data <- metacode$value_spec %>% + filter(dataset == dataset_val) + if (nrow(subset_data) == 0) { + cli_abort("{dataset_val} not found in the value_spec table. Please check the dataset name") + } + var_code_id <- subset_data %>% + filter(variable == var_str) %>% + pull(code_id) %>% + unique() + } + if (length(var_code_id) > 1) { + cli_abort("{var_str} does not have a unique control term, consider spcificing a dataset") + } + ct <- metacode$codelist %>% + filter(code_id == var_code_id) %>% + pull(codes) + if (length(ct) == 0) { + cli_inform("{var_str} has no control terminology") + } else { + return(ct[[1]]) + } } @@ -524,23 +541,24 @@ get_control_term <- function(metacode, variable, dataset = NULL){ #' get_keys(meta_ex, "AE") #' get_keys(meta_ex, AE) #' } -get_keys <- function(metacode, dataset){ - dataset_val <- ifelse(str_detect(as_label(enexpr(dataset)), "\""), - as_name(dataset), as_label(enexpr(dataset))) # to make the filter more explicit +get_keys <- function(metacode, dataset) { + dataset_val <- ifelse(str_detect(as_label(enexpr(dataset)), "\""), + as_name(dataset), as_label(enexpr(dataset)) + ) # to make the filter more explicit - subset_data <- metacode$ds_vars %>% - filter(dataset == dataset_val) - if(nrow(subset_data) == 0){ - cli_abort("{dataset_val} not found in the ds_vars table. Please check the dataset name") - } + subset_data <- metacode$ds_vars %>% + filter(dataset == dataset_val) + if (nrow(subset_data) == 0) { + cli_abort("{dataset_val} not found in the ds_vars table. Please check the dataset name") + } - keys <- subset_data %>% - filter(!is.na(key_seq)) %>% - select(variable, key_seq) + keys <- subset_data %>% + filter(!is.na(key_seq)) %>% + select(variable, key_seq) - keys <- keys[order(keys$key_seq),] + keys <- keys[order(keys$key_seq), ] - return(keys) + return(keys) } @@ -553,26 +571,26 @@ get_keys <- function(metacode, dataset){ #' @export #' save_metacore <- function(metacore_object, path = NULL) { - # if no path save to working directory - # with same name as object - if (is.null(path)) { - nm <- deparse(substitute(metacore_object)) - path <- paste0(nm, ".rds") - - # check the suffix of the path - } else { - suffix <- str_extract(path, "\\.\\w*$") - # if the extension is .rda keep it - if (suffix == ".rds") { - path <- path - - # otherwise we need to replace it with .rda - } else { - prefix <- str_remove(path, "\\.\\w*$") - path <- paste0(prefix, ".rds") - } - } - saveRDS(metacore_object, path) + # if no path save to working directory + # with same name as object + if (is.null(path)) { + nm <- deparse(substitute(metacore_object)) + path <- paste0(nm, ".rds") + + # check the suffix of the path + } else { + suffix <- str_extract(path, "\\.\\w*$") + # if the extension is .rda keep it + if (suffix == ".rds") { + path <- path + + # otherwise we need to replace it with .rda + } else { + prefix <- str_remove(path, "\\.\\w*$") + path <- paste0(prefix, ".rds") + } + } + saveRDS(metacore_object, path) } #' load metacore object @@ -582,14 +600,16 @@ save_metacore <- function(metacore_object, path = NULL) { #' @return metacore object in memory #' @export load_metacore <- function(path = NULL) { - if (is.null(path)) { - rdss <- list.files(".", ".rds") - if (length(rdss) == 0) { - cli_abort("please supply path to metacore object ending with extension .rds", call. = FALSE) - } else { - cli_abort("metacore object path required, did you mean:", - paste(" ", rdss, sep = "\n "), call. = FALSE) - } - } - readRDS(path) + if (is.null(path)) { + rdss <- list.files(".", ".rds") + if (length(rdss) == 0) { + cli_abort("please supply path to metacore object ending with extension .rds", call. = FALSE) + } else { + cli_abort("metacore object path required, did you mean:", + paste(" ", rdss, sep = "\n "), + call. = FALSE + ) + } + } + readRDS(path) } diff --git a/R/spec_builder.R b/R/spec_builder.R index 9f1c875..18fd5a0 100644 --- a/R/spec_builder.R +++ b/R/spec_builder.R @@ -14,43 +14,40 @@ #' #' @return given a spec document it returns a metacore object #' @export -spec_to_metacore <- function(path, quiet = FALSE, where_sep_sheet = TRUE){ - - doc <- quiet_if_true(read_all_sheets(path), quiet = quiet) - - if (quiet_if_true(spec_type(path), quiet = quiet) == "by_type") { - - ds_spec <- quiet_if_true(spec_type_to_ds_spec(doc), quiet = quiet) - ds_vars <- quiet_if_true(spec_type_to_ds_vars(doc), quiet = quiet) - var_spec <- quiet_if_true(spec_type_to_var_spec(doc), quiet = quiet) - value_spec <- quiet_if_true( - spec_type_to_value_spec(doc, where_sep_sheet = where_sep_sheet), - quiet = quiet - ) - derivations <- quiet_if_true(spec_type_to_derivations(doc), quiet = quiet) - code_list <- quiet_if_true(spec_type_to_codelist(doc), quiet = quiet) - - test <- quiet_if_true( - metacore( - ds_spec, - ds_vars, - var_spec, - value_spec, - derivations, - codelist = code_list, - quiet = quiet - ), - quiet = quiet - ) - - } else { - cli_abort( - "This specification format is not currently supported. You will need to write your own reader", - call. = FALSE - ) - } - - if (quiet) invisible(test) else test +spec_to_metacore <- function(path, quiet = FALSE, where_sep_sheet = TRUE) { + doc <- quiet_if_true(read_all_sheets(path), quiet = quiet) + + if (quiet_if_true(spec_type(path), quiet = quiet) == "by_type") { + ds_spec <- quiet_if_true(spec_type_to_ds_spec(doc), quiet = quiet) + ds_vars <- quiet_if_true(spec_type_to_ds_vars(doc), quiet = quiet) + var_spec <- quiet_if_true(spec_type_to_var_spec(doc), quiet = quiet) + value_spec <- quiet_if_true( + spec_type_to_value_spec(doc, where_sep_sheet = where_sep_sheet), + quiet = quiet + ) + derivations <- quiet_if_true(spec_type_to_derivations(doc), quiet = quiet) + code_list <- quiet_if_true(spec_type_to_codelist(doc), quiet = quiet) + + test <- quiet_if_true( + metacore( + ds_spec, + ds_vars, + var_spec, + value_spec, + derivations, + codelist = code_list, + quiet = quiet + ), + quiet = quiet + ) + } else { + cli_abort( + "This specification format is not currently supported. You will need to write your own reader", + call. = FALSE + ) + } + + if (quiet) invisible(test) else test } @@ -61,25 +58,25 @@ spec_to_metacore <- function(path, quiet = FALSE, where_sep_sheet = TRUE){ #' @return returns string indicating the type of spec document #' @export #' -spec_type <- function(path){ - sheets <- excel_sheets(path) - if(!any(sheets %>% str_detect("[D|d]omains?|[D|d]atasets?"))){ - cli_abort("File does not contain a Domain/Datasets tab, which is needed. Please either modify the spec document or write a reader (see documentation for more information)", - call. = FALSE) - } else if(any(sheets %>% str_detect("ADSL|DM"))){ - type <- "by_ds" - } else if(any(sheets %>% str_detect("[V|v]ariables?"))){ - type <- "by_type" - } else { - cli_abort("File in an unknown format. Please either modify the spec document or write a reader (see documentation for more information)", - call. = FALSE) - } - type +spec_type <- function(path) { + sheets <- excel_sheets(path) + if (!any(sheets %>% str_detect("[D|d]omains?|[D|d]atasets?"))) { + cli_abort("File does not contain a Domain/Datasets tab, which is needed. Please either modify the spec document or write a reader (see documentation for more information)", + call. = FALSE + ) + } else if (any(sheets %>% str_detect("ADSL|DM"))) { + type <- "by_ds" + } else if (any(sheets %>% str_detect("[V|v]ariables?"))) { + type <- "by_type" + } else { + cli_abort("File in an unknown format. Please either modify the spec document or write a reader (see documentation for more information)", + call. = FALSE + ) + } + type } - - #' Read in all Sheets #' #' Given a path to a file, this function reads in all sheets of an excel file @@ -88,12 +85,12 @@ spec_type <- function(path){ #' @export #' #' @return a list of datasets -read_all_sheets <- function(path){ - sheets <- excel_sheets(path) - all_dat <- sheets %>% - map(~read_excel(path, sheet = ., col_types = "text")) - names(all_dat) <- sheets - all_dat +read_all_sheets <- function(path) { + sheets <- excel_sheets(path) + all_dat <- sheets %>% + map(~ read_excel(path, sheet = ., col_types = "text")) + names(all_dat) <- sheets + all_dat } @@ -112,29 +109,31 @@ read_all_sheets <- function(path){ #' @export #' #' @family spec builders -spec_type_to_ds_spec <- function(doc, cols = c("dataset" = "[N|n]ame|[D|d]ataset|[D|d]omain", - "structure" = "[S|s]tructure", - "label" = "[L|l]abel|[D|d]escription"), sheet = NULL){ - name_check <- names(cols) %in% c("dataset", "structure", "label") %>% - all() - if(!name_check | is.null(names(cols))){ - cli_abort(c( - "Supplied column vector must be named using the following names:", - "'dataset', 'structure', 'label'" - )) - } - if(!is.null(sheet)){ - sheet_ls <- str_subset(names(doc), sheet) - doc <- doc[sheet_ls] - } - - # Get missing columns - missing <- col_vars()$.ds_spec %>% - discard(~. %in% names(cols)) - - create_tbl(doc, cols) %>% - distinct() %>% - `is.na<-`(missing) +spec_type_to_ds_spec <- function(doc, cols = c( + "dataset" = "[N|n]ame|[D|d]ataset|[D|d]omain", + "structure" = "[S|s]tructure", + "label" = "[L|l]abel|[D|d]escription" + ), sheet = NULL) { + name_check <- names(cols) %in% c("dataset", "structure", "label") %>% + all() + if (!name_check | is.null(names(cols))) { + cli_abort(c( + "Supplied column vector must be named using the following names:", + "'dataset', 'structure', 'label'" + )) + } + if (!is.null(sheet)) { + sheet_ls <- str_subset(names(doc), sheet) + doc <- doc[sheet_ls] + } + + # Get missing columns + missing <- col_vars()$.ds_spec %>% + discard(~ . %in% names(cols)) + + create_tbl(doc, cols) %>% + distinct() %>% + `is.na<-`(missing) } #' Spec to ds_vars @@ -158,61 +157,70 @@ spec_type_to_ds_spec <- function(doc, cols = c("dataset" = "[N|n]ame|[D|d]ataset #' @export #' #' @family spec builders -spec_type_to_ds_vars <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain", - "variable" = "[V|v]ariable [[N|n]ame]?|[V|v]ariables?", - "order" = "[V|v]ariable [O|o]rder|[O|o]rder", - "mandatory" = "[K|k]eep|[M|m]andatory"), +spec_type_to_ds_vars <- function(doc, cols = c( + "dataset" = "[D|d]ataset|[D|d]omain", + "variable" = "[V|v]ariable [[N|n]ame]?|[V|v]ariables?", + "order" = "[V|v]ariable [O|o]rder|[O|o]rder", + "mandatory" = "[K|k]eep|[M|m]andatory" + ), key_seq_sep_sheet = TRUE, - key_seq_cols = c("dataset" = "Dataset", - "key_seq" = "Key Variables"), - sheet = "[V|v]ar|Datasets"){ - - name_check <- names(cols) %in% c("variable", "dataset", "order", - "mandatory", "key_seq", "core", "supp_flag") %>% - all() - - name_check_extra <- names(key_seq_cols) %in% c("dataset", "key_seq") %>% - all() %>% - ifelse(key_seq_sep_sheet, ., TRUE) # Adding it cause we only want to check when sep sheet is true - - # Testing for names of vectors - if(any(!name_check, !name_check_extra, is.null(names(cols)))){ - cli_abort("Supplied column vector must be named using the following names: + key_seq_cols = c( + "dataset" = "Dataset", + "key_seq" = "Key Variables" + ), + sheet = "[V|v]ar|Datasets") { + name_check <- names(cols) %in% c( + "variable", "dataset", "order", + "mandatory", "key_seq", "core", "supp_flag" + ) %>% + all() + + name_check_extra <- names(key_seq_cols) %in% c("dataset", "key_seq") %>% + all() %>% + ifelse(key_seq_sep_sheet, ., TRUE) # Adding it cause we only want to check when sep sheet is true + + # Testing for names of vectors + if (any(!name_check, !name_check_extra, is.null(names(cols)))) { + cli_abort("Supplied column vector must be named using the following names: 'variable', 'dataset', 'order', 'mandatory', 'core', 'key_seq', 'supp_flag'") - } - # Subsetting sheets - if(!is.null(sheet)){ - sheet_ls <- str_subset(names(doc), sheet) - doc <- doc[sheet_ls] - } - #Get base doc - out <-doc %>% - create_tbl(cols) - - # Getting the key seq values - if(key_seq_sep_sheet){ - key_seq_df <- doc %>% - create_tbl(key_seq_cols) %>% - mutate(key_seq = str_split(key_seq, ",\\s"), - key_seq = map(key_seq, function(x){ - tibble(variable = x) %>% - mutate(key_seq = row_number()) - })) %>% - unnest(key_seq) - out <- left_join(out, key_seq_df, by = c("dataset", "variable")) - } - - # Get missing columns - missing <- col_vars()$.ds_vars %>% - discard(~. %in% names(out)) - - out %>% - distinct() %>% - `is.na<-`(missing) %>% - mutate(key_seq = as.integer(.data$key_seq), - mandatory = yn_to_tf(.data$mandatory), - core = as.character(.data$core), - order = as.numeric(.data$order)) + } + # Subsetting sheets + if (!is.null(sheet)) { + sheet_ls <- str_subset(names(doc), sheet) + doc <- doc[sheet_ls] + } + # Get base doc + out <- doc %>% + create_tbl(cols) + + # Getting the key seq values + if (key_seq_sep_sheet) { + key_seq_df <- doc %>% + create_tbl(key_seq_cols) %>% + mutate( + key_seq = str_split(key_seq, ",\\s"), + key_seq = map(key_seq, function(x) { + tibble(variable = x) %>% + mutate(key_seq = row_number()) + }) + ) %>% + unnest(key_seq) + out <- left_join(out, key_seq_df, by = c("dataset", "variable")) + } + + # Get missing columns + missing <- col_vars()$.ds_vars %>% + discard(~ . %in% names(out)) + + out %>% + distinct() %>% + `is.na<-`(missing) %>% + mutate( + key_seq = as.integer(.data$key_seq), + mandatory = yn_to_tf(.data$mandatory), + core = as.character(.data$core), + order = as.numeric(.data$order) + ) } @@ -232,79 +240,86 @@ spec_type_to_ds_vars <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]oma #' @export #' #' @family spec builders -spec_type_to_var_spec <- function(doc, cols = c("variable" = "[N|n]ame|[V|v]ariables?", - "length" = "[L|l]ength", - "label" = "[L|l]abel", - "type" = "[T|t]ype", - "dataset" = "[D|d]ataset|[D|d]omain", - "format" = "[F|f]ormat"), - sheet = "[V|v]ar"){ - # Check the names - name_check <- names(cols) %in% c("variable", "length", "label", - "type", "dataset", "common", "format") %>% - all() - if(!name_check | is.null(names(cols))){ - cli_abort(paste( - "Supplied column vector must be named using the following names:", - "'variable', 'length', 'label', 'type', 'dataset', 'common', 'format'", - "If common is not avaliable it can be excluded and will be automatically filled in.", - "Additionally, dataset is only used to clarify if information differs by domain." - )) - } - - # Check if sheet is specified - if(!is.null(sheet)){ - sheet_ls <- str_subset(names(doc), sheet) - doc <- doc[sheet_ls] - } - out <- create_tbl(doc, cols) - if(!"dataset" %in% names(out)){ - dups <- out %>% - distinct() %>% - group_by(variable) %>% - summarise(n = n(), .groups = "drop") %>% - filter(n > 1) - if(nrow(dups) > 0){ - x <- dups %>% pull(variable) - cli_abort(c( - col_red("The following variables are repeated with different metadata for different datasets:"), - "i" = ansi_collapse(x), - "i" = "Please add 'dataset' = [Name of dataset column] to your named cols vector to correct this." - ), call. = FALSE) - } - } else { - if(!"common" %in% names(cols)){ - # Get the variable common to all datasets can only be calculated with ds present - common_vars <- out %>% - group_by(dataset) %>% - select(dataset, variable) %>% - group_split(.keep = FALSE) %>% - reduce(inner_join, by = "variable") %>% - mutate(common = TRUE) - out <- out %>% - left_join(common_vars, by = "variable") %>% - replace_na(list(common = FALSE)) - } - - # Remove any multiples and add ds if different metadata for different ds's - out <- out %>% - group_by(variable) %>% - mutate(unique = n_distinct(length, label, type), - variable = if_else(unique == 1, variable, - paste0(dataset, ".", variable)), - length = as.numeric(length)) %>% - distinct(variable, length, label, type, .keep_all = TRUE) %>% - select(-dataset, -unique) - } - - # Get missing columns - missing <- col_vars()$.var_spec %>% - discard(~. %in% names(out)) - out %>% - `is.na<-`(missing) %>% +spec_type_to_var_spec <- function(doc, cols = c( + "variable" = "[N|n]ame|[V|v]ariables?", + "length" = "[L|l]ength", + "label" = "[L|l]abel", + "type" = "[T|t]ype", + "dataset" = "[D|d]ataset|[D|d]omain", + "format" = "[F|f]ormat" + ), + sheet = "[V|v]ar") { + # Check the names + name_check <- names(cols) %in% c( + "variable", "length", "label", + "type", "dataset", "common", "format" + ) %>% + all() + if (!name_check | is.null(names(cols))) { + cli_abort(paste( + "Supplied column vector must be named using the following names:", + "'variable', 'length', 'label', 'type', 'dataset', 'common', 'format'", + "If common is not avaliable it can be excluded and will be automatically filled in.", + "Additionally, dataset is only used to clarify if information differs by domain." + )) + } + + # Check if sheet is specified + if (!is.null(sheet)) { + sheet_ls <- str_subset(names(doc), sheet) + doc <- doc[sheet_ls] + } + out <- create_tbl(doc, cols) + if (!"dataset" %in% names(out)) { + dups <- out %>% distinct() %>% - ungroup() %>% - mutate(length = as.integer(length)) + group_by(variable) %>% + summarise(n = n(), .groups = "drop") %>% + filter(n > 1) + if (nrow(dups) > 0) { + x <- dups %>% pull(variable) + cli_abort(c( + col_red("The following variables are repeated with different metadata for different datasets:"), + "i" = ansi_collapse(x), + "i" = "Please add 'dataset' = [Name of dataset column] to your named cols vector to correct this." + ), call. = FALSE) + } + } else { + if (!"common" %in% names(cols)) { + # Get the variable common to all datasets can only be calculated with ds present + common_vars <- out %>% + group_by(dataset) %>% + select(dataset, variable) %>% + group_split(.keep = FALSE) %>% + reduce(inner_join, by = "variable") %>% + mutate(common = TRUE) + out <- out %>% + left_join(common_vars, by = "variable") %>% + replace_na(list(common = FALSE)) + } + + # Remove any multiples and add ds if different metadata for different ds's + out <- out %>% + group_by(variable) %>% + mutate( + unique = n_distinct(length, label, type), + variable = if_else(unique == 1, variable, + paste0(dataset, ".", variable) + ), + length = as.numeric(length) + ) %>% + distinct(variable, length, label, type, .keep_all = TRUE) %>% + select(-dataset, -unique) + } + + # Get missing columns + missing <- col_vars()$.var_spec %>% + discard(~ . %in% names(out)) + out %>% + `is.na<-`(missing) %>% + distinct() %>% + ungroup() %>% + mutate(length = as.integer(length)) } #' Spec to value_spec @@ -334,120 +349,132 @@ spec_type_to_var_spec <- function(doc, cols = c("variable" = "[N|n]ame|[V|v]aria #' @export #' #' @family spec builders -spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain", - "variable" = "[N|n]ame|[V|v]ariables?", - "origin" = "[O|o]rigin", - "type" = "[T|t]ype", - "code_id" = "[C|c]odelist|Controlled Term", - "sig_dig" = "[S|s]ignificant", - "where" = "[W|w]here", - "derivation_id" = "[M|m]ethod", - "predecessor" = "[P|p]redecessor"), +spec_type_to_value_spec <- function(doc, cols = c( + "dataset" = "[D|d]ataset|[D|d]omain", + "variable" = "[N|n]ame|[V|v]ariables?", + "origin" = "[O|o]rigin", + "type" = "[T|t]ype", + "code_id" = "[C|c]odelist|Controlled Term", + "sig_dig" = "[S|s]ignificant", + "where" = "[W|w]here", + "derivation_id" = "[M|m]ethod", + "predecessor" = "[P|p]redecessor" + ), sheet = NULL, where_sep_sheet = TRUE, - where_cols = c("id" = "ID", - "where" = c("Variable", "Comparator", "Value")), - var_sheet = "[V|v]ar"){ - name_check <- names(cols) %in% c("variable", "origin", "code_id", "sig_dig", - "type", "dataset", "where", "derivation_id", - "predecessor") %>% - all() - - if(!name_check| is.null(names(cols))){ - cli_abort(c( - "Supplied column vector must be named using the following names:", - "i" = "'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'sig_dig', 'derivation_id','predecessor'", - "i" = paste("If derivation_id is not avaliable it can be excluded and dataset.variable will be used.", - "If the where information is on a seperate sheet, put the column with cross ref as where.") - ), call = FALSE) - } - - # Select a subset of sheets if specified - if(!is.null(sheet)){ - sheet_ls <- str_subset(names(doc), sheet) - doc <- doc[sheet_ls] - } - - out <- create_tbl(doc, cols) - - # Does a var sheet exsist? - if(!is.null(var_sheet)){ - var_sheet <- names(doc) %>% - keep(~str_detect(., var_sheet)) - } - - # If so, add any variables not in the value sheet - if(length(var_sheet) > 0){ - var_out <- doc[var_sheet] %>% - map_dfr(function(x){ - var_out <- x %>% - select_rename_w_dups(cols) %>% - mutate(where = "TRUE") - if(nrow(out) > 0){ - var_out %>% - anti_join(out, by = c("dataset", "variable")) - } else { - var_out - } - }) - - # THIS ISN'T VERY PRETTY, IF SOMEONE HAS A BETTER IDEA PLEASE FIX - # Needed in cause the value sheet is empty - if(nrow(out) > 0 & nrow(var_out) > 0){ - out <- bind_rows(out, var_out) - } else if(nrow(var_out) > 0) { - out <- var_out - } else { - out - } + where_cols = c( + "id" = "ID", + "where" = c("Variable", "Comparator", "Value") + ), + var_sheet = "[V|v]ar") { + name_check <- names(cols) %in% c( + "variable", "origin", "code_id", "sig_dig", + "type", "dataset", "where", "derivation_id", + "predecessor" + ) %>% + all() + + if (!name_check | is.null(names(cols))) { + cli_abort(c( + "Supplied column vector must be named using the following names:", + "i" = "'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'sig_dig', 'derivation_id','predecessor'", + "i" = paste( + "If derivation_id is not avaliable it can be excluded and dataset.variable will be used.", + "If the where information is on a seperate sheet, put the column with cross ref as where." + ) + ), call = FALSE) + } + + # Select a subset of sheets if specified + if (!is.null(sheet)) { + sheet_ls <- str_subset(names(doc), sheet) + doc <- doc[sheet_ls] + } + + out <- create_tbl(doc, cols) + + # Does a var sheet exsist? + if (!is.null(var_sheet)) { + var_sheet <- names(doc) %>% + keep(~ str_detect(., var_sheet)) + } + + # If so, add any variables not in the value sheet + if (length(var_sheet) > 0) { + var_out <- doc[var_sheet] %>% + map_dfr(function(x) { + var_out <- x %>% + select_rename_w_dups(cols) %>% + mutate(where = "TRUE") + if (nrow(out) > 0) { + var_out %>% + anti_join(out, by = c("dataset", "variable")) + } else { + var_out + } + }) - } - - if(where_sep_sheet & "where" %in% names(out)){ - where_df <- create_tbl(doc, where_cols) %>% - mutate( - where_new = pmap_chr(., function(...) { - # Without c_across this gets a little weird - # Use pmap and steal out the arg names - vars <- list(...) - # Filter down to only args that start with where - wheres <- as.character(vars[which(str_starts(names(vars), 'where'))]) - # collapse it together - paste(wheres, collapse=" ") - }) - ) %>% - select(id, where_new) - out <- out %>% - left_join(where_df, by = c("where" = "id")) %>% - select(-where, where = where_new) - } else if(where_sep_sheet) { - cli_warn("Not able to add where information from seperate sheet cause a where column is needed to cross-reference the information", - call. = FALSE) - } - - if(!"derivation_id" %in% names(cols)){ - out <- out %>% - mutate(derivation_id = - if_else(str_to_lower(.data$origin) == "assigned", - paste0(dataset, ".", variable), - paste0("pred.", dataset, ".", variable))) - } - - # Get missing columns - missing <- col_vars()$.value_spec %>% - discard(~. %in% names(out)) - - out %>% - `is.na<-`(missing) %>% - distinct() %>% - mutate(sig_dig = as.integer(.data$sig_dig), - derivation_id = case_when( - !is.na(.data$derivation_id) ~ .data$derivation_id, - str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)), - str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable)) + # THIS ISN'T VERY PRETTY, IF SOMEONE HAS A BETTER IDEA PLEASE FIX + # Needed in cause the value sheet is empty + if (nrow(out) > 0 & nrow(var_out) > 0) { + out <- bind_rows(out, var_out) + } else if (nrow(var_out) > 0) { + out <- var_out + } else { + out + } + } + + if (where_sep_sheet & "where" %in% names(out)) { + where_df <- create_tbl(doc, where_cols) %>% + mutate( + where_new = pmap_chr(., function(...) { + # Without c_across this gets a little weird + # Use pmap and steal out the arg names + vars <- list(...) + # Filter down to only args that start with where + wheres <- as.character(vars[which(str_starts(names(vars), "where"))]) + # collapse it together + paste(wheres, collapse = " ") + }) ) %>% - select(-.data$predecessor) - + select(id, where_new) + out <- out %>% + left_join(where_df, by = c("where" = "id")) %>% + select(-where, where = where_new) + } else if (where_sep_sheet) { + cli_warn("Not able to add where information from seperate sheet cause a where column is needed to cross-reference the information", + call. = FALSE + ) + } + + if (!"derivation_id" %in% names(cols)) { + out <- out %>% + mutate( + derivation_id = + if_else(str_to_lower(.data$origin) == "assigned", + paste0(dataset, ".", variable), + paste0("pred.", dataset, ".", variable) + ) + ) + } + + # Get missing columns + missing <- col_vars()$.value_spec %>% + discard(~ . %in% names(out)) + + out %>% + `is.na<-`(missing) %>% + distinct() %>% + mutate( + sig_dig = as.integer(.data$sig_dig), + derivation_id = case_when( + !is.na(.data$derivation_id) ~ .data$derivation_id, + str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)), + str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable) + ) + ) %>% + select(-.data$predecessor) } #' Spec to codelist @@ -476,89 +503,98 @@ spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d] #' @export #' #' @family spec builders -spec_type_to_codelist <- function(doc, codelist_cols = c("code_id" = "ID", - "name" = "[N|n]ame", - "code" = "^[C|c]ode|^[T|t]erm", - "decode" = "[D|d]ecode"), +spec_type_to_codelist <- function(doc, codelist_cols = c( + "code_id" = "ID", + "name" = "[N|n]ame", + "code" = "^[C|c]ode|^[T|t]erm", + "decode" = "[D|d]ecode" + ), permitted_val_cols = NULL, - dict_cols = c("code_id" = "ID", - "name" = "[N|n]ame", - "dictionary" = "[D|d]ictionary", - "version" = "[V|v]ersion"), - sheets = NULL, simplify = FALSE){ - if(is.null(codelist_cols)){ - cli_abort("Codelist column names must be provided", call. = FALSE) - } else { - name_check <- names(codelist_cols) %in% c("code_id", "name", "code", "decode") %>% - all() - if(!name_check| is.null(names(codelist_cols))){ - cli_abort("Supplied column vector for codelist_cols must be named using the following names: + dict_cols = c( + "code_id" = "ID", + "name" = "[N|n]ame", + "dictionary" = "[D|d]ictionary", + "version" = "[V|v]ersion" + ), + sheets = NULL, simplify = FALSE) { + if (is.null(codelist_cols)) { + cli_abort("Codelist column names must be provided", call. = FALSE) + } else { + name_check <- names(codelist_cols) %in% c("code_id", "name", "code", "decode") %>% + all() + if (!name_check | is.null(names(codelist_cols))) { + cli_abort("Supplied column vector for codelist_cols must be named using the following names: 'code_id', 'name', 'code', 'decode'", - call. = FALSE - ) - } - } + call. = FALSE + ) + } + } - if (!is.null(permitted_val_cols)){ - name_check <- names(permitted_val_cols) %in% c("code_id", "name", "code") %>% - all() - if(!name_check){ - cli_abort("Supplied column vector for permitted_val_cols must be named using the following names: + if (!is.null(permitted_val_cols)) { + name_check <- names(permitted_val_cols) %in% c("code_id", "name", "code") %>% + all() + if (!name_check) { + cli_abort("Supplied column vector for permitted_val_cols must be named using the following names: 'code_id', 'name', 'code'", - call. = FALSE) - } - } - if(!is.null(dict_cols)){ - name_check <- names(dict_cols) %in% c("code_id", "name", "dictionary", "version") %>% - all() - if(!name_check){ - cli_abort(paste0( - "Supplied column vector for `dict_cols` must be named using the following names:", - "'code_id', 'name', 'dictionary', 'version'. If a dictionary sheet isn't avaliable", - "set `dict_cols` to NULL"), call. = FALSE) - } - } - - # Select a subset of sheets if specified - if(!is.null(sheets)){ - sheet_ls <- str_subset(names(doc), sheets) - doc <- doc[sheet_ls] - } - - # Create the base table with codes and decodes (min req output) - cd_out <- create_tbl(doc, codelist_cols) %>% + call. = FALSE + ) + } + } + if (!is.null(dict_cols)) { + name_check <- names(dict_cols) %in% c("code_id", "name", "dictionary", "version") %>% + all() + if (!name_check) { + cli_abort(paste0( + "Supplied column vector for `dict_cols` must be named using the following names:", + "'code_id', 'name', 'dictionary', 'version'. If a dictionary sheet isn't avaliable", + "set `dict_cols` to NULL" + ), call. = FALSE) + } + } + + # Select a subset of sheets if specified + if (!is.null(sheets)) { + sheet_ls <- str_subset(names(doc), sheets) + doc <- doc[sheet_ls] + } + + # Create the base table with codes and decodes (min req output) + cd_out <- create_tbl(doc, codelist_cols) %>% + group_by(code_id) %>% + mutate(type = case_when( + simplify & all(code == decode) ~ "permitted_val", + TRUE ~ "code_decode" + )) %>% + nest(codes = c(code, decode)) %>% + mutate(codes = if_else(type == "permitted_val", + lapply(codes, function(df) df %>% pull(code)), + codes + )) + # If available get a permitted value sheet + if (!is.null(permitted_val_cols)) { + pv_out <- create_tbl(doc, permitted_val_cols) %>% + mutate(type = "permitted_val") %>% group_by(code_id) %>% - mutate(type = case_when(simplify & all(code == decode) ~ "permitted_val", - TRUE ~ "code_decode")) %>% - nest(codes = c(code, decode)) %>% - mutate(codes = if_else(type == "permitted_val", - lapply(codes, function(df) df %>% pull(code)), - codes)) - # If available get a permitted value sheet - if(!is.null(permitted_val_cols)){ - pv_out <- create_tbl(doc, permitted_val_cols) %>% - mutate(type = "permitted_val") %>% - group_by(code_id) %>% - nest(codes = c(code, decode)) - cd_out <- bind_rows(cd_out, pv_out) - } - # Add dictionary if avaliable - if(!is.null(dict_cols)){ - dic_out <- create_tbl(doc, dict_cols) %>% - mutate(type = "external_library") %>% - group_by(code_id) %>% - nest(codes = c(dictionary, version)) - cd_out <- bind_rows(cd_out, dic_out) - } - # Get missing columns - missing <- col_vars()$.codelist %>% - discard(~. %in% names(cd_out)) - - cd_out %>% - `is.na<-`(missing) %>% - distinct() %>% - filter(!is.na(code_id)) %>% - ungroup() + nest(codes = c(code, decode)) + cd_out <- bind_rows(cd_out, pv_out) + } + # Add dictionary if avaliable + if (!is.null(dict_cols)) { + dic_out <- create_tbl(doc, dict_cols) %>% + mutate(type = "external_library") %>% + group_by(code_id) %>% + nest(codes = c(dictionary, version)) + cd_out <- bind_rows(cd_out, dic_out) + } + # Get missing columns + missing <- col_vars()$.codelist %>% + discard(~ . %in% names(cd_out)) + + cd_out %>% + `is.na<-`(missing) %>% + distinct() %>% + filter(!is.na(code_id)) %>% + ungroup() } #' Spec to derivation @@ -582,81 +618,86 @@ spec_type_to_codelist <- function(doc, codelist_cols = c("code_id" = "ID", #' #' @family spec builders #' @importFrom purrr quietly -spec_type_to_derivations <- function(doc, cols = c("derivation_id" = "ID", - "derivation" = "[D|d]efinition|[D|d]escription"), +spec_type_to_derivations <- function(doc, cols = c( + "derivation_id" = "ID", + "derivation" = "[D|d]efinition|[D|d]escription" + ), sheet = "Method|Derivations?", - var_cols = c("dataset" = "[D|d]ataset|[D|d]omain", - "variable" = "[N|n]ame|[V|v]ariables?", - "origin" = "[O|o]rigin", - "predecessor" = "[P|p]redecessor", - "comment" = "[C|c]omment")){ - - name_check <- names(cols) %in% c("derivation_id", "derivation") %>% - all() - if(!name_check| is.null(names(cols))){ - cli_abort(c( - "Supplied column vector must be named using the following names:", - "'derivation_id', 'derivation'")) - } - - name_check <- names(var_cols) %in% c('dataset', 'variable', 'origin', 'predecessor', 'comment') %>% - all() - if(!name_check| is.null(names(var_cols))){ - cli_abort("Supplied variable column vector must be named using the following names: + var_cols = c( + "dataset" = "[D|d]ataset|[D|d]omain", + "variable" = "[N|n]ame|[V|v]ariables?", + "origin" = "[O|o]rigin", + "predecessor" = "[P|p]redecessor", + "comment" = "[C|c]omment" + )) { + name_check <- names(cols) %in% c("derivation_id", "derivation") %>% + all() + if (!name_check | is.null(names(cols))) { + cli_abort(c( + "Supplied column vector must be named using the following names:", + "'derivation_id', 'derivation'" + )) + } + + name_check <- names(var_cols) %in% c("dataset", "variable", "origin", "predecessor", "comment") %>% + all() + if (!name_check | is.null(names(var_cols))) { + cli_abort("Supplied variable column vector must be named using the following names: 'dataset', 'variable', 'origin', 'predecessor', 'comment'") - } - # Get the predecessor - ls_derivations <- quietly(create_tbl)(doc, var_cols)$result - if(class(ls_derivations)[1] == "list"){ + } + # Get the predecessor + ls_derivations <- quietly(create_tbl)(doc, var_cols)$result + if (class(ls_derivations)[1] == "list") { + ls_derivations <- ls_derivations %>% + reduce(bind_rows) + # Get the comments + if (any(str_detect(names(doc), "[C|c]omment"))) { + comments <- doc[str_detect(names(doc), "[C|c]omment")][[1]] %>% + select(matches("ID|Description")) + with_comments <- ls_derivations %>% + filter(str_to_lower(.data$origin) == "assigned") %>% + left_join(comments, by = c("comment" = "ID")) %>% + mutate(comment = .data$Description) %>% + select(-.data$Description) ls_derivations <- ls_derivations %>% - reduce(bind_rows) - # Get the comments - if(any(str_detect(names(doc), "[C|c]omment"))){ - comments <- doc[str_detect(names(doc), "[C|c]omment")][[1]] %>% - select(matches("ID|Description")) - with_comments <- ls_derivations %>% - filter(str_to_lower(.data$origin) == "assigned") %>% - left_join(comments, by = c("comment" = "ID" )) %>% - mutate(comment = .data$Description) %>% - select(-.data$Description) - ls_derivations <- ls_derivations %>% - filter(str_to_lower(.data$origin) != "assigned") %>% - bind_rows(with_comments) - } - } - - other_derivations <- ls_derivations %>% - mutate( - derivation_id = case_when( - str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)), - str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable), - TRUE ~ NA_character_ - ), - derivation = case_when( - str_to_lower(.data$origin) == "predecessor" ~ as.character(.data$predecessor), - str_to_lower(.data$origin) == "assigned" ~ .data$comment, - TRUE ~ NA_character_ - )) %>% - filter(!is.na(.data$derivation_id)) %>% - select(.data$derivation, .data$derivation_id) - - # Select a subset of sheets if specified - if(!is.null(sheet)){ - sheet_ls <- str_subset(names(doc), sheet) - doc <- doc[sheet_ls] - } - out <- create_tbl(doc, cols) - - # Get missing columns - missing <- col_vars()$.derivations %>% - discard(~. %in% names(out)) - - - out %>% - `is.na<-`(missing) %>% - bind_rows(other_derivations) %>% - distinct() %>% - filter(!is.na(derivation_id)) + filter(str_to_lower(.data$origin) != "assigned") %>% + bind_rows(with_comments) + } + } + + other_derivations <- ls_derivations %>% + mutate( + derivation_id = case_when( + str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)), + str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable), + TRUE ~ NA_character_ + ), + derivation = case_when( + str_to_lower(.data$origin) == "predecessor" ~ as.character(.data$predecessor), + str_to_lower(.data$origin) == "assigned" ~ .data$comment, + TRUE ~ NA_character_ + ) + ) %>% + filter(!is.na(.data$derivation_id)) %>% + select(.data$derivation, .data$derivation_id) + + # Select a subset of sheets if specified + if (!is.null(sheet)) { + sheet_ls <- str_subset(names(doc), sheet) + doc <- doc[sheet_ls] + } + out <- create_tbl(doc, cols) + + # Get missing columns + missing <- col_vars()$.derivations %>% + discard(~ . %in% names(out)) + + + out %>% + `is.na<-`(missing) %>% + bind_rows(other_derivations) %>% + distinct() %>% + filter(!is.na(derivation_id)) } ### Helper Functions @@ -671,83 +712,89 @@ spec_type_to_derivations <- function(doc, cols = c("derivation_id" = "ID", #' #' @return dataset (or list of datasets if not specific enough) #' @export -create_tbl <- function(doc, cols){ - matches <- doc %>% - keep(function(x){ - cols %>% - map_lgl(~any(str_detect(names(x), .))) %>% - all() +create_tbl <- function(doc, cols) { + matches <- doc %>% + keep(function(x) { + cols %>% + map_lgl(~ any(str_detect(names(x), .))) %>% + all() + }) + if (length(matches) == 0) { + # Get which variable can't be matches + mismatch_per_sheet <- doc %>% + map(function(x) { + cols %>% + map_lgl(~ any(str_detect(names(x), .))) %>% + discard(~.) # Remove the matched values }) - if(length(matches) == 0) { - # Get which variable can't be matches - mismatch_per_sheet <- doc %>% - map(function(x){ - cols %>% - map_lgl(~any(str_detect(names(x), .))) %>% - discard(~.) # Remove the matched values - }) - # Find the closest sheet by looking for the sheet(s) with the fewest mismatches - mis_lens <- mismatch_per_sheet %>% - map_int(length) - closest_sheets <- mis_lens %>% - keep(~ . == min(mis_lens)) %>% - names() - # Get the name of the sheets and which columns don't match - sheets_to_error <- mismatch_per_sheet %>% - keep(names(.) %in% closest_sheets) - - # Write out the error - sheets_to_error %>% - map2_chr(names(sheets_to_error), function(vars, sheet_name){ - paste0("Sheet '", sheet_name, "' is the closest match, but unable to match the following column(s)\n", - paste(names(vars), collapse = "\n")) - }) %>% - paste0(collapse = "\n") %>% - paste0("Unable to identify a sheet with all columns.\n", . ) %>% - (call. = FALSE) - - } else if(length(matches) == 1){ - # Check names and write a better warning message if names don't work - ds_nm <- matches[[1]] %>% names() - nm_test <- cols %>% - map(~str_detect(ds_nm, .)) %>% - map(~ds_nm[.]) %>% - keep(~length(.) > 1) - - if(length(nm_test) > 0) { - # See if an exact match will - test_exact <- cols[names(nm_test)] %>% - paste0("^", ., "$") %>% - map_int(~sum(str_detect(ds_nm, .))) %>% - keep(~ . != 1) - if(length(test_exact) == 0){ - cols[names(nm_test)] <- cols[names(nm_test)] %>% - paste0("^", ., "$") - } else { - errors <- NULL - for(i in 1:length(nm_test)) { - errors <- c(errors, str_glue("{names(nm_test[i])} matches {length(nm_test[[i]])} columns: {nm_test[i]}")) - } - msg <- c( - "Unable to rename the following columns in {names(matches)}", - set_names(errors, rep("x", length(errors))), - "i" = "Please check your regular expression" - ) - cli_abort(msg, .call = FALSE) - } + # Find the closest sheet by looking for the sheet(s) with the fewest mismatches + mis_lens <- mismatch_per_sheet %>% + map_int(length) + closest_sheets <- mis_lens %>% + keep(~ . == min(mis_lens)) %>% + names() + # Get the name of the sheets and which columns don't match + sheets_to_error <- mismatch_per_sheet %>% + keep(names(.) %in% closest_sheets) + + # Write out the error + sheets_to_error %>% + map2_chr(names(sheets_to_error), function(vars, sheet_name) { + paste0( + "Sheet '", sheet_name, "' is the closest match, but unable to match the following column(s)\n", + paste(names(vars), collapse = "\n") + ) + }) %>% + paste0(collapse = "\n") %>% + paste0("Unable to identify a sheet with all columns.\n", .) %>% + (call. <- FALSE) + } else if (length(matches) == 1) { + # Check names and write a better warning message if names don't work + ds_nm <- matches[[1]] %>% names() + nm_test <- cols %>% + map(~ str_detect(ds_nm, .)) %>% + map(~ ds_nm[.]) %>% + keep(~ length(.) > 1) + + if (length(nm_test) > 0) { + # See if an exact match will + test_exact <- cols[names(nm_test)] %>% + paste0("^", ., "$") %>% + map_int(~ sum(str_detect(ds_nm, .))) %>% + keep(~ . != 1) + if (length(test_exact) == 0) { + cols[names(nm_test)] <- cols[names(nm_test)] %>% + paste0("^", ., "$") + } else { + errors <- NULL + for (i in 1:length(nm_test)) { + errors <- c(errors, str_glue("{names(nm_test[i])} matches {length(nm_test[[i]])} columns: {nm_test[i]}")) + } + msg <- c( + "Unable to rename the following columns in {names(matches)}", + set_names(errors, rep("x", length(errors))), + "i" = "Please check your regular expression" + ) + cli_abort(msg, .call = FALSE) } - - # This needs to be done columnwise to allow for duplicate selection of the same column - select_rename_w_dups(matches[[1]], cols) - - } else { - sheets_mats <- matches %>% names() - cli_warn(c(paste( - "Column names are not specific enough to identify a single sheet."), - "The following {length(sheets_mats)} match the criteria set:"), - ansi_collapse(sheets_mats), call. = FALSE) - matches %>% map(~select_rename_w_dups(., cols)) - } + } + + # This needs to be done columnwise to allow for duplicate selection of the same column + select_rename_w_dups(matches[[1]], cols) + } else { + sheets_mats <- matches %>% names() + cli_warn( + c( + paste( + "Column names are not specific enough to identify a single sheet." + ), + "The following {length(sheets_mats)} match the criteria set:" + ), + ansi_collapse(sheets_mats), + call. = FALSE + ) + matches %>% map(~ select_rename_w_dups(., cols)) + } } @@ -758,18 +805,21 @@ create_tbl <- function(doc, cols){ #' @return returns a logical vector or normal vector with warning #' @noRd #' -yn_to_tf <- function(x){ - if(all(is.na(x) | str_detect(x, regex("^y$|^n$|^yes$|^no$", ignore_case = T)))){ - case_when(str_detect(x, regex("^y$|^yes$", ignore_case = T)) ~ TRUE, - str_detect(x, regex("^n$|^no$", ignore_case = T)) ~ FALSE, - is.na(x) ~ NA) - } else if(is.logical(x)){ - x - } else { - cli_warn("Keep column needs to be True or False, please correct before converting to a Metacore object", - call. = FALSE) - x - } +yn_to_tf <- function(x) { + if (all(is.na(x) | str_detect(x, regex("^y$|^n$|^yes$|^no$", ignore_case = T)))) { + case_when( + str_detect(x, regex("^y$|^yes$", ignore_case = T)) ~ TRUE, + str_detect(x, regex("^n$|^no$", ignore_case = T)) ~ FALSE, + is.na(x) ~ NA + ) + } else if (is.logical(x)) { + x + } else { + cli_warn("Keep column needs to be True or False, please correct before converting to a Metacore object", + call. = FALSE + ) + x + } } @@ -782,17 +832,17 @@ yn_to_tf <- function(x){ #' #' @return dataset #' @noRd -select_rename_w_dups <- function(.data, cols){ - pull_safe <- safely(~select(.x, matches(.y, ignore.case = FALSE))) - cols %>% - map_dfr(function(col){ - out <- pull_safe(.data, col) %>% - .$result - if(ncol(out) == 1){ - out <- out %>% pull(1) - } else { - out <- NULL - } - out - }) +select_rename_w_dups <- function(.data, cols) { + pull_safe <- safely(~ select(.x, matches(.y, ignore.case = FALSE))) + cols %>% + map_dfr(function(col) { + out <- pull_safe(.data, col) %>% + .$result + if (ncol(out) == 1) { + out <- out %>% pull(1) + } else { + out <- NULL + } + out + }) } diff --git a/R/utils.R b/R/utils.R index 9864a78..2ea8832 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,11 +6,11 @@ #' @noRd add_lab <- function(x, label) { - if (length(label) == 0) { - label <- NULL - } - attr(x, "label") <- label - x + if (length(label) == 0) { + label <- NULL + } + attr(x, "label") <- label + x } #' Add Labels to Dataframe @@ -22,22 +22,22 @@ add_lab <- function(x, label) { #' @importFrom dplyr filter pull #' @return Dataframe with labels #' @noRd -add_labs <- function(.data,...) { - name_list <- c(...) - df <- tibble(col = names(name_list), lab = name_list) - .data %>% - purrr::map2(names(.data), function(x, name) { - label <- df %>% - filter(col == name) %>% - pull(lab) %>% - unname() - if(length(label) > 0) { - add_lab(x, label) - } else { - x - } - }) %>% - as_tibble() +add_labs <- function(.data, ...) { + name_list <- c(...) + df <- tibble(col = names(name_list), lab = name_list) + .data %>% + purrr::map2(names(.data), function(x, name) { + label <- df %>% + filter(col == name) %>% + pull(lab) %>% + unname() + if (length(label) > 0) { + add_lab(x, label) + } else { + x + } + }) %>% + as_tibble() } @@ -50,41 +50,36 @@ add_labs <- function(.data,...) { #' @param nm name of column to check (for warning and error clarification) #' check_structure <- function(.data, col, func, any_na_acceptable, nm) { - - column <- as_string(col) - vec <- .data %>% pull(!!col) - warning_string <- NULL - error_message <- NULL - - if(any(is.na(vec)) & !any_na_acceptable) { - error_message <- str_glue("`{column}` from the `{nm}` table contains missing values. Actual values are needed.") - } else if (all(is.na(vec))){ - warning_string <- str_glue("`{column}` from the `{nm}` table only contains missing values.") - } else { - - failures <- vec %>% - discard(~do.call(func, list(.))) %>% - unique() - - all_fails <- paste(failures) - - if (length(failures) > 0 ) { - - if (is.primitive(func)) { - - assertion_func <- prim_name(func) - warning_string <- str_glue("{nm}${column} fails {assertion_func} check") - - } else { - cli_warn(c( - "The following {qty(all_fails)} word{?s} in {nm}${column} {qty(all_fails)} {?is/are} not allowed:", - "i" = ansi_collapse(all_fails, last = ", ") - ), call. = FALSE) - } + column <- as_string(col) + vec <- .data %>% pull(!!col) + warning_string <- NULL + error_message <- NULL + + if (any(is.na(vec)) & !any_na_acceptable) { + error_message <- str_glue("`{column}` from the `{nm}` table contains missing values. Actual values are needed.") + } else if (all(is.na(vec))) { + warning_string <- str_glue("`{column}` from the `{nm}` table only contains missing values.") + } else { + failures <- vec %>% + discard(~ do.call(func, list(.))) %>% + unique() + + all_fails <- paste(failures) + + if (length(failures) > 0) { + if (is.primitive(func)) { + assertion_func <- prim_name(func) + warning_string <- str_glue("{nm}${column} fails {assertion_func} check") + } else { + cli_warn(c( + "The following {qty(all_fails)} word{?s} in {nm}${column} {qty(all_fails)} {?is/are} not allowed:", + "i" = ansi_collapse(all_fails, last = ", ") + ), call. = FALSE) } - } + } + } - list(warning = warning_string, error = error_message) + list(warning = warning_string, error = error_message) } #' Check Words in Column @@ -92,13 +87,13 @@ check_structure <- function(.data, col, func, any_na_acceptable, nm) { #' @param ... permissible words in the column #' @param col the column to check for specific words check_words <- function(..., col) { - accepted_words <- unlist(c(...)) - expr <- expr(function(col) col %in% !!accepted_words) - make_function(body = expr, env = parent.frame())() + accepted_words <- unlist(c(...)) + expr <- expr(function(col) col %in% !!accepted_words) + make_function(body = expr, env = parent.frame())() } -make_function <- function(args = pairlist(), body, env = parent.frame()) { - eval(call("function", args, body), env) +make_function <- function(args = pairlist(), body, env = parent.frame()) { + eval(call("function", args, body), env) } @@ -114,23 +109,23 @@ make_function <- function(args = pairlist(), body, env = parent.frame()) { #' metacore_example() #' metacore_example("mock_spec.xlsx") metacore_example <- function(file = NULL) { - if (is.null(file)) { - dir(system.file("extdata", package = "metacore")) - } else { - system.file("extdata", file, package = "metacore", mustWork = TRUE) - } + if (is.null(file)) { + dir(system.file("extdata", package = "metacore")) + } else { + system.file("extdata", file, package = "metacore", mustWork = TRUE) + } } #' Conditionally suppress messages and warnings #' @keywords internal quiet_if_true <- function(expr, quiet = FALSE) { - if (isTRUE(quiet)) { - suppressWarnings( - suppressMessages( - force(expr) - ) + if (isTRUE(quiet)) { + suppressWarnings( + suppressMessages( + force(expr) ) - } else { - force(expr) - } + ) + } else { + force(expr) + } } diff --git a/R/validators.R b/R/validators.R index 298006e..7b3bf7f 100644 --- a/R/validators.R +++ b/R/validators.R @@ -5,28 +5,28 @@ #' #' @return writes warning to console if there is an issue #' @noRd -ds_vars_check <- function(ds_vars, var_spec){ - var_check <- anti_join(ds_vars, var_spec, by = "variable") - - if(var_check %>% nrow()){ - var_ls <- var_check %>% - pull(.data$variable) %>% - unique() - - var_check_dbl <- ds_vars %>% - filter(.data$variable %in% var_ls) %>% - mutate(var_name = paste0(.data$dataset, ".", .data$variable)) %>% - anti_join(var_spec, by = c("var_name" = "variable")) %>% - pull(.data$variable) %>% - unique() - - if(var_check_dbl %>% length() != 0){ - cli_warn(c( - "The following variable(s) do not have labels and lengths:", - "i" = ansi_collapse(var_check_dbl, last = ", ") - ), call. = FALSE) - } - } +ds_vars_check <- function(ds_vars, var_spec) { + var_check <- anti_join(ds_vars, var_spec, by = "variable") + + if (var_check %>% nrow()) { + var_ls <- var_check %>% + pull(.data$variable) %>% + unique() + + var_check_dbl <- ds_vars %>% + filter(.data$variable %in% var_ls) %>% + mutate(var_name = paste0(.data$dataset, ".", .data$variable)) %>% + anti_join(var_spec, by = c("var_name" = "variable")) %>% + pull(.data$variable) %>% + unique() + + if (var_check_dbl %>% length() != 0) { + cli_warn(c( + "The following variable(s) do not have labels and lengths:", + "i" = ansi_collapse(var_check_dbl, last = ", ") + ), call. = FALSE) + } + } } @@ -41,31 +41,31 @@ ds_vars_check <- function(ds_vars, var_spec){ #' #' @return writes warning to console if there is an issue #' @noRd -value_check <- function(ds_vars, value_spec){ - value_vars <- value_spec %>% - distinct(.data$dataset, .data$variable) - - #Check the variables in ds_vars that don't have value specs - not_in_val <- anti_join(ds_vars, value_vars, by = c("dataset", "variable")) - if(nrow(not_in_val) != 0){ - variables <- not_in_val %>% - mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>% - pull(.data$full) - cli_warn(c( - "The following variables are in the ds_vars table, but don't have value specs:", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } - # Check the variables in value spec that aren't in ds_vars - not_in_ds <- anti_join(value_vars, ds_vars, by = c("dataset", "variable")) - if(nrow(not_in_ds) != 0){ - variables <- not_in_ds %>% - pull(.data$variable) - cli_warn(c( - "The following variables have value specifications, but aren't in the ds_vars table:", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } +value_check <- function(ds_vars, value_spec) { + value_vars <- value_spec %>% + distinct(.data$dataset, .data$variable) + + # Check the variables in ds_vars that don't have value specs + not_in_val <- anti_join(ds_vars, value_vars, by = c("dataset", "variable")) + if (nrow(not_in_val) != 0) { + variables <- not_in_val %>% + mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>% + pull(.data$full) + cli_warn(c( + "The following variables are in the ds_vars table, but don't have value specs:", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } + # Check the variables in value spec that aren't in ds_vars + not_in_ds <- anti_join(value_vars, ds_vars, by = c("dataset", "variable")) + if (nrow(not_in_ds) != 0) { + variables <- not_in_ds %>% + pull(.data$variable) + cli_warn(c( + "The following variables have value specifications, but aren't in the ds_vars table:", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } } @@ -76,33 +76,32 @@ value_check <- function(ds_vars, value_spec){ #' #' @return writes warning to console if there is an issue #' @noRd -derivation_check <- function(value_spec, derivations){ - deriv_vars <- value_spec %>% - filter(!is.na(.data$derivation_id)) %>% - distinct(.data$variable,.data$ derivation_id) - - #Check the variables that don't have derivations in derivations - not_in_val <- anti_join(deriv_vars, derivations, by = c("derivation_id")) - if(nrow(not_in_val) != 0){ - variables <- not_in_val %>% - pull(.data$variable) - cli_warn(c( - "The following variables have derivation ids not found in the derivations table:", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } - # Check the derivations in deriavtion that aren't in value spec - not_in_deriv <- anti_join(derivations, deriv_vars, by = c("derivation_id")) - if(nrow(not_in_deriv) != 0){ - deriv <- not_in_deriv %>% - mutate(message = paste0(.data$derivation_id)) %>% - pull(.data$message) - cli_warn(c( - "The following derivations are never used:", - "i" = ansi_collapse(deriv, last = ", ") - ), call. = FALSE) - } - +derivation_check <- function(value_spec, derivations) { + deriv_vars <- value_spec %>% + filter(!is.na(.data$derivation_id)) %>% + distinct(.data$variable, .data$derivation_id) + + # Check the variables that don't have derivations in derivations + not_in_val <- anti_join(deriv_vars, derivations, by = c("derivation_id")) + if (nrow(not_in_val) != 0) { + variables <- not_in_val %>% + pull(.data$variable) + cli_warn(c( + "The following variables have derivation ids not found in the derivations table:", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } + # Check the derivations in deriavtion that aren't in value spec + not_in_deriv <- anti_join(derivations, deriv_vars, by = c("derivation_id")) + if (nrow(not_in_deriv) != 0) { + deriv <- not_in_deriv %>% + mutate(message = paste0(.data$derivation_id)) %>% + pull(.data$message) + cli_warn(c( + "The following derivations are never used:", + "i" = ansi_collapse(deriv, last = ", ") + ), call. = FALSE) + } } #' Codelist Check @@ -112,33 +111,33 @@ derivation_check <- function(value_spec, derivations){ #' #' @return writes warning to console if there is an issue #' @noRd -codelist_check <- function(value_spec, codelist){ - code_vars <- value_spec %>% - filter(!is.na(code_id)) %>% - distinct(dataset, variable, code_id) - - #Check the variables in don't codelists have codelist - not_in_val <- anti_join(code_vars, codelist, by = c("code_id")) - if(nrow(not_in_val)){ - variables <- not_in_val %>% - mutate(cat = str_c(dataset, variable, sep = ".")) %>% - pull(cat) %>% - unique() - cli_warn(c( - "The following variables have code ids not found in the codelist(s):", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } - # Check the code_ids in codelist that aren't in value spec - not_in_cl <- anti_join(codelist, code_vars, by = c("code_id")) - if(nrow(not_in_cl)){ - cl_nm <- not_in_cl %>% - pull(.data$name) - cli_warn(c( - "The following codelists are never used:", - "i" = ansi_collapse(cl_nm, last = ", ") - ), call. = FALSE) - } +codelist_check <- function(value_spec, codelist) { + code_vars <- value_spec %>% + filter(!is.na(code_id)) %>% + distinct(dataset, variable, code_id) + + # Check the variables in don't codelists have codelist + not_in_val <- anti_join(code_vars, codelist, by = c("code_id")) + if (nrow(not_in_val)) { + variables <- not_in_val %>% + mutate(cat = str_c(dataset, variable, sep = ".")) %>% + pull(cat) %>% + unique() + cli_warn(c( + "The following variables have code ids not found in the codelist(s):", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } + # Check the code_ids in codelist that aren't in value spec + not_in_cl <- anti_join(codelist, code_vars, by = c("code_id")) + if (nrow(not_in_cl)) { + cl_nm <- not_in_cl %>% + pull(.data$name) + cli_warn(c( + "The following codelists are never used:", + "i" = ansi_collapse(cl_nm, last = ", ") + ), call. = FALSE) + } } @@ -153,38 +152,38 @@ codelist_check <- function(value_spec, codelist){ #' #' @return writes warning to console if there is an issue #' @noRd -supp_check <- function(ds_vars, supp){ - dist_test <- supp %>% - distinct(.data$dataset, .data$variable) %>% - nrow() == nrow(supp) - if(!dist_test){ - cli_warn("Supp table contains non-unique dataset/variable combinations") - } - - ds_vars <- ds_vars %>% - filter(.data$supp_flag) - - #Check the variables in ds_vars that don't have value specs - not_in_supp <- anti_join(ds_vars, supp, by = c("dataset", "variable")) - if(nrow(not_in_supp) != 0){ - variables <- not_in_supp %>% - mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>% - pull(.data$full) - cli_warn(c( - "The following variables are in the ds_vars table and tagged as supplement, but don't have supp specs:", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } - # Check the variables in value spec that aren't in ds_vars - not_in_ds <- anti_join(supp, ds_vars, by = c("dataset", "variable")) - if(nrow(not_in_ds) != 0){ - variables <- not_in_ds %>% - pull(.data$variable) - cli_warn(c( - "The following variables are have supp specifications, but aren't in the ds_vars table:", - "i" = ansi_collapse(variables, last = ", ") - ), call. = FALSE) - } +supp_check <- function(ds_vars, supp) { + dist_test <- supp %>% + distinct(.data$dataset, .data$variable) %>% + nrow() == nrow(supp) + if (!dist_test) { + cli_warn("Supp table contains non-unique dataset/variable combinations") + } + + ds_vars <- ds_vars %>% + filter(.data$supp_flag) + + # Check the variables in ds_vars that don't have value specs + not_in_supp <- anti_join(ds_vars, supp, by = c("dataset", "variable")) + if (nrow(not_in_supp) != 0) { + variables <- not_in_supp %>% + mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>% + pull(.data$full) + cli_warn(c( + "The following variables are in the ds_vars table and tagged as supplement, but don't have supp specs:", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } + # Check the variables in value spec that aren't in ds_vars + not_in_ds <- anti_join(supp, ds_vars, by = c("dataset", "variable")) + if (nrow(not_in_ds) != 0) { + variables <- not_in_ds %>% + pull(.data$variable) + cli_warn(c( + "The following variables are have supp specifications, but aren't in the ds_vars table:", + "i" = ansi_collapse(variables, last = ", ") + ), call. = FALSE) + } } @@ -192,14 +191,16 @@ supp_check <- function(ds_vars, supp){ #' #' @return list of column names by dataset #' @noRd -col_vars <- function(){ - list(.ds_spec = c("dataset", "structure", "label"), - .ds_vars = c("dataset", "variable", "key_seq", "order","mandatory", "core", "supp_flag"), - .var_spec = c("variable", "length", "label", "type", "common", "format"), - .value_spec = c("dataset", "variable", "type", "origin","sig_dig", "code_id", "where", "derivation_id"), - .derivations = c("derivation_id", "derivation"), - .codelist= c("code_id", "name","type", "codes"), - .supp = c("dataset", "variable", "idvar", "qeval")) +col_vars <- function() { + list( + .ds_spec = c("dataset", "structure", "label"), + .ds_vars = c("dataset", "variable", "key_seq", "order", "mandatory", "core", "supp_flag"), + .var_spec = c("variable", "length", "label", "type", "common", "format"), + .value_spec = c("dataset", "variable", "type", "origin", "sig_dig", "code_id", "where", "derivation_id"), + .derivations = c("derivation_id", "derivation"), + .codelist = c("code_id", "name", "type", "codes"), + .supp = c("dataset", "variable", "idvar", "qeval") + ) } @@ -209,83 +210,87 @@ col_vars <- function(){ #' #' @return warning messages to the console if there is an issue #' @noRd -var_name_check <- function(envrionment){ - # Set the name as they should be - col_names <- col_vars() - # Get the tables and table names from the environment - tbl_name <- ls(envrionment, all.names = TRUE) - tbls <- map(tbl_name, get, envir = envrionment) - # Checks is names match the table above, returns T if so F else. If the names - # don't match, will also produce a warning of what the names should be - map2_lgl(tbl_name, tbls, function(name, tbl){ - name - if(is.null(tbl)){ - # Checks for null tables - print_message <- name %>% - str_remove("[:punct:]") %>% - paste("is null") - cli_warn(print_message, call. = FALSE) - FALSE - } else if(!setequal(names(tbl),col_names[[name]])){ - # writes a message if the column names don't match - print_message <- name %>% - str_remove("[:punct:]") %>% - paste0("'", ., "' has incorrect column names. It should be:\n", - str_c(col_names[[name]], collapse = ", "), "\n") - cli_warn(print_message, call. = FALSE) - FALSE - } else { - TRUE - } - }) %>% - all() - +var_name_check <- function(envrionment) { + # Set the name as they should be + col_names <- col_vars() + # Get the tables and table names from the environment + tbl_name <- ls(envrionment, all.names = TRUE) + tbls <- map(tbl_name, get, envir = envrionment) + # Checks is names match the table above, returns T if so F else. If the names + # don't match, will also produce a warning of what the names should be + map2_lgl(tbl_name, tbls, function(name, tbl) { + name + if (is.null(tbl)) { + # Checks for null tables + print_message <- name %>% + str_remove("[:punct:]") %>% + paste("is null") + cli_warn(print_message, call. = FALSE) + FALSE + } else if (!setequal(names(tbl), col_names[[name]])) { + # writes a message if the column names don't match + print_message <- name %>% + str_remove("[:punct:]") %>% + paste0( + "'", ., "' has incorrect column names. It should be:\n", + str_c(col_names[[name]], collapse = ", "), "\n" + ) + cli_warn(print_message, call. = FALSE) + FALSE + } else { + TRUE + } + }) %>% + all() } - #' Column Data Check <- lol horrible name #' #' @return a data frame of the datasets, column #' @noRd #' all_message <- function() { - tribble( - ~dataset, ~var, ~test, ~any_na_acceptable, - "ds_spec", "dataset", is.character, FALSE, - "ds_spec", "structure", is.character, TRUE, - "ds_spec", "label", is.character, TRUE, - "ds_vars", "dataset", is.character, FALSE, - "ds_vars", "variable", is.character, FALSE, - "ds_vars", "key_seq", is.numeric, TRUE, - "ds_vars", "order", is.numeric, TRUE, - "ds_vars", "mandatory", is.logical, TRUE, - "ds_vars", "core", check_words("Expected", "Required", "Permissible", "Conditionally Required", "Conditionally Expected", NA), TRUE, - "ds_vars", "supp_flag", is.logical, TRUE, - "var_spec", "variable", is.character, FALSE, - "var_spec", "type", is.character, TRUE, - "var_spec", "length", is.numeric, TRUE, - "var_spec", "label", is.character, TRUE, - "var_spec", "format", is.character, TRUE, - "var_spec", "common", is.logical, TRUE, - "value_spec", "type", is.character, TRUE, - "value_spec", "sig_dig", is.integer, TRUE, - "value_spec", "origin", function(x){str_detect(x, "collected|derived|assigned|protocol|predecessor|crf.*")||is.na(x)}, TRUE, - "value_spec", "code_id", is.character, TRUE, - "value_spec", "dataset", is.character, FALSE, - "value_spec", "where", is.character, TRUE, - "value_spec", "derivation_id", is.character, TRUE, - "derivations", "derivation_id", is.character, FALSE, - "derivations", "derivation", is.character, TRUE, - "codelist", "code_id", is.character, FALSE, - "codelist", "name", is.character, TRUE, - "codelist", "codes", function(x){!is.null(x)}, TRUE, - "codelist", "type", is.character, TRUE, - "supp", "dataset", is.character, FALSE, - "supp", "variable", is.character, FALSE, - "supp", "idvar", is.character, TRUE, - "supp", "qeval", is.character, TRUE, -) + tribble( + ~dataset, ~var, ~test, ~any_na_acceptable, + "ds_spec", "dataset", is.character, FALSE, + "ds_spec", "structure", is.character, TRUE, + "ds_spec", "label", is.character, TRUE, + "ds_vars", "dataset", is.character, FALSE, + "ds_vars", "variable", is.character, FALSE, + "ds_vars", "key_seq", is.numeric, TRUE, + "ds_vars", "order", is.numeric, TRUE, + "ds_vars", "mandatory", is.logical, TRUE, + "ds_vars", "core", check_words("Expected", "Required", "Permissible", "Conditionally Required", "Conditionally Expected", NA), TRUE, + "ds_vars", "supp_flag", is.logical, TRUE, + "var_spec", "variable", is.character, FALSE, + "var_spec", "type", is.character, TRUE, + "var_spec", "length", is.numeric, TRUE, + "var_spec", "label", is.character, TRUE, + "var_spec", "format", is.character, TRUE, + "var_spec", "common", is.logical, TRUE, + "value_spec", "type", is.character, TRUE, + "value_spec", "sig_dig", is.integer, TRUE, + "value_spec", "origin", function(x) { + str_detect(x, "collected|derived|assigned|protocol|predecessor|crf.*") || is.na(x) + }, TRUE, + "value_spec", "code_id", is.character, TRUE, + "value_spec", "dataset", is.character, FALSE, + "value_spec", "where", is.character, TRUE, + "value_spec", "derivation_id", is.character, TRUE, + "derivations", "derivation_id", is.character, FALSE, + "derivations", "derivation", is.character, TRUE, + "codelist", "code_id", is.character, FALSE, + "codelist", "name", is.character, TRUE, + "codelist", "codes", function(x) { + !is.null(x) + }, TRUE, + "codelist", "type", is.character, TRUE, + "supp", "dataset", is.character, FALSE, + "supp", "variable", is.character, FALSE, + "supp", "idvar", is.character, TRUE, + "supp", "qeval", is.character, TRUE, + ) } @@ -302,33 +307,33 @@ all_message <- function() { #' @param supp supp information #' check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp) { - - - messages <- purrr::pmap(all_message(), - ~check_structure( - get(..1), sym(..2), ..3, ..4, ..1) - ) - - # errors - errors <- map(messages, "error") %>% - compact() %>% - vapply(`[[`, character(1), 1) - if (length(errors) > 0) { - msg <- c( - "Tried to load dataset metadata but exited with errors", - set_names(errors, rep("x", length(errors))) - ) - cli_abort(msg, call. = FALSE) - } - - # warnings - warnings <- map(messages, "warning") %>% - compact() - if (length(warnings) > 0) { - for (warning in warnings) { - cli_warn(warning[1], call. = FALSE) - } - } + messages <- purrr::pmap( + all_message(), + ~ check_structure( + get(..1), sym(..2), ..3, ..4, ..1 + ) + ) + + # errors + errors <- map(messages, "error") %>% + compact() %>% + vapply(`[[`, character(1), 1) + if (length(errors) > 0) { + msg <- c( + "Tried to load dataset metadata but exited with errors", + set_names(errors, rep("x", length(errors))) + ) + cli_abort(msg, call. = FALSE) + } + + # warnings + warnings <- map(messages, "warning") %>% + compact() + if (length(warnings) > 0) { + for (warning in warnings) { + cli_warn(warning[1], call. = FALSE) + } + } } #' Is metacore object @@ -343,8 +348,8 @@ check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, c #' load(metacore_example("pilot_ADaM.rda")) #' is_metacore(metacore) #' -is_metacore <- function(x){ - inherits(x, "Metacore") +is_metacore <- function(x) { + inherits(x, "Metacore") } @@ -358,11 +363,11 @@ is_metacore <- function(x){ #' @examples #' load(metacore_example("pilot_ADaM.rda")) #' adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) -#' is_DatasetMeta("DUMMY") # Expect FALSE -#' is_DatasetMeta(metacore) # Expect FALSE -#' is_DatasetMeta(adsl) # Expect TRUE -is_DatasetMeta <- function(x){ - inherits(x, "DatasetMeta") +#' is_DatasetMeta("DUMMY") # Expect FALSE +#' is_DatasetMeta(metacore) # Expect FALSE +#' is_DatasetMeta(adsl) # Expect TRUE +is_DatasetMeta <- function(x) { + inherits(x, "DatasetMeta") } @@ -388,18 +393,18 @@ is_DatasetMeta <- function(x){ #' load(metacore_example("pilot_ADaM.rda")) #' adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) #' \dontrun{ -#' verify_DatasetMeta("DUMMY") # Expect error -#' verify_DatasetMeta(metacore) # Expect error +#' verify_DatasetMeta("DUMMY") # Expect error +#' verify_DatasetMeta(metacore) # Expect error #' } -#' verify_DatasetMeta(adsl) # Expect valid, i.e., return TRUE +#' verify_DatasetMeta(adsl) # Expect valid, i.e., return TRUE verify_DatasetMeta <- function(metacore) { - if (!is_metacore(metacore)) { - cli_abort(col_red("The object supplied to the argument {.arg metacore} is not a Metacore object. You have supplied an object of class {class(metacore)}.")) - } + if (!is_metacore(metacore)) { + cli_abort(col_red("The object supplied to the argument {.arg metacore} is not a Metacore object. You have supplied an object of class {class(metacore)}.")) + } - if (!is_DatasetMeta(metacore)) { - cli_abort(col_red("The object supplied to the argument {.arg metacore} is not a subsetted Metacore object. Use {.fn metacore::select_dataset} to subset metadata for the required dataset.")) - } + if (!is_DatasetMeta(metacore)) { + cli_abort(col_red("The object supplied to the argument {.arg metacore} is not a subsetted Metacore object. Use {.fn metacore::select_dataset} to subset metadata for the required dataset.")) + } - return(TRUE) + return(TRUE) } diff --git a/R/xml_builders.R b/R/xml_builders.R index 0955389..99b5bc8 100644 --- a/R/xml_builders.R +++ b/R/xml_builders.R @@ -8,37 +8,37 @@ #' #' @return Metacore/DataDef object #' @export -define_to_metacore <- function(path, quiet = FALSE){ - - test <- quiet_if_true({ - +define_to_metacore <- function(path, quiet = FALSE) { + test <- quiet_if_true( + { xml <- read_xml(path) xml_ns_strip(xml) define_version <- xml_find_all(xml, "//MetaDataVersion") %>% - xml_attr("DefineVersion") %>% - as.numeric_version() - - ds_spec <- xml_to_ds_spec(xml) - ds_vars <- xml_to_ds_vars(xml) - var_spec <- xml_to_var_spec(xml) - value_spec <- xml_to_value_spec(xml) - code_list <- xml_to_codelist(xml) + xml_attr("DefineVersion") %>% + as.numeric_version() + + ds_spec <- xml_to_ds_spec(xml) + ds_vars <- xml_to_ds_vars(xml) + var_spec <- xml_to_var_spec(xml) + value_spec <- xml_to_value_spec(xml) + code_list <- xml_to_codelist(xml) derivations <- xml_to_derivations(xml) metacore( - ds_spec, - ds_vars, - var_spec, - value_spec, - derivations, - codelist = code_list, - quiet = quiet + ds_spec, + ds_vars, + var_spec, + value_spec, + derivations, + codelist = code_list, + quiet = quiet ) + }, + quiet = quiet + ) - }, quiet = quiet) - - if (quiet) invisible(test) else test + if (quiet) invisible(test) else test } #' XML to Data Set Spec @@ -51,15 +51,15 @@ define_to_metacore <- function(path, quiet = FALSE){ #' @export #' xml_to_ds_spec <- function(doc) { - # Read in the dataset level nodes - xml_find_all(doc, "//MetaDataVersion/ItemGroupDef[contains(@OID, 'IG')]") %>% - map_dfr(function(node){ + # Read in the dataset level nodes + xml_find_all(doc, "//MetaDataVersion/ItemGroupDef[contains(@OID, 'IG')]") %>% + map_dfr(function(node) { tibble( - dataset = xml_attr(node, "Name"), - structure = xml_attr(node, "Structure"), - label = xml_find_first(node, "./Description") %>% xml_text() + dataset = xml_attr(node, "Name"), + structure = xml_attr(node, "Structure"), + label = xml_find_first(node, "./Description") %>% xml_text() ) - }) + }) } @@ -73,30 +73,32 @@ xml_to_ds_spec <- function(doc) { #' @export #' xml_to_ds_vars <- function(doc) { - # Each dataset is an ItemGroupDef - xml_find_all(doc, "//ItemGroupDef") %>% - map_dfr(function(node){ - # Each Variable is a Item Ref - child_node <- xml_find_all(node, "./ItemRef") - tibble( - dataset = xml_attr(node, "Name"), - oid = xml_attr(child_node, "ItemOID"), - mandatory = xml_attr(child_node, "Mandatory"), - key_seq = xml_attr(child_node, "KeySequence") %>% - as.integer(), - order = xml_attr(child_node, "OrderNumber") %>% - as.integer() - ) - }) %>% - mutate( - variable = id_to_var(.data$oid), - mandatory = .data$mandatory == "Yes", - core = NA_character_, - supp_flag = NA - ) %>% - select(.data$dataset, .data$variable, .data$key_seq, - .data$order, .data$mandatory, .data$core, .data$supp_flag, - .data$mandatory, -.data$oid) + # Each dataset is an ItemGroupDef + xml_find_all(doc, "//ItemGroupDef") %>% + map_dfr(function(node) { + # Each Variable is a Item Ref + child_node <- xml_find_all(node, "./ItemRef") + tibble( + dataset = xml_attr(node, "Name"), + oid = xml_attr(child_node, "ItemOID"), + mandatory = xml_attr(child_node, "Mandatory"), + key_seq = xml_attr(child_node, "KeySequence") %>% + as.integer(), + order = xml_attr(child_node, "OrderNumber") %>% + as.integer() + ) + }) %>% + mutate( + variable = id_to_var(.data$oid), + mandatory = .data$mandatory == "Yes", + core = NA_character_, + supp_flag = NA + ) %>% + select( + .data$dataset, .data$variable, .data$key_seq, + .data$order, .data$mandatory, .data$core, .data$supp_flag, + .data$mandatory, -.data$oid + ) } @@ -113,55 +115,56 @@ xml_to_ds_vars <- function(doc) { #' @export #' xml_to_var_spec <- function(doc) { - - # Gets the name, id, and length from the variable node and the description from the child - var_info <- xml_find_all(doc, "//ItemDef") %>% - map_dfr(function(node){ - tibble( - oid = xml_attr(node,"OID") %>% as.character(), - variable = xml_attr(node, "Name") %>% as.character(), - type = xml_attr(node, "DataType"), - length = xml_attr(node, "Length") %>% as.integer(), - format = xml_attr(node, "DisplayFormat"), - label = xml_find_first(node, "./Description/TranslatedText") %>% - xml_text() - ) - }) - - possible_vars <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>% - map_chr(function(node){ - oid = xml_attr(node, "ItemOID") - }) - - # Get for each variable, get the number of distinct lengths and labels - dist_df <- var_info %>% - filter(.data$oid %in% possible_vars) %>% - distinct(.data$variable, .data$length, .data$label, .data$type, .keep_all = TRUE) %>% - group_by(.data$variable) %>% - mutate( - n = n(), - common = NA - ) %>% - ungroup() - - # For variables with more than one distinct label, this gets all the full - # variable names with that root. Sometimes 3 variables will have the same root - # (i.e. ARMCD), 2 of them will match, but one of them won't. This means the - # two matching will have been collapsed to one in the distinct and we have to - # bring back the one that got dropped. Cause all of them need to be DS.var - full_name_vars <- dist_df %>% - filter(n > 1) %>% - select(.data$variable) %>% - inner_join(var_info, by = "variable") %>% - mutate(variable = str_remove(.data$oid, "^IT\\.")) %>% - distinct() - - # Combine the variables that need full names with the variables that don't - dist_df %>% - filter(n == 1) %>% - bind_rows(full_name_vars) %>% - select(.data$variable, .data$type, .data$length, .data$label, - .data$format, .data$common, -.data$n, -.data$oid) + # Gets the name, id, and length from the variable node and the description from the child + var_info <- xml_find_all(doc, "//ItemDef") %>% + map_dfr(function(node) { + tibble( + oid = xml_attr(node, "OID") %>% as.character(), + variable = xml_attr(node, "Name") %>% as.character(), + type = xml_attr(node, "DataType"), + length = xml_attr(node, "Length") %>% as.integer(), + format = xml_attr(node, "DisplayFormat"), + label = xml_find_first(node, "./Description/TranslatedText") %>% + xml_text() + ) + }) + + possible_vars <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>% + map_chr(function(node) { + oid <- xml_attr(node, "ItemOID") + }) + + # Get for each variable, get the number of distinct lengths and labels + dist_df <- var_info %>% + filter(.data$oid %in% possible_vars) %>% + distinct(.data$variable, .data$length, .data$label, .data$type, .keep_all = TRUE) %>% + group_by(.data$variable) %>% + mutate( + n = n(), + common = NA + ) %>% + ungroup() + + # For variables with more than one distinct label, this gets all the full + # variable names with that root. Sometimes 3 variables will have the same root + # (i.e. ARMCD), 2 of them will match, but one of them won't. This means the + # two matching will have been collapsed to one in the distinct and we have to + # bring back the one that got dropped. Cause all of them need to be DS.var + full_name_vars <- dist_df %>% + filter(n > 1) %>% + select(.data$variable) %>% + inner_join(var_info, by = "variable") %>% + mutate(variable = str_remove(.data$oid, "^IT\\.")) %>% + distinct() + + # Combine the variables that need full names with the variables that don't + dist_df %>% + filter(n == 1) %>% + bind_rows(full_name_vars) %>% + select( + .data$variable, .data$type, .data$length, .data$label, + .data$format, .data$common, -.data$n, -.data$oid + ) } @@ -178,133 +181,151 @@ xml_to_var_spec <- function(doc) { #' #' @importFrom xml2 xml_attr xml_find_first xml_parent xml_find_all xml_to_value_spec <- function(doc) { - # Get information in the item definition - item_def <- xml_find_all(doc, "//ItemDef") %>% - map_dfr(function(node){ - tibble( - oid = xml_attr(node,"OID") %>% as.character(), - variable = xml_attr(node, "Name") %>% as.character(), - type = xml_attr(node, "DataType"), - sig_dig = xml_attr(node, "SignificantDigits") %>% as.integer(), - origin = xml_find_first(node, "./def:Origin") %>% xml_attr("Type"), - page_num = xml_find_first(node, "./def:Origin/def:DocumentRef/def:PDFPageRef") %>% xml_attr("PageRefs"), - predecessor = xml_find_first(node, "./def:Origin") %>% xml_text(), - comment_id = xml_attr(node,"CommentOID"), - code_id = xml_find_first(node, "CodeListRef") %>% xml_attr("CodeListOID"), - varname = xml_attr(node, "SASFieldName") %>% as.character() - ) - }) %>% - mutate( - origin = if_else(.data$origin == "Collected" & !is.na(.data$page_num), - paste0(.data$origin,", page_num = ", .data$page_num), - .data$origin) - ) %>% - select(-.data$page_num) - - # Pull the information from the item reference only for dataset variable, not where level information - derivations <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>% - map_dfr(function(node){ - tibble( - oid = xml_attr(node, "ItemOID") %>% as.character(), - dataset = xml_parent(node) %>% xml_attr("Name") %>% as.character(), - derivation_id = xml_attr(node, "MethodOID") - ) - }) - # Combine all the item information but - item_info <- left_join(derivations, item_def, by = "oid") - - where_to_merge <- xml_find_all(doc, "//def:ValueListDef/ItemRef") %>% - map_dfr(function(node){ - tibble( - oid = xml_parent(node) %>% xml_attr("OID") %>% as.character(), - item_oid = xml_attr(node, "ItemOID"), - ord = xml_attr(node, "OrderNumber"), - where_oid = xml_find_all(node, "./def:WhereClauseRef") %>% - xml_attr("WhereClauseOID"), - derivation_id = xml_attr(node, "MethodOID") - ) - } + # Get information in the item definition + item_def <- xml_find_all(doc, "//ItemDef") %>% + map_dfr(function(node) { + tibble( + oid = xml_attr(node, "OID") %>% as.character(), + variable = xml_attr(node, "Name") %>% as.character(), + type = xml_attr(node, "DataType"), + sig_dig = xml_attr(node, "SignificantDigits") %>% as.integer(), + origin = xml_find_first(node, "./def:Origin") %>% xml_attr("Type"), + page_num = xml_find_first(node, "./def:Origin/def:DocumentRef/def:PDFPageRef") %>% xml_attr("PageRefs"), + predecessor = xml_find_first(node, "./def:Origin") %>% xml_text(), + comment_id = xml_attr(node, "CommentOID"), + code_id = xml_find_first(node, "CodeListRef") %>% xml_attr("CodeListOID"), + varname = xml_attr(node, "SASFieldName") %>% as.character() + ) + }) %>% + mutate( + origin = if_else(.data$origin == "Collected" & !is.na(.data$page_num), + paste0(.data$origin, ", page_num = ", .data$page_num), + .data$origin ) + ) %>% + select(-.data$page_num) - where_eqs <- xml_find_all(doc, "//def:WhereClauseDef[@OID]/RangeCheck") %>% - map_dfr(function(node){ - tibble( - where_oid = xml_parent(node) %>% xml_attr("OID"), - left = xml_attr(node, "ItemOID"), - test = xml_attr(node, "Comparator"), - right = xml_find_all(node, "./CheckValue") %>% xml_text() - ) - } + # Pull the information from the item reference only for dataset variable, not where level information + derivations <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>% + map_dfr(function(node) { + tibble( + oid = xml_attr(node, "ItemOID") %>% as.character(), + dataset = xml_parent(node) %>% xml_attr("Name") %>% as.character(), + derivation_id = xml_attr(node, "MethodOID") ) - # create 0x4 tibble if where_eqs is 0x0 - # tmp workaround until below bug is resolved in purrr - # https://github.com/tidyverse/purrr/issues/824 - if(nrow(where_eqs) == 0){ - where_eqs <- tibble(where_oid=character(), - left=character(), - test=character(), - right = character()) - } - - if(nrow(where_to_merge) == 0){ - where_eqs <- where_eqs %>% - mutate(item_oid = .data$left, - derivation_id = paste0("MT", str_remove(.data$left, "IT"), ".", .data$right), - ord = NA, - oid = .data$left) %>% - left_join(item_def, by = c("oid")) %>% - left_join(select(derivations, -.data$derivation_id), by = c("oid")) - - } else{ - where_eqs<- full_join(where_to_merge, where_eqs, by = "where_oid") %>% - left_join(item_def, by = c("item_oid" = "oid")) %>% - # Allow for merging with the derivations to get the dataset - mutate(oid = paste0("IT", str_remove(.data$oid, "^VL")), - variable = .data$varname) %>% - left_join(select(derivations, -.data$derivation_id), by = c("oid")) - } - - all_where_eqs <- where_eqs %>% - group_by(.data$where_oid) %>% - mutate(var = str_extract(.data$left, "\\w*$"), - right = paste0("'", .data$right, "'"), - test = case_when(.data$test == "EQ" ~ "==", - .data$test == "LT" ~ "<", - .data$test == "LE" ~ "<=", - .data$test == "GT" ~ ">", - .data$test == "GE" ~ ">=", - .data$test == "NE" ~ "!=", - TRUE ~ .data$test), - eq = case_when( test == "IN" ~ paste(.data$var, "%in%", "c(", - paste(.data$right, collapse = ","), - ")"), - test == "NOTIN" ~ paste("!", .data$var, "%in%", "c(", - paste(.data$right, collapse = ","), - ")"), - TRUE ~ paste(.data$var, .data$test, .data$right, collapse = " & ")) + }) + # Combine all the item information but + item_info <- left_join(derivations, item_def, by = "oid") + + where_to_merge <- xml_find_all(doc, "//def:ValueListDef/ItemRef") %>% + map_dfr(function(node) { + tibble( + oid = xml_parent(node) %>% xml_attr("OID") %>% as.character(), + item_oid = xml_attr(node, "ItemOID"), + ord = xml_attr(node, "OrderNumber"), + where_oid = xml_find_all(node, "./def:WhereClauseRef") %>% + xml_attr("WhereClauseOID"), + derivation_id = xml_attr(node, "MethodOID") + ) + }) + + where_eqs <- xml_find_all(doc, "//def:WhereClauseDef[@OID]/RangeCheck") %>% + map_dfr(function(node) { + tibble( + where_oid = xml_parent(node) %>% xml_attr("OID"), + left = xml_attr(node, "ItemOID"), + test = xml_attr(node, "Comparator"), + right = xml_find_all(node, "./CheckValue") %>% xml_text() + ) + }) + # create 0x4 tibble if where_eqs is 0x0 + # tmp workaround until below bug is resolved in purrr + # https://github.com/tidyverse/purrr/issues/824 + if (nrow(where_eqs) == 0) { + where_eqs <- tibble( + where_oid = character(), + left = character(), + test = character(), + right = character() + ) + } + + if (nrow(where_to_merge) == 0) { + where_eqs <- where_eqs %>% + mutate( + item_oid = .data$left, + derivation_id = paste0("MT", str_remove(.data$left, "IT"), ".", .data$right), + ord = NA, + oid = .data$left ) %>% - select(-.data$left, -.data$var, -.data$test, -.data$right) %>% - distinct() %>% - group_by(.data$item_oid, .data$derivation_id) %>% - mutate(full_eq = str_c(.data$eq, collapse = "||")) %>% - filter(!is.na(.data$item_oid)) %>% - ungroup() %>% - select(-.data$eq, where = .data$full_eq, .data$derivation_id, - -.data$where_oid, -.data$ord, -.data$item_oid) - - val_spec <- item_info %>% - anti_join(all_where_eqs, by = c("oid")) %>% #remove any variables with a where - bind_rows(all_where_eqs) %>% - mutate(derivation_id = case_when( - .data$origin == "Predecessor" & !is.na(.data$predecessor) ~ .data$predecessor, - .data$origin == "Assigned" & !is.na(.data$comment_id) ~ .data$comment_id, - TRUE ~ .data$derivation_id)) %>% - select(.data$dataset, .data$variable, .data$code_id, .data$derivation_id, - .data$type, .data$origin, .data$where, .data$sig_dig, - -.data$predecessor, -.data$comment_id, -.data$varname, - -.data$oid) - - val_spec + left_join(item_def, by = c("oid")) %>% + left_join(select(derivations, -.data$derivation_id), by = c("oid")) + } else { + where_eqs <- full_join(where_to_merge, where_eqs, by = "where_oid") %>% + left_join(item_def, by = c("item_oid" = "oid")) %>% + # Allow for merging with the derivations to get the dataset + mutate( + oid = paste0("IT", str_remove(.data$oid, "^VL")), + variable = .data$varname + ) %>% + left_join(select(derivations, -.data$derivation_id), by = c("oid")) + } + + all_where_eqs <- where_eqs %>% + group_by(.data$where_oid) %>% + mutate( + var = str_extract(.data$left, "\\w*$"), + right = paste0("'", .data$right, "'"), + test = case_when( + .data$test == "EQ" ~ "==", + .data$test == "LT" ~ "<", + .data$test == "LE" ~ "<=", + .data$test == "GT" ~ ">", + .data$test == "GE" ~ ">=", + .data$test == "NE" ~ "!=", + TRUE ~ .data$test + ), + eq = case_when( + test == "IN" ~ paste( + .data$var, "%in%", "c(", + paste(.data$right, collapse = ","), + ")" + ), + test == "NOTIN" ~ paste( + "!", .data$var, "%in%", "c(", + paste(.data$right, collapse = ","), + ")" + ), + TRUE ~ paste(.data$var, .data$test, .data$right, collapse = " & ") + ) + ) %>% + select(-.data$left, -.data$var, -.data$test, -.data$right) %>% + distinct() %>% + group_by(.data$item_oid, .data$derivation_id) %>% + mutate(full_eq = str_c(.data$eq, collapse = "||")) %>% + filter(!is.na(.data$item_oid)) %>% + ungroup() %>% + select(-.data$eq, + where = .data$full_eq, .data$derivation_id, + -.data$where_oid, -.data$ord, -.data$item_oid + ) + + val_spec <- item_info %>% + anti_join(all_where_eqs, by = c("oid")) %>% # remove any variables with a where + bind_rows(all_where_eqs) %>% + mutate(derivation_id = case_when( + .data$origin == "Predecessor" & !is.na(.data$predecessor) ~ .data$predecessor, + .data$origin == "Assigned" & !is.na(.data$comment_id) ~ .data$comment_id, + TRUE ~ .data$derivation_id + )) %>% + select( + .data$dataset, .data$variable, .data$code_id, .data$derivation_id, + .data$type, .data$origin, .data$where, .data$sig_dig, + -.data$predecessor, -.data$comment_id, -.data$varname, + -.data$oid + ) + + val_spec } @@ -321,61 +342,65 @@ xml_to_value_spec <- function(doc) { #' @family xml builder #' @export xml_to_codelist <- function(doc) { - cl_pv <- xml_find_all(doc, "//CodeList") %>% - map_dfr(function(node){ - #Values within the code/decode - node_children_EI <- xml_find_all(node, "./EnumeratedItem") - node_children_CL <- xml_find_all(node, "./CodeListItem") - - decodes <- c(xml_find_all(node_children_EI, "./Decode") %>% xml_text(), - xml_find_all(node_children_CL, "./Decode") %>% xml_text()) - if(length(decodes) == 0){ - decodes <- NA_character_ - } - - tibble( - code_id = xml_attr(node, "OID"), - name = xml_attr(node, "Name"), - code = c(xml_attr(node_children_EI, "CodedValue"), - xml_attr(node_children_CL, "CodedValue")), - decode = decodes - ) - }) %>% - group_by(code_id) %>% - mutate(type = if_else(all(is.na(decode)), - "permitted_val", "code_decode")) - - permitted_val <- cl_pv %>% - filter(type == "permitted_val") %>% - select(-.data$decode) %>% - nest(codes = c(.data$code)) - - code_decode <- cl_pv %>% - filter(type == "code_decode") %>% - nest(codes = c(.data$code, .data$decode)) - - - external_libs <- xml_find_all(doc, "//CodeList/ExternalCodeList") %>% - map_dfr(function(node){ - tibble( - code_id = xml_parent(node) %>% xml_attr("OID"), - name = xml_parent(node) %>% xml_attr("Name"), - dictionary = xml_attr(node, "Dictionary"), - version = xml_attr(node, "Version"), - type = "external_library" - ) - }) - if(nrow(external_libs) > 0){ - external_libs <- external_libs %>% - nest(codes = c(.data$dictionary, .data$version)) - } - - - # Combinging the code decode with the permitted values - bind_rows(code_decode, permitted_val, external_libs) %>% - ungroup() -} + cl_pv <- xml_find_all(doc, "//CodeList") %>% + map_dfr(function(node) { + # Values within the code/decode + node_children_EI <- xml_find_all(node, "./EnumeratedItem") + node_children_CL <- xml_find_all(node, "./CodeListItem") + + decodes <- c( + xml_find_all(node_children_EI, "./Decode") %>% xml_text(), + xml_find_all(node_children_CL, "./Decode") %>% xml_text() + ) + if (length(decodes) == 0) { + decodes <- NA_character_ + } + tibble( + code_id = xml_attr(node, "OID"), + name = xml_attr(node, "Name"), + code = c( + xml_attr(node_children_EI, "CodedValue"), + xml_attr(node_children_CL, "CodedValue") + ), + decode = decodes + ) + }) %>% + group_by(code_id) %>% + mutate(type = if_else(all(is.na(decode)), + "permitted_val", "code_decode" + )) + + permitted_val <- cl_pv %>% + filter(type == "permitted_val") %>% + select(-.data$decode) %>% + nest(codes = c(.data$code)) + + code_decode <- cl_pv %>% + filter(type == "code_decode") %>% + nest(codes = c(.data$code, .data$decode)) + + + external_libs <- xml_find_all(doc, "//CodeList/ExternalCodeList") %>% + map_dfr(function(node) { + tibble( + code_id = xml_parent(node) %>% xml_attr("OID"), + name = xml_parent(node) %>% xml_attr("Name"), + dictionary = xml_attr(node, "Dictionary"), + version = xml_attr(node, "Version"), + type = "external_library" + ) + }) + if (nrow(external_libs) > 0) { + external_libs <- external_libs %>% + nest(codes = c(.data$dictionary, .data$version)) + } + + + # Combinging the code decode with the permitted values + bind_rows(code_decode, permitted_val, external_libs) %>% + ungroup() +} #' XML to derivation table @@ -389,35 +414,40 @@ xml_to_codelist <- function(doc) { #' @export #' xml_to_derivations <- function(doc) { + derivation <- + xml_find_all(doc, "//MethodDef") %>% + map_dfr(function(node) { + tibble( + derivation_id = xml_attr(node, "OID"), + derivation = xml_find_first(node, "./Description/TranslatedText") %>% + xml_text() + ) + }) - derivation <- - xml_find_all(doc, "//MethodDef") %>% - map_dfr(function(node){ - tibble(derivation_id = xml_attr(node, "OID"), - derivation = xml_find_first(node, "./Description/TranslatedText") %>% - xml_text()) - }) - - comment <- - xml_find_all(doc, "//def:CommentDef") %>% - map_dfr(function(node){ - tibble(derivation_id = xml_attr(node, "OID"), - derivation = xml_find_first(node, "./Description/TranslatedText") %>% - xml_text()) - }) - - - predecessor <- xml_find_all(doc, "//ItemDef") %>% - map_dfr(function(node){ - tibble( - derivation_id = xml_find_first(node, "./def:Origin") %>% xml_text(), - derivation = derivation_id - ) - }) %>% - filter(!is.na(.data$derivation) & str_length(.data$derivation) > 0) - - bind_rows(derivation, - comment, - predecessor) %>% - distinct() + comment <- + xml_find_all(doc, "//def:CommentDef") %>% + map_dfr(function(node) { + tibble( + derivation_id = xml_attr(node, "OID"), + derivation = xml_find_first(node, "./Description/TranslatedText") %>% + xml_text() + ) + }) + + + predecessor <- xml_find_all(doc, "//ItemDef") %>% + map_dfr(function(node) { + tibble( + derivation_id = xml_find_first(node, "./def:Origin") %>% xml_text(), + derivation = derivation_id + ) + }) %>% + filter(!is.na(.data$derivation) & str_length(.data$derivation) > 0) + + bind_rows( + derivation, + comment, + predecessor + ) %>% + distinct() } diff --git a/R/xml_helpers.R b/R/xml_helpers.R index f2deeb8..8738f3c 100644 --- a/R/xml_helpers.R +++ b/R/xml_helpers.R @@ -1,4 +1,3 @@ - #' id to dataset #' #' @param id vector of id's @@ -6,7 +5,7 @@ #' @return vector of datasets #' @noRd id_to_ds <- function(id) { - id %>% str_extract("(?<=^IT\\.)[:alnum:]+(?=\\..*)") + id %>% str_extract("(?<=^IT\\.)[:alnum:]+(?=\\..*)") } #' id to variable @@ -17,13 +16,10 @@ id_to_ds <- function(id) { #' @return vector of variable names #' @noRd id_to_var <- function(id) { - ds <- id %>% str_extract("(?<=^IT\\.)[:alnum:]+(?=\\..*)") - extract <- if_else(is.na(ds), "(?<=^IT\\.)[:alnum:]*", - str_c("(?<=^IT\\.", ds, "\\.)[:alnum:]*") - ) - id %>% - str_extract(extract) + ds <- id %>% str_extract("(?<=^IT\\.)[:alnum:]+(?=\\..*)") + extract <- if_else(is.na(ds), "(?<=^IT\\.)[:alnum:]*", + str_c("(?<=^IT\\.", ds, "\\.)[:alnum:]*") + ) + id %>% + str_extract(extract) } - - - diff --git a/R/zzz.R b/R/zzz.R index 9d16d6b..e6e292b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,32 +14,34 @@ #' @importFrom xml2 read_xml xml_find_all xml_find_first xml_attr xml_ns_strip xml_text NULL -globalVariables(c("private", - "self", - "keep", - ".", - ".data", - "code", - "code_id", - "codes", - "core", - "dataset", - "decode", - "derivation_id", - "dictionary", - "id", - "key_seq", - "lab", - "label", - "matches", - "spec_type_to_code_list", - "type", - "variable", - "where", - "where_new", - "var1", - "n_lab")) +globalVariables(c( + "private", + "self", + "keep", + ".", + ".data", + "code", + "code_id", + "codes", + "core", + "dataset", + "decode", + "derivation_id", + "dictionary", + "id", + "key_seq", + "lab", + "label", + "matches", + "spec_type_to_code_list", + "type", + "variable", + "where", + "where_new", + "var1", + "n_lab" +)) .onAttach <- function(libname, pkgname) { - packageStartupMessage("Attaching package `metacore`\n\nAs of metacore 0.3.0 the `keep` variable in the `ds_vars` table has been renamed to `mandatory`. Please see release documentation for details.") + packageStartupMessage("Attaching package `metacore`\n\nAs of metacore 0.3.0 the `keep` variable in the `ds_vars` table has been renamed to `mandatory`. Please see release documentation for details.") } diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000..aba217c --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,38 @@ +ADaM +AE +Bugfix +CDISC +CMD +DataDef +DatasetMeta +Hotfix +LBORRES +Lifecycle +README +SDTM +VLM +XPT +behaviour +cli +codelist +codelists +dplyr +ds +favour +github +hgb +idvar +metatools +multisheet +ormat +qeval +rda +seealso +sig +supplementals +tbl +tibble +tibbles +tidyselect +wbc +yn diff --git a/man/is_DatasetMeta.Rd b/man/is_DatasetMeta.Rd index df66e07..bcf6144 100644 --- a/man/is_DatasetMeta.Rd +++ b/man/is_DatasetMeta.Rd @@ -18,7 +18,7 @@ Is DatasetMeta object \examples{ load(metacore_example("pilot_ADaM.rda")) adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) -is_DatasetMeta("DUMMY") # Expect FALSE -is_DatasetMeta(metacore) # Expect FALSE -is_DatasetMeta(adsl) # Expect TRUE +is_DatasetMeta("DUMMY") # Expect FALSE +is_DatasetMeta(metacore) # Expect FALSE +is_DatasetMeta(adsl) # Expect TRUE } diff --git a/man/verify_DatasetMeta.Rd b/man/verify_DatasetMeta.Rd index d458c39..5c378cd 100644 --- a/man/verify_DatasetMeta.Rd +++ b/man/verify_DatasetMeta.Rd @@ -27,8 +27,8 @@ execution is stopped and an appropriate error message is displayed. load(metacore_example("pilot_ADaM.rda")) adsl <- select_dataset(metacore, "ADSL", quiet = TRUE) \dontrun{ -verify_DatasetMeta("DUMMY") # Expect error -verify_DatasetMeta(metacore) # Expect error +verify_DatasetMeta("DUMMY") # Expect error +verify_DatasetMeta(metacore) # Expect error } -verify_DatasetMeta(adsl) # Expect valid, i.e., return TRUE +verify_DatasetMeta(adsl) # Expect valid, i.e., return TRUE } diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index db1279f..2de6d36 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -1,41 +1,45 @@ test_that("Test label Checks", { - load(metacore_example("pilot_ADaM.rda")) - man_label <- tibble::tribble( - ~variable, ~label, ~n_vars, ~ls_of_vars, - "ANL01FL", "Analysis Flag 01" , 1L, "ADADAS.ANL01FL", - "ANL01FL", "Analysis Record Flag 01" , 1L, "ADLBC.ANL01FL", - "USUBJID", "Unique Subject Identified", 3L, c("ADLBC.USUBJID", "ADTTE.USUBJID", "ADAE.USUBJID"), - "USUBJID", "Unique Subject Identifier", 2L, c("ADSL.USUBJID", "ADADAS.USUBJID"), - ) %>% - dplyr::arrange(variable, label) - label_df <- check_inconsistent_labels(metacore) %>% - dplyr::arrange(variable, label) - expect_equal(label_df, man_label) + load(metacore_example("pilot_ADaM.rda")) + man_label <- tibble::tribble( + ~variable, ~label, ~n_vars, ~ls_of_vars, + "ANL01FL", "Analysis Flag 01", 1L, "ADADAS.ANL01FL", + "ANL01FL", "Analysis Record Flag 01", 1L, "ADLBC.ANL01FL", + "USUBJID", "Unique Subject Identified", 3L, c("ADLBC.USUBJID", "ADTTE.USUBJID", "ADAE.USUBJID"), + "USUBJID", "Unique Subject Identifier", 2L, c("ADSL.USUBJID", "ADADAS.USUBJID"), + ) %>% + dplyr::arrange(variable, label) + label_df <- check_inconsistent_labels(metacore) %>% + dplyr::arrange(variable, label) + expect_equal(label_df, man_label) - load(metacore_example("pilot_SDTM.rda")) - expect_message(check_inconsistent_labels(metacore), - "No mismatch labels detected") + load(metacore_example("pilot_SDTM.rda")) + expect_message( + check_inconsistent_labels(metacore), + "No mismatch labels detected" + ) - expect_error(check_inconsistent_labels("metacore"), - "Expects a metacore object") + expect_error( + check_inconsistent_labels("metacore"), + "Expects a metacore object" + ) }) test_that("Check formats and types", { - load(metacore_example("pilot_ADaM.rda")) - expect_message(check_inconsistent_formats(metacore), - "No mismatch formats detected" - ) - man_types <- tibble::tribble( - ~variable, ~type, ~n_vars, ~ls_of_vars, - "AVAL", "float" , 1L, "ADADAS.AVAL", - "AVAL", "integer", 2L, c("ADLBC.AVAL", "ADTTE.AVAL"), - "BASE", "float" , 1L, "ADLBC.BASE", - "BASE", "integer", 1L, "ADADAS.BASE", - "CHG" , "float" , 1L, "ADLBC.CHG", - "CHG" , "integer", 1L, "ADADAS.CHG", - ) + load(metacore_example("pilot_ADaM.rda")) + expect_message( + check_inconsistent_formats(metacore), + "No mismatch formats detected" + ) + man_types <- tibble::tribble( + ~variable, ~type, ~n_vars, ~ls_of_vars, + "AVAL", "float", 1L, "ADADAS.AVAL", + "AVAL", "integer", 2L, c("ADLBC.AVAL", "ADTTE.AVAL"), + "BASE", "float", 1L, "ADLBC.BASE", + "BASE", "integer", 1L, "ADADAS.BASE", + "CHG", "float", 1L, "ADLBC.CHG", + "CHG", "integer", 1L, "ADADAS.CHG", + ) - type_df <- check_inconsistent_types(metacore) + type_df <- check_inconsistent_types(metacore) - expect_equal(type_df, man_types) + expect_equal(type_df, man_types) }) - diff --git a/tests/testthat/test-metacore.R b/tests/testthat/test-metacore.R index bc0c4c4..306ed1f 100644 --- a/tests/testthat/test-metacore.R +++ b/tests/testthat/test-metacore.R @@ -1,324 +1,359 @@ # where should this function go empty_df <- function(nms, fill) { - df <- as.data.frame(matrix(fill,1,length(nms))) - names(df) <- nms - return(df) + df <- as.data.frame(matrix(fill, 1, length(nms))) + names(df) <- nms + return(df) } dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) dfs$ds_vars <- dfs$ds_vars %>% - mutate(supp_flag = FALSE) + mutate(supp_flag = FALSE) # function from the withr package -with_dir <- function (new, code) { - old <- setwd(dir = new) - on.exit(setwd(old)) - force(code) +with_dir <- function(new, code) { + old <- setwd(dir = new) + on.exit(setwd(old)) + force(code) } test_that("readonly function factory", { - a <- readonly("a") - expect_equal(class(a), "function") - expect_equal(attr(a, "name"), "a") + a <- readonly("a") + expect_equal(class(a), "function") + expect_equal(attr(a, "name"), "a") }) test_that("metacore wrapper function works", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - - r6 <- suppressWarnings( - MetaCore$new(dfs$ds_spec, - dfs$ds_vars, - dfs$var_spec, - dfs$value_spec, - dfs$derivations, - dfs$codelist, - dfs$supp) - ) - - expect_equal(wrapper, r6) - - expect_warning(define_to_metacore(metacore_example("ADaM_define_CDISC_pilot3.xml"))) - expect_warning(spec_to_metacore(metacore_example("p21_mock.xlsx"))) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + + r6 <- suppressWarnings( + MetaCore$new( + dfs$ds_spec, + dfs$ds_vars, + dfs$var_spec, + dfs$value_spec, + dfs$derivations, + dfs$codelist, + dfs$supp + ) + ) + + expect_equal(wrapper, r6) + + expect_warning(define_to_metacore(metacore_example("ADaM_define_CDISC_pilot3.xml"))) + expect_warning(spec_to_metacore(metacore_example("p21_mock.xlsx"))) }) test_that("Can pass metacore NULL df's", { - wrapper <- suppressWarnings(metacore(dfs$ds_spec, NULL, dfs$var_spec, - dfs$value_spec, dfs$derivations, dfs$codelist, dfs$supp)) - dummy <- list(character(), character(), numeric(), numeric(), - logical(), character(), logical()) - names(dummy) <- c("dataset", "variable", "key_seq", "order", - "mandatory", "core", "supp_flag") - dummy <- as_tibble(dummy) - #Because of the labels the dfs are slightly different so checking - # the insides match - expect_equal(names(wrapper$ds_vars), names(dummy)) - expect_equal(map_chr(wrapper$ds_vars, mode), - map_chr(dummy, mode)) + wrapper <- suppressWarnings(metacore( + dfs$ds_spec, NULL, dfs$var_spec, + dfs$value_spec, dfs$derivations, dfs$codelist, dfs$supp + )) + dummy <- list( + character(), character(), numeric(), numeric(), + logical(), character(), logical() + ) + names(dummy) <- c( + "dataset", "variable", "key_seq", "order", + "mandatory", "core", "supp_flag" + ) + dummy <- as_tibble(dummy) + # Because of the labels the dfs are slightly different so checking + # the insides match + expect_equal(names(wrapper$ds_vars), names(dummy)) + expect_equal( + map_chr(wrapper$ds_vars, mode), + map_chr(dummy, mode) + ) }) test_that("subsetting works", { - test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) - subset <- test %>% select_dataset("DM", quiet = TRUE) - expect_equal(unique(subset$ds_spec$dataset), "DM") + test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) + subset <- test %>% select_dataset("DM", quiet = TRUE) + expect_equal(unique(subset$ds_spec$dataset), "DM") }) test_that("save_metacore creates .rds with no file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - my_temp_dir <- tempdir() - with_dir(my_temp_dir, save_metacore(wrapper)) - expect_true("wrapper.rds" %in% list.files(my_temp_dir)) - unlink(my_temp_dir) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + my_temp_dir <- tempdir() + with_dir(my_temp_dir, save_metacore(wrapper)) + expect_true("wrapper.rds" %in% list.files(my_temp_dir)) + unlink(my_temp_dir) }) test_that("save_metacore replaces file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - my_temp_dir <- tempdir() - save_metacore(wrapper, file.path(my_temp_dir, "wrapper.csv")) - expect_true("wrapper.rds" %in% list.files(my_temp_dir)) - unlink(my_temp_dir) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + my_temp_dir <- tempdir() + save_metacore(wrapper, file.path(my_temp_dir, "wrapper.csv")) + expect_true("wrapper.rds" %in% list.files(my_temp_dir)) + unlink(my_temp_dir) }) test_that("save_metacore uses file path", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - my_temp_dir <- tempdir() - save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) - expect_true("wrapper.rds" %in% list.files(my_temp_dir)) - unlink(my_temp_dir) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + my_temp_dir <- tempdir() + save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) + expect_true("wrapper.rds" %in% list.files(my_temp_dir)) + unlink(my_temp_dir) }) test_that("load_metacore loads .rds", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - my_temp_dir <- tempdir() - save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) - wrapper <- load_metacore(file.path(my_temp_dir, "wrapper.rds")) - expect_equal(class(wrapper), c("Metacore", "R6")) - unlink(my_temp_dir) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + my_temp_dir <- tempdir() + save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) + wrapper <- load_metacore(file.path(my_temp_dir, "wrapper.rds")) + expect_equal(class(wrapper), c("Metacore", "R6")) + unlink(my_temp_dir) }) test_that("load metacore fails with no path", { - expect_error(load_metacore()) + expect_error(load_metacore()) }) test_that("load metacore fails with no path and rdss in wd", { - wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) - my_temp_dir <- tempdir() - save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) - expect_error( - with_dir(my_temp_dir, load_metacore()) - ) - unlink(my_temp_dir) + wrapper <- suppressWarnings(do.call(metacore, dfs[1:7])) + my_temp_dir <- tempdir() + save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds")) + expect_error( + with_dir(my_temp_dir, load_metacore()) + ) + unlink(my_temp_dir) }) test_that("pulling out control terminology works", { - test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) - #Testing Errors - ## Not specific enough - expect_error(get_control_term(test, QVAL)) - ## Wrong Dataset name - expect_error(get_control_term(test, QVAL, LB)) - ## Wrong variable name - expect_error(get_control_term(test, QVA)) - expect_equal( - get_control_term(test, QVAL, SUPPAE), - tibble(code = c("N", "Y"), decode = c("No", "Yes")) - ) - expect_equal( - get_control_term(test, "QVAL", "SUPPAE"), - tibble(code = c("N", "Y"), decode = c("No", "Yes")) - ) - }) + test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) + # Testing Errors + ## Not specific enough + expect_error(get_control_term(test, QVAL)) + ## Wrong Dataset name + expect_error(get_control_term(test, QVAL, LB)) + ## Wrong variable name + expect_error(get_control_term(test, QVA)) + expect_equal( + get_control_term(test, QVAL, SUPPAE), + tibble(code = c("N", "Y"), decode = c("No", "Yes")) + ) + expect_equal( + get_control_term(test, "QVAL", "SUPPAE"), + tibble(code = c("N", "Y"), decode = c("No", "Yes")) + ) +}) test_that("get_keys works", { - test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) - #Testing Errors - ## Domain not in ds_vars table - expect_error(get_keys(test, DS)) - ## Missing dataset name - expect_error(get_keys(test)) - #Testing Correct Output - expect_equal( - get_keys(test, DM), - tibble(variable = c("STUDYID", "USUBJID"), key_seq = c(1L, 2L)) %>% - add_labs(variable = "Variable Name", - key_seq = "Sequence Key") - ) + test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) + # Testing Errors + ## Domain not in ds_vars table + expect_error(get_keys(test, DS)) + ## Missing dataset name + expect_error(get_keys(test)) + # Testing Correct Output + expect_equal( + get_keys(test, DM), + tibble(variable = c("STUDYID", "USUBJID"), key_seq = c(1L, 2L)) %>% + add_labs( + variable = "Variable Name", + key_seq = "Sequence Key" + ) + ) }) test_that("spec_to_metacore() is silent when quiet = TRUE", { - test <- metacore_example("p21_mock.xlsx") + test <- metacore_example("p21_mock.xlsx") - expect_silent({ - out <- spec_to_metacore(test , quiet = TRUE) - }) + expect_silent({ + out <- spec_to_metacore(test, quiet = TRUE) + }) - expect_true(inherits(out, "Metacore")) + expect_true(inherits(out, "Metacore")) }) test_that("spec_to_metacore() quiet = TRUE is silent and returns Metacore", { - path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) - if (inherits(path_try, "try-error") || path_try == "") { - skip("p21_mock.xlsx example spec not available") - } - path <- path_try - - expect_silent({ - mc_q <- spec_to_metacore(path, quiet = TRUE) - expect_true(inherits(mc_q, "Metacore")) - }) + path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) + if (inherits(path_try, "try-error") || path_try == "") { + skip("p21_mock.xlsx example spec not available") + } + path <- path_try + + expect_silent({ + mc_q <- spec_to_metacore(path, quiet = TRUE) + expect_true(inherits(mc_q, "Metacore")) + }) }) test_that("spec_to_metacore() quiet = TRUE returns invisibly", { - path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) - if (inherits(path_try, "try-error") || path_try == "") { - skip("p21_mock.xlsx example spec not available") - } - path <- path_try - - expect_invisible( - spec_to_metacore(path, quiet = TRUE) - ) + path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) + if (inherits(path_try, "try-error") || path_try == "") { + skip("p21_mock.xlsx example spec not available") + } + path <- path_try + + expect_invisible( + spec_to_metacore(path, quiet = TRUE) + ) }) test_that("spec_to_metacore() quiet = FALSE returns a Metacore object", { - path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) - if (inherits(path_try, "try-error") || path_try == "") { - skip("p21_mock.xlsx example spec not available") - } - path <- path_try - - # We don't assert on printed output here; just on the return type. - mc_n <- suppressWarnings(spec_to_metacore(path, quiet = FALSE)) - expect_true(inherits(mc_n, "Metacore")) + path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) + if (inherits(path_try, "try-error") || path_try == "") { + skip("p21_mock.xlsx example spec not available") + } + path <- path_try + + # We don't assert on printed output here; just on the return type. + mc_n <- suppressWarnings(spec_to_metacore(path, quiet = FALSE)) + expect_true(inherits(mc_n, "Metacore")) }) test_that("spec_to_metacore() returns structurally similar objects for quiet TRUE/FALSE", { - path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) - if (inherits(path_try, "try-error") || path_try == "") { - skip("p21_mock.xlsx example spec not available") - } - path <- path_try + path_try <- try(metacore_example("p21_mock.xlsx"), silent = TRUE) + if (inherits(path_try, "try-error") || path_try == "") { + skip("p21_mock.xlsx example spec not available") + } + path <- path_try - mc_q <- suppressWarnings(spec_to_metacore(path, quiet = TRUE)) - mc_n <- suppressWarnings(spec_to_metacore(path, quiet = FALSE)) + mc_q <- suppressWarnings(spec_to_metacore(path, quiet = TRUE)) + mc_n <- suppressWarnings(spec_to_metacore(path, quiet = FALSE)) - expect_true(inherits(mc_q, "Metacore")) - expect_true(inherits(mc_n, "Metacore")) + expect_true(inherits(mc_q, "Metacore")) + expect_true(inherits(mc_n, "Metacore")) - # Basic structural check: same component tables - expect_identical(names(mc_q$data), names(mc_n$data)) + # Basic structural check: same component tables + expect_identical(names(mc_q$data), names(mc_n$data)) }) test_that("select_dataset() is silent when quiet = TRUE", { - test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) - subset <- test %>% select_dataset("DM", quiet = TRUE) - expect_silent({ - subset <- test %>% select_dataset("DM", quiet = TRUE) - }) - + test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE) + subset <- test %>% select_dataset("DM", quiet = TRUE) + expect_silent({ + subset <- test %>% select_dataset("DM", quiet = TRUE) + }) }) test_that("metacore() quiet = TRUE is silent and returns Metacore object", { - - # simplest small valid inputs - ds_spec <- tibble::tibble(dataset = "AE", structure = "OneRowPerRecord", label = "Adverse Events") - ds_vars <- tibble::tibble(dataset = "AE", variable = "AETERM", keep = TRUE, - key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE) - var_spec <- tibble::tibble(variable = "AETERM", label = "Reported Term", length = 200L, - type = "character", common = NA_character_, format = NA_character_) - value_spec <- tibble::tibble(dataset = "AE", variable = "AETERM", where = NA_character_, - type = "character", sig_dig = NA_integer_, - code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_) - derivations <- tibble::tibble(derivation_id = integer(), derivation = character()) - codelist <- tibble::tibble(code_id = character(), name = character(), type = character(), codes = list()) - supp <- tibble::tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()) - - expect_silent({ - mc_q <- metacore( - ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp, - quiet = TRUE - ) - expect_true(inherits(mc_q, "Metacore")) - }) + # simplest small valid inputs + ds_spec <- tibble::tibble(dataset = "AE", structure = "OneRowPerRecord", label = "Adverse Events") + ds_vars <- tibble::tibble( + dataset = "AE", variable = "AETERM", keep = TRUE, + key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE + ) + var_spec <- tibble::tibble( + variable = "AETERM", label = "Reported Term", length = 200L, + type = "character", common = NA_character_, format = NA_character_ + ) + value_spec <- tibble::tibble( + dataset = "AE", variable = "AETERM", where = NA_character_, + type = "character", sig_dig = NA_integer_, + code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_ + ) + derivations <- tibble::tibble(derivation_id = integer(), derivation = character()) + codelist <- tibble::tibble(code_id = character(), name = character(), type = character(), codes = list()) + supp <- tibble::tibble(dataset = character(), variable = character(), idvar = character(), qeval = character()) + + expect_silent({ + mc_q <- metacore( + ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp, + quiet = TRUE + ) + expect_true(inherits(mc_q, "Metacore")) + }) }) test_that("metacore() quiet = TRUE returns invisibly", { - - ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") - ds_vars <- tibble::tibble(dataset = "AE", variable = "AETERM", keep = TRUE, - key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE) - var_spec <- tibble::tibble(variable = "AETERM", label = "Term", length = 200L, - type = "character", common = NA_character_, format = NA_character_) - value_spec <- tibble::tibble(dataset = "AE", variable = "AETERM", where = NA_character_, - type = "character", sig_dig = NA_integer_, - code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_) - - expect_invisible( - metacore( - ds_spec, ds_vars, var_spec, value_spec, - derivations = tibble::tibble(), - codelist = tibble::tibble(), - supp = tibble::tibble(), - quiet = TRUE - ) - ) + ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") + ds_vars <- tibble::tibble( + dataset = "AE", variable = "AETERM", keep = TRUE, + key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE + ) + var_spec <- tibble::tibble( + variable = "AETERM", label = "Term", length = 200L, + type = "character", common = NA_character_, format = NA_character_ + ) + value_spec <- tibble::tibble( + dataset = "AE", variable = "AETERM", where = NA_character_, + type = "character", sig_dig = NA_integer_, + code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_ + ) + + expect_invisible( + metacore( + ds_spec, ds_vars, var_spec, value_spec, + derivations = tibble::tibble(), + codelist = tibble::tibble(), + supp = tibble::tibble(), + quiet = TRUE + ) + ) }) test_that("metacore() quiet = FALSE returns a Metacore object", { - - ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") - ds_vars <- tibble::tibble(dataset = "AE", variable = "AETERM", keep = TRUE, - key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE) - var_spec <- tibble::tibble(variable = "AETERM", label = "Term", length = 200L, - type = "character", common = NA_character_, format = NA_character_) - value_spec <- tibble::tibble(dataset = "AE", variable = "AETERM", where = NA_character_, - type = "character", sig_dig = NA_integer_, - code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_) - - mc <- suppressWarnings( - metacore( - ds_spec, ds_vars, var_spec, value_spec, - derivations = tibble::tibble(), - codelist = tibble::tibble(), - supp = tibble::tibble(), - quiet = FALSE - ) - ) - - expect_true(inherits(mc, "Metacore")) + ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") + ds_vars <- tibble::tibble( + dataset = "AE", variable = "AETERM", keep = TRUE, + key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE + ) + var_spec <- tibble::tibble( + variable = "AETERM", label = "Term", length = 200L, + type = "character", common = NA_character_, format = NA_character_ + ) + value_spec <- tibble::tibble( + dataset = "AE", variable = "AETERM", where = NA_character_, + type = "character", sig_dig = NA_integer_, + code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_ + ) + + mc <- suppressWarnings( + metacore( + ds_spec, ds_vars, var_spec, value_spec, + derivations = tibble::tibble(), + codelist = tibble::tibble(), + supp = tibble::tibble(), + quiet = FALSE + ) + ) + + expect_true(inherits(mc, "Metacore")) }) test_that("metacore() quiet TRUE/FALSE paths produce similar structure", { - - ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") - ds_vars <- tibble::tibble(dataset = "AE", variable = "AETERM", keep = TRUE, - key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE) - var_spec <- tibble::tibble(variable = "AETERM", label = "Term", length = 200L, - type = "character", common = NA_character_, format = NA_character_) - value_spec <- tibble::tibble(dataset = "AE", variable = "AETERM", where = NA_character_, - type = "character", sig_dig = NA_integer_, - code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_) - - mc_q <- suppressWarnings( - metacore(ds_spec, ds_vars, var_spec, value_spec, - tibble::tibble(), tibble::tibble(), tibble::tibble(), - quiet = TRUE) - ) - - mc_n <- suppressWarnings( - metacore(ds_spec, ds_vars, var_spec, value_spec, - tibble::tibble(), tibble::tibble(), tibble::tibble(), - quiet = FALSE) - ) - - expect_identical(names(mc_q$data), names(mc_n$data)) + ds_spec <- tibble::tibble(dataset = "AE", structure = "Row", label = "AE") + ds_vars <- tibble::tibble( + dataset = "AE", variable = "AETERM", keep = TRUE, + key_seq = 1L, order = 1L, core = "Req", supp_flag = FALSE + ) + var_spec <- tibble::tibble( + variable = "AETERM", label = "Term", length = 200L, + type = "character", common = NA_character_, format = NA_character_ + ) + value_spec <- tibble::tibble( + dataset = "AE", variable = "AETERM", where = NA_character_, + type = "character", sig_dig = NA_integer_, + code_id = NA_character_, origin = "Collected", derivation_id = NA_integer_ + ) + + mc_q <- suppressWarnings( + metacore(ds_spec, ds_vars, var_spec, value_spec, + tibble::tibble(), tibble::tibble(), tibble::tibble(), + quiet = TRUE + ) + ) + + mc_n <- suppressWarnings( + metacore(ds_spec, ds_vars, var_spec, value_spec, + tibble::tibble(), tibble::tibble(), tibble::tibble(), + quiet = FALSE + ) + ) + + expect_identical(names(mc_q$data), names(mc_n$data)) }) diff --git a/tests/testthat/test-reader.R b/tests/testthat/test-reader.R index bd06518..965ee84 100644 --- a/tests/testthat/test-reader.R +++ b/tests/testthat/test-reader.R @@ -7,567 +7,582 @@ spec <- read_all_sheets(metacore_example("p21_mock.xlsx")) #### Fist checking some reading in test_that("Check spec_type", { - expect_equal(spec_type(metacore_example("p21_mock.xlsx")), "by_type") - expect_equal(spec_type(metacore_example("mock_spec.xlsx")), "by_type") - # Check it errors when format is not acceptable - expect_error(spec_type("example_spec.xlsx")) + expect_equal(spec_type(metacore_example("p21_mock.xlsx")), "by_type") + expect_equal(spec_type(metacore_example("mock_spec.xlsx")), "by_type") + # Check it errors when format is not acceptable + expect_error(spec_type("example_spec.xlsx")) }) #### Check reads are consistent between formats test_that("Test ds_spec readers", { - # Create a reference spec to match to - ref_ds_spec <- - tribble(~dataset, ~structure, ~label, - "DM" ,"One record per subject", "Demographics", - "EX" ,"One record per constant dosing interval per subject", "Exposure", - "AE" ,"One record per adverse event per subject", "Adverse Events", - "SUPPAE" ,"One record per IDVAR, IDVARVAL, and QNAM value per subject", "Supplemental Qualifiers for AE", - "SUPPDM" ,"One record per IDVAR, IDVARVAL, and QNAM value per subject", "Supplemental Qualifiers for DM") - # Read from define - def_ds_spec <- xml_to_ds_spec(define) - - # Read from spec - spec_ds_spec <- spec_type_to_ds_spec(spec) - spec_ds_spec2 <- spec_type_to_ds_spec(spec, sheet = "D") - - # Test against reference - expect_equal(def_ds_spec, ref_ds_spec) - expect_equal(spec_ds_spec, arrange(ref_ds_spec, dataset)) - expect_equal(spec_ds_spec2, arrange(ref_ds_spec, dataset)) + # Create a reference spec to match to + ref_ds_spec <- + tribble( + ~dataset, ~structure, ~label, + "DM", "One record per subject", "Demographics", + "EX", "One record per constant dosing interval per subject", "Exposure", + "AE", "One record per adverse event per subject", "Adverse Events", + "SUPPAE", "One record per IDVAR, IDVARVAL, and QNAM value per subject", "Supplemental Qualifiers for AE", + "SUPPDM", "One record per IDVAR, IDVARVAL, and QNAM value per subject", "Supplemental Qualifiers for DM" + ) + # Read from define + def_ds_spec <- xml_to_ds_spec(define) + + # Read from spec + spec_ds_spec <- spec_type_to_ds_spec(spec) + spec_ds_spec2 <- spec_type_to_ds_spec(spec, sheet = "D") + + # Test against reference + expect_equal(def_ds_spec, ref_ds_spec) + expect_equal(spec_ds_spec, arrange(ref_ds_spec, dataset)) + expect_equal(spec_ds_spec2, arrange(ref_ds_spec, dataset)) }) test_that("Test ds_vars readers", { - # Create a reference ds_vars - ref_ds_vars <- tibble::tribble( - ~dataset, ~variable, ~key_seq, ~order, ~mandatory, ~core, ~supp_flag, - "AE", "AEACN", NA, 21L, FALSE, NA_character_, NA, - "AE", "AEBDSYCD", NA, 16L, FALSE, NA_character_, NA, - "AE", "AEBODSYS", NA, 15L, FALSE, NA_character_, NA, - "AE", "AEDECOD", NA, 9L, FALSE, NA_character_, NA, - "AE", "AEDTC", NA, 32L, FALSE, NA_character_, NA, - "AE", "AEDY", NA, 35L, FALSE, NA_character_, NA, - "AE", "AEENDTC", NA, 34L, FALSE, NA_character_, NA, - "AE", "AEENDY", NA, 37L, FALSE, NA_character_, NA, - "AE", "AEHLGT", NA, 13L, FALSE, NA_character_, NA, - "AE", "AEHLGTCD", NA, 14L, FALSE, NA_character_, NA, - "AE", "AEHLT", NA, 11L, FALSE, NA_character_, NA, - "AE", "AEHLTCD", NA, 12L, FALSE, NA_character_, NA, - "AE", "AELLT", NA, 7L, FALSE, NA_character_, NA, - "AE", "AELLTCD", NA, 8L, FALSE, NA_character_, NA, - "AE", "AEOUT", NA, 23L, FALSE, NA_character_, NA, - "AE", "AEPTCD", NA, 10L, FALSE, NA_character_, NA, - "AE", "AEREL", NA, 22L, FALSE, NA_character_, NA, - "AE", "AESCAN", NA, 24L, FALSE, NA_character_, NA, - "AE", "AESCONG", NA, 25L, FALSE, NA_character_, NA, - "AE", "AESDISAB", NA, 26L, FALSE, NA_character_, NA, - "AE", "AESDTH", NA, 27L, FALSE, NA_character_, NA, - "AE", "AESEQ", 5L, 4L, TRUE, NA_character_, NA, - "AE", "AESER", NA, 20L, FALSE, NA_character_, NA, - "AE", "AESEV", NA, 19L, FALSE, NA_character_, NA, - "AE", "AESHOSP", NA, 28L, FALSE, NA_character_, NA, - "AE", "AESLIFE", NA, 29L, FALSE, NA_character_, NA, - "AE", "AESOC", NA, 17L, FALSE, NA_character_, NA, - "AE", "AESOCCD", NA, 18L, FALSE, NA_character_, NA, - "AE", "AESOD", NA, 30L, FALSE, NA_character_, NA, - "AE", "AESPID", NA, 5L, FALSE, NA_character_, NA, - "AE", "AESTDTC", 4L, 33L, FALSE, NA_character_, NA, - "AE", "AESTDY", NA, 36L, FALSE, NA_character_, NA, - "AE", "AETERM", 3L, 6L, TRUE, NA_character_, NA, - "AE", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, - "AE", "EPOCH", NA, 31L, FALSE, NA_character_, NA, - "AE", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, - "AE", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, - "DM", "ACTARM", NA, 22L, TRUE, NA_character_, NA, - "DM", "ACTARMCD", NA, 21L, TRUE, NA_character_, NA, - "DM", "AGE", NA, 14L, FALSE, NA_character_, NA, - "DM", "AGEU", NA, 15L, FALSE, NA_character_, NA, - "DM", "ARM", NA, 20L, TRUE, NA_character_, NA, - "DM", "ARMCD", NA, 19L, TRUE, NA_character_, NA, - "DM", "COUNTRY", NA, 23L, TRUE, NA_character_, NA, - "DM", "DMDTC", NA, 24L, FALSE, NA_character_, NA, - "DM", "DMDY", NA, 25L, FALSE, NA_character_, NA, - "DM", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, - "DM", "DTHDTC", NA, 11L, FALSE, NA_character_, NA, - "DM", "DTHFL", NA, 12L, FALSE, NA_character_, NA, - "DM", "ETHNIC", NA, 18L, FALSE, NA_character_, NA, - "DM", "RACE", NA, 17L, FALSE, NA_character_, NA, - "DM", "RFENDTC", NA, 6L, FALSE, NA_character_, NA, - "DM", "RFICDTC", NA, 9L, FALSE, NA_character_, NA, - "DM", "RFPENDTC", NA, 10L, FALSE, NA_character_, NA, - "DM", "RFSTDTC", NA, 5L, FALSE, NA_character_, NA, - "DM", "RFXENDTC", NA, 8L, FALSE, NA_character_, NA, - "DM", "RFXSTDTC", NA, 7L, FALSE, NA_character_, NA, - "DM", "SEX", NA, 16L, TRUE, NA_character_, NA, - "DM", "SITEID", NA, 13L, TRUE, NA_character_, NA, - "DM", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, - "DM", "SUBJID", NA, 4L, TRUE, NA_character_, NA, - "DM", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, - "EX", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, - "EX", "EPOCH", NA, 14L, FALSE, NA_character_, NA, - "EX", "EXDOSE", NA, 6L, FALSE, NA_character_, NA, - "EX", "EXDOSFRM", NA, 8L, FALSE, NA_character_, NA, - "EX", "EXDOSFRQ", NA, 9L, FALSE, NA_character_, NA, - "EX", "EXDOSU", NA, 7L, FALSE, NA_character_, NA, - "EX", "EXENDTC", NA, 16L, FALSE, NA_character_, NA, - "EX", "EXENDY", NA, 18L, FALSE, NA_character_, NA, - "EX", "EXROUTE", NA, 10L, FALSE, NA_character_, NA, - "EX", "EXSEQ", NA, 4L, TRUE, NA_character_, NA, - "EX", "EXSTDTC", 4L, 15L, FALSE, NA_character_, NA, - "EX", "EXSTDY", NA, 17L, FALSE, NA_character_, NA, - "EX", "EXTRT", 3L, 5L, TRUE, NA_character_, NA, - "EX", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, - "EX", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, - "EX", "VISIT", NA, 12L, FALSE, NA_character_, NA, - "EX", "VISITDY", NA, 13L, FALSE, NA_character_, NA, - "EX", "VISITNUM", NA, 11L, FALSE, NA_character_, NA, - "SUPPAE", "IDVAR", 4L, 4L, FALSE, NA_character_, NA, - "SUPPAE", "IDVARVAL", 5L, 5L, FALSE, NA_character_, NA, - "SUPPAE", "QEVAL", NA, 10L, FALSE, NA_character_, NA, - "SUPPAE", "QLABEL", NA, 7L, TRUE, NA_character_, NA, - "SUPPAE", "QNAM", 6L, 6L, TRUE, NA_character_, NA, - "SUPPAE", "QORIG", NA, 9L, TRUE, NA_character_, NA, - "SUPPAE", "QVAL", NA, 8L, TRUE, NA_character_, NA, - "SUPPAE", "RDOMAIN", 2L, 2L, TRUE, NA_character_, NA, - "SUPPAE", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, - "SUPPAE", "USUBJID", 3L, 3L, TRUE, NA_character_, NA, - "SUPPDM", "IDVAR", 4L, 4L, FALSE, NA_character_, NA, - "SUPPDM", "IDVARVAL", 5L, 5L, FALSE, NA_character_, NA, - "SUPPDM", "QEVAL", NA, 10L, FALSE, NA_character_, NA, - "SUPPDM", "QLABEL", NA, 7L, TRUE, NA_character_, NA, - "SUPPDM", "QNAM", 6L, 6L, TRUE, NA_character_, NA, - "SUPPDM", "QORIG", NA, 9L, TRUE, NA_character_, NA, - "SUPPDM", "QVAL", NA, 8L, TRUE, NA_character_, NA, - "SUPPDM", "RDOMAIN", 2L, 2L, TRUE, NA_character_, NA, - "SUPPDM", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, - "SUPPDM", "USUBJID", 3L, 3L, TRUE, NA_character_, NA, - ) - - # Read from define - def_ds_vars <- xml_to_ds_vars(define) %>% - arrange(dataset, variable) - - # Read from spec - spec_ds_vars <- spec_type_to_ds_vars(spec) %>% - arrange(dataset, variable) %>% - select(dataset, variable, key_seq, order, mandatory, core, supp_flag) - - - # Tests - expect_equal(def_ds_vars, ref_ds_vars) - expect_equal(spec_ds_vars, ref_ds_vars) - + # Create a reference ds_vars + ref_ds_vars <- tibble::tribble( + ~dataset, ~variable, ~key_seq, ~order, ~mandatory, ~core, ~supp_flag, + "AE", "AEACN", NA, 21L, FALSE, NA_character_, NA, + "AE", "AEBDSYCD", NA, 16L, FALSE, NA_character_, NA, + "AE", "AEBODSYS", NA, 15L, FALSE, NA_character_, NA, + "AE", "AEDECOD", NA, 9L, FALSE, NA_character_, NA, + "AE", "AEDTC", NA, 32L, FALSE, NA_character_, NA, + "AE", "AEDY", NA, 35L, FALSE, NA_character_, NA, + "AE", "AEENDTC", NA, 34L, FALSE, NA_character_, NA, + "AE", "AEENDY", NA, 37L, FALSE, NA_character_, NA, + "AE", "AEHLGT", NA, 13L, FALSE, NA_character_, NA, + "AE", "AEHLGTCD", NA, 14L, FALSE, NA_character_, NA, + "AE", "AEHLT", NA, 11L, FALSE, NA_character_, NA, + "AE", "AEHLTCD", NA, 12L, FALSE, NA_character_, NA, + "AE", "AELLT", NA, 7L, FALSE, NA_character_, NA, + "AE", "AELLTCD", NA, 8L, FALSE, NA_character_, NA, + "AE", "AEOUT", NA, 23L, FALSE, NA_character_, NA, + "AE", "AEPTCD", NA, 10L, FALSE, NA_character_, NA, + "AE", "AEREL", NA, 22L, FALSE, NA_character_, NA, + "AE", "AESCAN", NA, 24L, FALSE, NA_character_, NA, + "AE", "AESCONG", NA, 25L, FALSE, NA_character_, NA, + "AE", "AESDISAB", NA, 26L, FALSE, NA_character_, NA, + "AE", "AESDTH", NA, 27L, FALSE, NA_character_, NA, + "AE", "AESEQ", 5L, 4L, TRUE, NA_character_, NA, + "AE", "AESER", NA, 20L, FALSE, NA_character_, NA, + "AE", "AESEV", NA, 19L, FALSE, NA_character_, NA, + "AE", "AESHOSP", NA, 28L, FALSE, NA_character_, NA, + "AE", "AESLIFE", NA, 29L, FALSE, NA_character_, NA, + "AE", "AESOC", NA, 17L, FALSE, NA_character_, NA, + "AE", "AESOCCD", NA, 18L, FALSE, NA_character_, NA, + "AE", "AESOD", NA, 30L, FALSE, NA_character_, NA, + "AE", "AESPID", NA, 5L, FALSE, NA_character_, NA, + "AE", "AESTDTC", 4L, 33L, FALSE, NA_character_, NA, + "AE", "AESTDY", NA, 36L, FALSE, NA_character_, NA, + "AE", "AETERM", 3L, 6L, TRUE, NA_character_, NA, + "AE", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, + "AE", "EPOCH", NA, 31L, FALSE, NA_character_, NA, + "AE", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, + "AE", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, + "DM", "ACTARM", NA, 22L, TRUE, NA_character_, NA, + "DM", "ACTARMCD", NA, 21L, TRUE, NA_character_, NA, + "DM", "AGE", NA, 14L, FALSE, NA_character_, NA, + "DM", "AGEU", NA, 15L, FALSE, NA_character_, NA, + "DM", "ARM", NA, 20L, TRUE, NA_character_, NA, + "DM", "ARMCD", NA, 19L, TRUE, NA_character_, NA, + "DM", "COUNTRY", NA, 23L, TRUE, NA_character_, NA, + "DM", "DMDTC", NA, 24L, FALSE, NA_character_, NA, + "DM", "DMDY", NA, 25L, FALSE, NA_character_, NA, + "DM", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, + "DM", "DTHDTC", NA, 11L, FALSE, NA_character_, NA, + "DM", "DTHFL", NA, 12L, FALSE, NA_character_, NA, + "DM", "ETHNIC", NA, 18L, FALSE, NA_character_, NA, + "DM", "RACE", NA, 17L, FALSE, NA_character_, NA, + "DM", "RFENDTC", NA, 6L, FALSE, NA_character_, NA, + "DM", "RFICDTC", NA, 9L, FALSE, NA_character_, NA, + "DM", "RFPENDTC", NA, 10L, FALSE, NA_character_, NA, + "DM", "RFSTDTC", NA, 5L, FALSE, NA_character_, NA, + "DM", "RFXENDTC", NA, 8L, FALSE, NA_character_, NA, + "DM", "RFXSTDTC", NA, 7L, FALSE, NA_character_, NA, + "DM", "SEX", NA, 16L, TRUE, NA_character_, NA, + "DM", "SITEID", NA, 13L, TRUE, NA_character_, NA, + "DM", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, + "DM", "SUBJID", NA, 4L, TRUE, NA_character_, NA, + "DM", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, + "EX", "DOMAIN", NA, 2L, TRUE, NA_character_, NA, + "EX", "EPOCH", NA, 14L, FALSE, NA_character_, NA, + "EX", "EXDOSE", NA, 6L, FALSE, NA_character_, NA, + "EX", "EXDOSFRM", NA, 8L, FALSE, NA_character_, NA, + "EX", "EXDOSFRQ", NA, 9L, FALSE, NA_character_, NA, + "EX", "EXDOSU", NA, 7L, FALSE, NA_character_, NA, + "EX", "EXENDTC", NA, 16L, FALSE, NA_character_, NA, + "EX", "EXENDY", NA, 18L, FALSE, NA_character_, NA, + "EX", "EXROUTE", NA, 10L, FALSE, NA_character_, NA, + "EX", "EXSEQ", NA, 4L, TRUE, NA_character_, NA, + "EX", "EXSTDTC", 4L, 15L, FALSE, NA_character_, NA, + "EX", "EXSTDY", NA, 17L, FALSE, NA_character_, NA, + "EX", "EXTRT", 3L, 5L, TRUE, NA_character_, NA, + "EX", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, + "EX", "USUBJID", 2L, 3L, TRUE, NA_character_, NA, + "EX", "VISIT", NA, 12L, FALSE, NA_character_, NA, + "EX", "VISITDY", NA, 13L, FALSE, NA_character_, NA, + "EX", "VISITNUM", NA, 11L, FALSE, NA_character_, NA, + "SUPPAE", "IDVAR", 4L, 4L, FALSE, NA_character_, NA, + "SUPPAE", "IDVARVAL", 5L, 5L, FALSE, NA_character_, NA, + "SUPPAE", "QEVAL", NA, 10L, FALSE, NA_character_, NA, + "SUPPAE", "QLABEL", NA, 7L, TRUE, NA_character_, NA, + "SUPPAE", "QNAM", 6L, 6L, TRUE, NA_character_, NA, + "SUPPAE", "QORIG", NA, 9L, TRUE, NA_character_, NA, + "SUPPAE", "QVAL", NA, 8L, TRUE, NA_character_, NA, + "SUPPAE", "RDOMAIN", 2L, 2L, TRUE, NA_character_, NA, + "SUPPAE", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, + "SUPPAE", "USUBJID", 3L, 3L, TRUE, NA_character_, NA, + "SUPPDM", "IDVAR", 4L, 4L, FALSE, NA_character_, NA, + "SUPPDM", "IDVARVAL", 5L, 5L, FALSE, NA_character_, NA, + "SUPPDM", "QEVAL", NA, 10L, FALSE, NA_character_, NA, + "SUPPDM", "QLABEL", NA, 7L, TRUE, NA_character_, NA, + "SUPPDM", "QNAM", 6L, 6L, TRUE, NA_character_, NA, + "SUPPDM", "QORIG", NA, 9L, TRUE, NA_character_, NA, + "SUPPDM", "QVAL", NA, 8L, TRUE, NA_character_, NA, + "SUPPDM", "RDOMAIN", 2L, 2L, TRUE, NA_character_, NA, + "SUPPDM", "STUDYID", 1L, 1L, TRUE, NA_character_, NA, + "SUPPDM", "USUBJID", 3L, 3L, TRUE, NA_character_, NA, + ) + + # Read from define + def_ds_vars <- xml_to_ds_vars(define) %>% + arrange(dataset, variable) + + # Read from spec + spec_ds_vars <- spec_type_to_ds_vars(spec) %>% + arrange(dataset, variable) %>% + select(dataset, variable, key_seq, order, mandatory, core, supp_flag) + + + # Tests + expect_equal(def_ds_vars, ref_ds_vars) + expect_equal(spec_ds_vars, ref_ds_vars) }) test_that("Test var_spec readers", { - ref_var_spec <- - tibble::tribble( - ~variable, ~type, ~length, ~label, ~format, ~common, - "ACTARM", "text", 20L, "Description of Actual Arm", NA, NA, - "ACTARMCD", "text", 8L, "Actual Arm Code", NA, NA, - "AEACN", "text", 30L, "Action Taken with Study Treatment", NA, NA, - "AEBDSYCD", "integer", 8L, "Body System or Organ Class Code", NA, NA, - "AEBODSYS", "text", 67L, "Body System or Organ Class", NA, NA, - "AEDECOD", "text", 200L, "Dictionary-Derived Term", NA, NA, - "AEDTC", "date", 10L, "Date/Time of Collection", NA, NA, - "AEDY", "integer", 8L, "Study Day of Visit/Collection/Exam", NA, NA, - "AEENDTC", "date", 10L, "End Date/Time of Adverse Event", NA, NA, - "AEENDY", "integer", 8L, "Study Day of End of Adverse Event", NA, NA, - "AEHLGT", "text", 100L, "High Level Group Term", NA, NA, - "AEHLGTCD", "integer", 8L, "High Level Group Term Code", NA, NA, - "AEHLT", "text", 100L, "High Level Term", NA, NA, - "AEHLTCD", "integer", 8L, "High Level Term Code", NA, NA, - "AELLT", "text", 100L, "Lowest Level Term", NA, NA, - "AELLTCD", "integer", 8L, "Lowest Level Term Code", NA, NA, - "AEOUT", "text", 200L, "Outcome of Adverse Event", NA, NA, - "AEPTCD", "integer", 8L, "Preferred Term Code", NA, NA, - "AEREL", "text", 8L, "Causality", NA, NA, - "AESCAN", "text", 1L, "Involves Cancer", NA, NA, - "AESCONG", "text", 1L, "Congenital Anomaly or Birth Defect", NA, NA, - "AESDISAB", "text", 1L, "Persist or Signif Disability/Incapacity", NA, NA, - "AESDTH", "text", 1L, "Results in Death", NA, NA, - "AESEQ", "integer", 8L, "Sequence Number", NA, NA, - "AESER", "text", 1L, "Serious Event", NA, NA, - "AESEV", "text", 8L, "Severity/Intensity", NA, NA, - "AESHOSP", "text", 1L, "Requires or Prolongs Hospitalization", NA, NA, - "AESLIFE", "text", 1L, "Is Life Threatening", NA, NA, - "AESOC", "text", 100L, "Primary System Organ Class", NA, NA, - "AESOCCD", "integer", 8L, "Primary System Organ Class Code", NA, NA, - "AESOD", "text", 1L, "Occurred with Overdose", NA, NA, - "AESPID", "text", 3L, "Sponsor-Defined Identifier", NA, NA, - "AESTDTC", "date", 10L, "Start Date/Time of Adverse Event", NA, NA, - "AESTDY", "integer", 8L, "Study Day of Start of Adverse Event", NA, NA, - "AETERM", "text", 200L, "Reported Term for the Adverse Event", NA, NA, - "AGE", "integer", 8L, "Age", NA, NA, - "AGEU", "text", 6L, "Age Units", NA, NA, - "ARM", "text", 20L, "Description of Planned Arm", NA, NA, - "ARMCD", "text", 8L, "Planned Arm Code", NA, NA, - "COUNTRY", "text", 3L, "Country", NA, NA, - "DMDTC", "date", 10L, "Date/Time of Collection", NA, NA, - "DMDY", "integer", 8L, "Study Day of Collection", NA, NA, - "DOMAIN", "text", 2L, "Domain Abbreviation", NA, NA, - "DTHDTC", "datetime", 20L, "Date/Time of Death", NA, NA, - "DTHFL", "text", 1L, "Subject Death Flag", NA, NA, - "EPOCH", "text", 9L, "Epoch", NA, NA, - "ETHNIC", "text", 25L, "Ethnicity", NA, NA, - "EXDOSE", "integer", 8L, "Dose", NA, NA, - "EXDOSFRM", "text", 5L, "Dose Form", NA, NA, - "EXDOSFRQ", "text", 2L, "Dosing Frequency per Interval", NA, NA, - "EXDOSU", "text", 2L, "Dose Units", NA, NA, - "EXENDTC", "date", 10L, "End Date/Time of Treatment", NA, NA, - "EXENDY", "integer", 8L, "Study Day of End of Treatment", NA, NA, - "EXROUTE", "text", 11L, "Route of Administration", NA, NA, - "EXSEQ", "integer", 8L, "Sequence Number", NA, NA, - "EXSTDTC", "date", 10L, "Start Date/Time of Treatment", NA, NA, - "EXSTDY", "integer", 8L, "Study Day of Start of Treatment", NA, NA, - "EXTRT", "text", 10L, "Name of Treatment", NA, NA, - "IDVAR", "text", 8L, "Identifying Variable", NA, NA, - "IDVARVAL", "text", 200L, "Identifying Variable Value", NA, NA, - "QEVAL", "text", 200L, "Evaluator", NA, NA, - "QLABEL", "text", 40L, "Qualifier Variable Label", NA, NA, - "QNAM", "text", 8L, "Qualifier Variable Name", NA, NA, - "QORIG", "text", 200L, "Origin", NA, NA, - "QVAL", "text", 200L, "Data Value", NA, NA, - "RACE", "text", 78L, "Race", NA, NA, - "RDOMAIN", "text", 2L, "Related Domain Abbreviation", NA, NA, - "RFENDTC", "date", 10L, "Subject Reference End Date/Time", NA, NA, - "RFICDTC", "datetime", 20L, "Date/Time of Informed Consent", NA, NA, - "RFPENDTC", "datetime", 20L, "Date/Time of End of Participation", NA, NA, - "RFSTDTC", "date", 10L, "Subject Reference Start Date/Time", NA, NA, - "RFXENDTC", "datetime", 20L, "Date/Time of Last Study Treatment", NA, NA, - "RFXSTDTC", "datetime", 20L, "Date/Time of First Study Treatment", NA, NA, - "SEX", "text", 1L, "Sex", NA, NA, - "SITEID", "text", 3L, "Study Site Identifier", NA, NA, - "STUDYID", "text", 12L, "Study Identifier", NA, NA, - "SUBJID", "text", 4L, "Subject Identifier for the Study", NA, NA, - "USUBJID", "text", 11L, "Unique Subject Identifier", NA, NA, - "VISIT", "text", 19L, "Visit Name", NA, NA, - "VISITDY", "integer", 8L, "Planned Study Day of Visit", NA, NA, - "VISITNUM", "float", 8L, "Visit Number", "8.1", NA - ) - - # Read from define - def_var_spec <- xml_to_var_spec(define) %>% - arrange(variable) - - # Read from spec - spec_var_spec <- spec_type_to_var_spec(spec) %>% - arrange(variable) %>% - select(variable, type, length, label, format) - - spec2 <- spec - spec2$Variables |> - select(-Dataset) - no_ds <- spec_type_to_var_spec(spec2) |> - arrange(variable) %>% - select(variable, type, length, label, format) - expect_equal(no_ds, spec_var_spec) - - # Tests - expect_equal(def_var_spec, ref_var_spec) - # remove common as it is derived when reading in specs but left alone from defines - expect_equal(spec_var_spec, - ref_var_spec %>% - select(-common)) - + ref_var_spec <- + tibble::tribble( + ~variable, ~type, ~length, ~label, ~format, ~common, + "ACTARM", "text", 20L, "Description of Actual Arm", NA, NA, + "ACTARMCD", "text", 8L, "Actual Arm Code", NA, NA, + "AEACN", "text", 30L, "Action Taken with Study Treatment", NA, NA, + "AEBDSYCD", "integer", 8L, "Body System or Organ Class Code", NA, NA, + "AEBODSYS", "text", 67L, "Body System or Organ Class", NA, NA, + "AEDECOD", "text", 200L, "Dictionary-Derived Term", NA, NA, + "AEDTC", "date", 10L, "Date/Time of Collection", NA, NA, + "AEDY", "integer", 8L, "Study Day of Visit/Collection/Exam", NA, NA, + "AEENDTC", "date", 10L, "End Date/Time of Adverse Event", NA, NA, + "AEENDY", "integer", 8L, "Study Day of End of Adverse Event", NA, NA, + "AEHLGT", "text", 100L, "High Level Group Term", NA, NA, + "AEHLGTCD", "integer", 8L, "High Level Group Term Code", NA, NA, + "AEHLT", "text", 100L, "High Level Term", NA, NA, + "AEHLTCD", "integer", 8L, "High Level Term Code", NA, NA, + "AELLT", "text", 100L, "Lowest Level Term", NA, NA, + "AELLTCD", "integer", 8L, "Lowest Level Term Code", NA, NA, + "AEOUT", "text", 200L, "Outcome of Adverse Event", NA, NA, + "AEPTCD", "integer", 8L, "Preferred Term Code", NA, NA, + "AEREL", "text", 8L, "Causality", NA, NA, + "AESCAN", "text", 1L, "Involves Cancer", NA, NA, + "AESCONG", "text", 1L, "Congenital Anomaly or Birth Defect", NA, NA, + "AESDISAB", "text", 1L, "Persist or Signif Disability/Incapacity", NA, NA, + "AESDTH", "text", 1L, "Results in Death", NA, NA, + "AESEQ", "integer", 8L, "Sequence Number", NA, NA, + "AESER", "text", 1L, "Serious Event", NA, NA, + "AESEV", "text", 8L, "Severity/Intensity", NA, NA, + "AESHOSP", "text", 1L, "Requires or Prolongs Hospitalization", NA, NA, + "AESLIFE", "text", 1L, "Is Life Threatening", NA, NA, + "AESOC", "text", 100L, "Primary System Organ Class", NA, NA, + "AESOCCD", "integer", 8L, "Primary System Organ Class Code", NA, NA, + "AESOD", "text", 1L, "Occurred with Overdose", NA, NA, + "AESPID", "text", 3L, "Sponsor-Defined Identifier", NA, NA, + "AESTDTC", "date", 10L, "Start Date/Time of Adverse Event", NA, NA, + "AESTDY", "integer", 8L, "Study Day of Start of Adverse Event", NA, NA, + "AETERM", "text", 200L, "Reported Term for the Adverse Event", NA, NA, + "AGE", "integer", 8L, "Age", NA, NA, + "AGEU", "text", 6L, "Age Units", NA, NA, + "ARM", "text", 20L, "Description of Planned Arm", NA, NA, + "ARMCD", "text", 8L, "Planned Arm Code", NA, NA, + "COUNTRY", "text", 3L, "Country", NA, NA, + "DMDTC", "date", 10L, "Date/Time of Collection", NA, NA, + "DMDY", "integer", 8L, "Study Day of Collection", NA, NA, + "DOMAIN", "text", 2L, "Domain Abbreviation", NA, NA, + "DTHDTC", "datetime", 20L, "Date/Time of Death", NA, NA, + "DTHFL", "text", 1L, "Subject Death Flag", NA, NA, + "EPOCH", "text", 9L, "Epoch", NA, NA, + "ETHNIC", "text", 25L, "Ethnicity", NA, NA, + "EXDOSE", "integer", 8L, "Dose", NA, NA, + "EXDOSFRM", "text", 5L, "Dose Form", NA, NA, + "EXDOSFRQ", "text", 2L, "Dosing Frequency per Interval", NA, NA, + "EXDOSU", "text", 2L, "Dose Units", NA, NA, + "EXENDTC", "date", 10L, "End Date/Time of Treatment", NA, NA, + "EXENDY", "integer", 8L, "Study Day of End of Treatment", NA, NA, + "EXROUTE", "text", 11L, "Route of Administration", NA, NA, + "EXSEQ", "integer", 8L, "Sequence Number", NA, NA, + "EXSTDTC", "date", 10L, "Start Date/Time of Treatment", NA, NA, + "EXSTDY", "integer", 8L, "Study Day of Start of Treatment", NA, NA, + "EXTRT", "text", 10L, "Name of Treatment", NA, NA, + "IDVAR", "text", 8L, "Identifying Variable", NA, NA, + "IDVARVAL", "text", 200L, "Identifying Variable Value", NA, NA, + "QEVAL", "text", 200L, "Evaluator", NA, NA, + "QLABEL", "text", 40L, "Qualifier Variable Label", NA, NA, + "QNAM", "text", 8L, "Qualifier Variable Name", NA, NA, + "QORIG", "text", 200L, "Origin", NA, NA, + "QVAL", "text", 200L, "Data Value", NA, NA, + "RACE", "text", 78L, "Race", NA, NA, + "RDOMAIN", "text", 2L, "Related Domain Abbreviation", NA, NA, + "RFENDTC", "date", 10L, "Subject Reference End Date/Time", NA, NA, + "RFICDTC", "datetime", 20L, "Date/Time of Informed Consent", NA, NA, + "RFPENDTC", "datetime", 20L, "Date/Time of End of Participation", NA, NA, + "RFSTDTC", "date", 10L, "Subject Reference Start Date/Time", NA, NA, + "RFXENDTC", "datetime", 20L, "Date/Time of Last Study Treatment", NA, NA, + "RFXSTDTC", "datetime", 20L, "Date/Time of First Study Treatment", NA, NA, + "SEX", "text", 1L, "Sex", NA, NA, + "SITEID", "text", 3L, "Study Site Identifier", NA, NA, + "STUDYID", "text", 12L, "Study Identifier", NA, NA, + "SUBJID", "text", 4L, "Subject Identifier for the Study", NA, NA, + "USUBJID", "text", 11L, "Unique Subject Identifier", NA, NA, + "VISIT", "text", 19L, "Visit Name", NA, NA, + "VISITDY", "integer", 8L, "Planned Study Day of Visit", NA, NA, + "VISITNUM", "float", 8L, "Visit Number", "8.1", NA + ) + + # Read from define + def_var_spec <- xml_to_var_spec(define) %>% + arrange(variable) + + # Read from spec + spec_var_spec <- spec_type_to_var_spec(spec) %>% + arrange(variable) %>% + select(variable, type, length, label, format) + + spec2 <- spec + spec2$Variables |> + select(-Dataset) + no_ds <- spec_type_to_var_spec(spec2) |> + arrange(variable) %>% + select(variable, type, length, label, format) + expect_equal(no_ds, spec_var_spec) + + # Tests + expect_equal(def_var_spec, ref_var_spec) + # remove common as it is derived when reading in specs but left alone from defines + expect_equal( + spec_var_spec, + ref_var_spec %>% + select(-common) + ) }) test_that("values_spec reader tests", { - ref_value_spec <- tibble::tribble( - ~dataset, ~variable, ~type, ~origin, ~code_id, ~sig_dig, ~where, ~derivation_id, - "AE", "AEACN", "text", "Derived", NA, NA, NA, "MT.AE.AEACN", - "AE", "AEBDSYCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AEBODSYS", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AEDECOD", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AEDTC", "date", "Derived", NA, NA, NA, "MT.AE.AEDTC", - "AE", "AEDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AEENDTC", "date", "CRF", NA, NA, NA, NA, - "AE", "AEENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AEHLGT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AEHLGTCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AEHLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AEHLTCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AELLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AELLTCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AEOUT", "text", "CRF", "CL.OUT", NA, NA, NA, - "AE", "AEPTCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AEREL", "text", "CRF", "CL.AECAUS", NA, NA, NA, - "AE", "AESCAN", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESCONG", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESDISAB", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESDTH", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESEQ", "integer", "Derived", NA, NA, NA, "MT.AE.AESEQ", - "AE", "AESER", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESEV", "text", "CRF", "CL.SEV", NA, NA, NA, - "AE", "AESHOSP", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESLIFE", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESOC", "text", "Assigned", "CL.AEDICT", NA, NA, NA, - "AE", "AESOCCD", "integer", "Assigned", NA, NA, NA, NA, - "AE", "AESOD", "text", "CRF", "CL.YN", NA, NA, NA, - "AE", "AESPID", "text", "CRF", NA, NA, NA, NA, - "AE", "AESTDTC", "date", "CRF", NA, NA, NA, NA, - "AE", "AESTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "AE", "AETERM", "text", "CRF", NA, NA, NA, NA, - "AE", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, - "AE", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.AE.EPOCH", - "AE", "STUDYID", "text", "CRF", NA, NA, NA, NA, - "AE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.AE.USUBJID", - "DM", "ACTARM", "text", "Derived", "CL.ARM", NA, NA, "MT.DM.ACTARM", - "DM", "ACTARMCD", "text", "Derived", "CL.ARMCD", NA, NA, "MT.DM.ACTARMCD", - "DM", "AGE", "integer", "Derived", NA, NA, NA, "MT.DM.AGE", - "DM", "AGEU", "text", "Assigned", "CL.AGEU", NA, NA, "COM.DM.AGEU", - "DM", "ARM", "text", "Assigned", "CL.ARM", NA, NA, "COM.DM.ARM", - "DM", "ARMCD", "text", "Assigned", "CL.ARMCD", NA, NA, "COM.DM.ARMCD", - "DM", "COUNTRY", "text", "Derived", "CL.COUNTRY", NA, NA, "MT.DM.COUNTRY", - "DM", "DMDTC", "date", "CRF", NA, NA, NA, NA, - "DM", "DMDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "DM", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, - "DM", "DTHDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.DTHDTC", - "DM", "DTHFL", "text", "Derived", "CL.Y_BLANK", NA, NA, "MT.DM.DTHFL", - "DM", "ETHNIC", "text", "Derived", "CL.ETHNIC", NA, NA, "MT.DM.ETHNIC", - "DM", "RACE", "text", "CRF", "CL.RACE", NA, NA, NA, - "DM", "RFENDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFENDTC", - "DM", "RFICDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFICDTC", - "DM", "RFPENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFPENDTC", - "DM", "RFSTDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFSTDTC", - "DM", "RFXENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXENDTC", - "DM", "RFXSTDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXSTDTC", - "DM", "SEX", "text", "CRF", "CL.SEX", NA, NA, NA, - "DM", "SITEID", "text", "Assigned", NA, NA, NA, NA, - "DM", "STUDYID", "text", "CRF", NA, NA, NA, NA, - "DM", "SUBJID", "text", "CRF", NA, NA, NA, NA, - "DM", "USUBJID", "text", "Derived", NA, NA, NA, "MT.DM.USUBJID", - "EX", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, - "EX", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.EX.EPOCH", - "EX", "EXDOSE", "integer", "eDT", NA, NA, NA, NA, - "EX", "EXDOSFRM", "text", "eDT", "CL.EXDOSFRM", NA, NA, NA, - "EX", "EXDOSFRQ", "text", "eDT", "CL.EXFREQ", NA, NA, NA, - "EX", "EXDOSU", "text", "eDT", "CL.EXDOSEU", NA, NA, NA, - "EX", "EXENDTC", "date", "CRF", NA, NA, NA, NA, - "EX", "EXENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "EX", "EXROUTE", "text", "eDT", "CL.EXROUTE", NA, NA, NA, - "EX", "EXSEQ", "integer", "Derived", NA, NA, NA, "MT.EX.EXSEQ", - "EX", "EXSTDTC", "date", "CRF", NA, NA, NA, NA, - "EX", "EXSTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", - "EX", "EXTRT", "text", "eDT", "CL.EXTRT", NA, NA, NA, - "EX", "STUDYID", "text", "CRF", NA, NA, NA, NA, - "EX", "USUBJID", "text", "Derived", NA, NA, NA, "MT.EX.USUBJID", - "EX", "VISIT", "text", "CRF", "CL.VISIT", NA, NA, NA, - "EX", "VISITDY", "integer", "Derived", NA, NA, NA, "MT.EX.VISITDY", - "EX", "VISITNUM", "float", "CRF", "CL.VISITNUM", 1L, NA, NA, - "SUPPAE", "IDVAR", "text", "Assigned", NA, NA, NA, "COM.SUPPAE.IDVAR", - "SUPPAE", "IDVARVAL", "text", "Derived", NA, NA, NA, "MT.SUPPAE.IDVARVAL", - "SUPPAE", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, - "SUPPAE", "QLABEL", "text", "Assigned", NA, NA, NA, NA, - "SUPPAE", "QNAM", "text", "Assigned", "CL.SUPPAE.QNAM", NA, NA, NA, - "SUPPAE", "QORIG", "text", "Assigned", NA, NA, NA, NA, - "SUPPAE", "QVAL", "text", "Derived", "CL.YN", NA, "QNAM == 'TRTEMFL'", "MT.SUPPAE.QNAM.TRTEMFL", - "SUPPAE", "RDOMAIN", "text", "Assigned", NA, NA, NA, "COM.SUPPAE.RDOMAIN", - "SUPPAE", "STUDYID", "text", "CRF", NA, NA, NA, NA, - "SUPPAE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.SUPPAE.USUBJID", - "SUPPDM", "IDVAR", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.IDVAR", - "SUPPDM", "IDVARVAL", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.IDVARVAL", - "SUPPDM", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, - "SUPPDM", "QLABEL", "text", "Assigned", NA, NA, NA, NA, - "SUPPDM", "QNAM", "text", "Assigned", "CL.SUPPDM.QNAM", NA, NA, NA, - "SUPPDM", "QORIG", "text", "Assigned", NA, NA, NA, NA, - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT16'", "MT.SUPPDM.QNAM.COMPLT16", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT24'", "MT.SUPPDM.QNAM.COMPLT24", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT8'", "MT.SUPPDM.QNAM.COMPLT8", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'EFFICACY'", "MT.SUPPDM.QNAM.EFFICACY", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'SAFETY'", "MT.SUPPDM.QNAM.SAFETY", - "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'ITT'", "MT.SUPPDM.QNAM.ITT", - "SUPPDM", "RDOMAIN", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.RDOMAIN", - "SUPPDM", "STUDYID", "text", "CRF", NA, NA, NA, NA, - "SUPPDM", "USUBJID", "text", "Derived", NA, NA, NA, "MT.SUPPDM.USUBJID" - ) - - # Read from define - def_value_spec <- xml_to_value_spec(define) %>% - arrange(dataset, variable) %>% - select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id ) - - # Read from spec - comments <- spec$Comments$ID - spec_value_spec <- spec_type_to_value_spec(spec) %>% - arrange(dataset, variable) %>% - select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id) %>% - #Fix naming as it is slightly different, but matches within metacore - mutate(code_id = if_else(!is.na(code_id), paste0("CL.", code_id), code_id, NA_character_), - derivation_id = case_when( - origin == "Derived" ~ paste0("MT.", derivation_id), - origin == "Assigned" & derivation_id %in% comments ~ paste0("COM.", derivation_id), - TRUE ~ NA_character_), - where = str_replace(where, "EQ", "=="), - where = str_split(where, "\\s(?=(\\w*$))") %>% - map(~paste0(.[1], " '", .[2])), - where = if_else(where == "NA 'NA", NA_character_, paste0(where, "'"))) - - # Tests - expect_equal(def_value_spec, ref_value_spec) - expect_equal(spec_value_spec, ref_value_spec) - - }) + ref_value_spec <- tibble::tribble( + ~dataset, ~variable, ~type, ~origin, ~code_id, ~sig_dig, ~where, ~derivation_id, + "AE", "AEACN", "text", "Derived", NA, NA, NA, "MT.AE.AEACN", + "AE", "AEBDSYCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEBODSYS", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEDECOD", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEDTC", "date", "Derived", NA, NA, NA, "MT.AE.AEDTC", + "AE", "AEDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AEENDTC", "date", "CRF", NA, NA, NA, NA, + "AE", "AEENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AEHLGT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEHLGTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEHLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AEHLTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AELLT", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AELLTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEOUT", "text", "CRF", "CL.OUT", NA, NA, NA, + "AE", "AEPTCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AEREL", "text", "CRF", "CL.AECAUS", NA, NA, NA, + "AE", "AESCAN", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESCONG", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESDISAB", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESDTH", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESEQ", "integer", "Derived", NA, NA, NA, "MT.AE.AESEQ", + "AE", "AESER", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESEV", "text", "CRF", "CL.SEV", NA, NA, NA, + "AE", "AESHOSP", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESLIFE", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESOC", "text", "Assigned", "CL.AEDICT", NA, NA, NA, + "AE", "AESOCCD", "integer", "Assigned", NA, NA, NA, NA, + "AE", "AESOD", "text", "CRF", "CL.YN", NA, NA, NA, + "AE", "AESPID", "text", "CRF", NA, NA, NA, NA, + "AE", "AESTDTC", "date", "CRF", NA, NA, NA, NA, + "AE", "AESTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "AE", "AETERM", "text", "CRF", NA, NA, NA, NA, + "AE", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "AE", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.AE.EPOCH", + "AE", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "AE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.AE.USUBJID", + "DM", "ACTARM", "text", "Derived", "CL.ARM", NA, NA, "MT.DM.ACTARM", + "DM", "ACTARMCD", "text", "Derived", "CL.ARMCD", NA, NA, "MT.DM.ACTARMCD", + "DM", "AGE", "integer", "Derived", NA, NA, NA, "MT.DM.AGE", + "DM", "AGEU", "text", "Assigned", "CL.AGEU", NA, NA, "COM.DM.AGEU", + "DM", "ARM", "text", "Assigned", "CL.ARM", NA, NA, "COM.DM.ARM", + "DM", "ARMCD", "text", "Assigned", "CL.ARMCD", NA, NA, "COM.DM.ARMCD", + "DM", "COUNTRY", "text", "Derived", "CL.COUNTRY", NA, NA, "MT.DM.COUNTRY", + "DM", "DMDTC", "date", "CRF", NA, NA, NA, NA, + "DM", "DMDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "DM", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "DM", "DTHDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.DTHDTC", + "DM", "DTHFL", "text", "Derived", "CL.Y_BLANK", NA, NA, "MT.DM.DTHFL", + "DM", "ETHNIC", "text", "Derived", "CL.ETHNIC", NA, NA, "MT.DM.ETHNIC", + "DM", "RACE", "text", "CRF", "CL.RACE", NA, NA, NA, + "DM", "RFENDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFENDTC", + "DM", "RFICDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFICDTC", + "DM", "RFPENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFPENDTC", + "DM", "RFSTDTC", "date", "Derived", NA, NA, NA, "MT.DM.RFSTDTC", + "DM", "RFXENDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXENDTC", + "DM", "RFXSTDTC", "datetime", "Derived", NA, NA, NA, "MT.DM.RFXSTDTC", + "DM", "SEX", "text", "CRF", "CL.SEX", NA, NA, NA, + "DM", "SITEID", "text", "Assigned", NA, NA, NA, NA, + "DM", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "DM", "SUBJID", "text", "CRF", NA, NA, NA, NA, + "DM", "USUBJID", "text", "Derived", NA, NA, NA, "MT.DM.USUBJID", + "EX", "DOMAIN", "text", "Assigned", NA, NA, NA, NA, + "EX", "EPOCH", "text", "Derived", "CL.EPOCH", NA, NA, "MT.EX.EPOCH", + "EX", "EXDOSE", "integer", "eDT", NA, NA, NA, NA, + "EX", "EXDOSFRM", "text", "eDT", "CL.EXDOSFRM", NA, NA, NA, + "EX", "EXDOSFRQ", "text", "eDT", "CL.EXFREQ", NA, NA, NA, + "EX", "EXDOSU", "text", "eDT", "CL.EXDOSEU", NA, NA, NA, + "EX", "EXENDTC", "date", "CRF", NA, NA, NA, NA, + "EX", "EXENDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "EX", "EXROUTE", "text", "eDT", "CL.EXROUTE", NA, NA, NA, + "EX", "EXSEQ", "integer", "Derived", NA, NA, NA, "MT.EX.EXSEQ", + "EX", "EXSTDTC", "date", "CRF", NA, NA, NA, NA, + "EX", "EXSTDY", "integer", "Derived", NA, NA, NA, "MT.COMPMETHOD.STUDY_DAY", + "EX", "EXTRT", "text", "eDT", "CL.EXTRT", NA, NA, NA, + "EX", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "EX", "USUBJID", "text", "Derived", NA, NA, NA, "MT.EX.USUBJID", + "EX", "VISIT", "text", "CRF", "CL.VISIT", NA, NA, NA, + "EX", "VISITDY", "integer", "Derived", NA, NA, NA, "MT.EX.VISITDY", + "EX", "VISITNUM", "float", "CRF", "CL.VISITNUM", 1L, NA, NA, + "SUPPAE", "IDVAR", "text", "Assigned", NA, NA, NA, "COM.SUPPAE.IDVAR", + "SUPPAE", "IDVARVAL", "text", "Derived", NA, NA, NA, "MT.SUPPAE.IDVARVAL", + "SUPPAE", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, + "SUPPAE", "QLABEL", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "QNAM", "text", "Assigned", "CL.SUPPAE.QNAM", NA, NA, NA, + "SUPPAE", "QORIG", "text", "Assigned", NA, NA, NA, NA, + "SUPPAE", "QVAL", "text", "Derived", "CL.YN", NA, "QNAM == 'TRTEMFL'", "MT.SUPPAE.QNAM.TRTEMFL", + "SUPPAE", "RDOMAIN", "text", "Assigned", NA, NA, NA, "COM.SUPPAE.RDOMAIN", + "SUPPAE", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "SUPPAE", "USUBJID", "text", "Derived", NA, NA, NA, "MT.SUPPAE.USUBJID", + "SUPPDM", "IDVAR", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.IDVAR", + "SUPPDM", "IDVARVAL", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.IDVARVAL", + "SUPPDM", "QEVAL", "text", "Assigned", "CL.QEVAL", NA, NA, NA, + "SUPPDM", "QLABEL", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "QNAM", "text", "Assigned", "CL.SUPPDM.QNAM", NA, NA, NA, + "SUPPDM", "QORIG", "text", "Assigned", NA, NA, NA, NA, + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT16'", "MT.SUPPDM.QNAM.COMPLT16", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT24'", "MT.SUPPDM.QNAM.COMPLT24", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'COMPLT8'", "MT.SUPPDM.QNAM.COMPLT8", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'EFFICACY'", "MT.SUPPDM.QNAM.EFFICACY", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'SAFETY'", "MT.SUPPDM.QNAM.SAFETY", + "SUPPDM", "QVAL", "text", "Derived", "CL.Y_BLANK", NA, "QNAM == 'ITT'", "MT.SUPPDM.QNAM.ITT", + "SUPPDM", "RDOMAIN", "text", "Assigned", NA, NA, NA, "COM.SUPPDM.RDOMAIN", + "SUPPDM", "STUDYID", "text", "CRF", NA, NA, NA, NA, + "SUPPDM", "USUBJID", "text", "Derived", NA, NA, NA, "MT.SUPPDM.USUBJID" + ) + + # Read from define + def_value_spec <- xml_to_value_spec(define) %>% + arrange(dataset, variable) %>% + select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id) + + # Read from spec + comments <- spec$Comments$ID + spec_value_spec <- spec_type_to_value_spec(spec) %>% + arrange(dataset, variable) %>% + select(dataset, variable, type, origin, code_id, sig_dig, where, derivation_id) %>% + # Fix naming as it is slightly different, but matches within metacore + mutate( + code_id = if_else(!is.na(code_id), paste0("CL.", code_id), code_id, NA_character_), + derivation_id = case_when( + origin == "Derived" ~ paste0("MT.", derivation_id), + origin == "Assigned" & derivation_id %in% comments ~ paste0("COM.", derivation_id), + TRUE ~ NA_character_ + ), + where = str_replace(where, "EQ", "=="), + where = str_split(where, "\\s(?=(\\w*$))") %>% + map(~ paste0(.[1], " '", .[2])), + where = if_else(where == "NA 'NA", NA_character_, paste0(where, "'")) + ) + + # Tests + expect_equal(def_value_spec, ref_value_spec) + expect_equal(spec_value_spec, ref_value_spec) +}) test_that("derivation reader tests", { - # Create reference derivation tibble - ref_derivation <- tibble::tribble( - ~derivation_id, ~derivation, - "COM.DM.AGEU", "AGEU='YEARS'", - "COM.DM.ARM", "According to randomization list", - "COM.DM.ARMCD", "According to randomization list", - "COM.SUPPAE.IDVAR", "IDVAR='AESEQ'", - "COM.SUPPAE.RDOMAIN", "RDOMAIN='AE'", - "COM.SUPPDM.IDVAR", "IDVAR=' '", - "COM.SUPPDM.IDVARVAL", "IDVARVAL= ' '", - "COM.SUPPDM.RDOMAIN", "RDOMAIN='AE'", - "MT.AE.AEACN" , "AEACN=Null (data on action taken concerning study treatment was not collected)" , - "MT.AE.AEDTC" , "Date of final visit (SV)" , - "MT.AE.AESEQ" , "Sequential number identifying records within each USUBJID" , - "MT.AE.EPOCH" , "Use AESTDTC to determine from the SDTM SE what EPOCH this record falls under.\nIf SE.SESTDTC <= AESTDTC < SE.SEENDTC, then use that EPOCH.", - "MT.AE.USUBJID" ,"Concatenation of STUDYID, DM.SITEID and DM.SUBJID" , - "MT.COMPMETHOD.STUDY_DAY" ,"(date portion of --DTC) minus (date portion of RFSTDTC) , add 1 if -- DTC >= RFSTDC" , - "MT.DM.ACTARM" ,"Derived from EX" , - "MT.DM.ACTARMCD" ,"Derived from EX" , - "MT.DM.AGE" ,"Subject's Age at start of study drug (RFSTDTC)." , - "MT.DM.COUNTRY" ,"Derived from site information" , - "MT.DM.DTHDTC" ,"If DS record exists with DSDECOD='DEATH' then DTHDTC=AEENDTC." , - "MT.DM.DTHFL" ,"If DS record exists with DSDECOD='DEATH' then DEATHFL=Y." , - "MT.DM.ETHNIC" ,"Derived from Origin entered on CRF: ETHINC='HISPANIC OR LATINO' if Origin='Hispanic'. Otherwise ETHNIC='NOT HISPANIC OR LATINO'" , - "MT.DM.RFENDTC" ,"Date/time of last study drug treatment derived from EX" , - "MT.DM.RFICDTC" ,"Date of informed consent was not entered in database (see annotated CRF)" , - "MT.DM.RFPENDTC" ,"DSSTDTC of last disposition event" , - "MT.DM.RFSTDTC" ,"Date/time of first study drug treatment derived from EX" , - "MT.DM.RFXENDTC" ,"RFXENDTC=RFENDTC" , - "MT.DM.RFXSTDTC" ,"RFXSTDTC=RFSTDTC" , - "MT.DM.USUBJID" ,"Concatenation of STUDYID, DM.SITEID and DM.SUBJID" , - "MT.EX.EPOCH" ,"Use EXDTC to determine from the SDTM SE what EPOCH this record falls under.\nIf SE.SESTDTC <= EXDTC < SE.SEENDTC, then use that EPOCH.", - "MT.EX.EXSEQ" ,"Sequential number identifying records within each USUBJID" , - "MT.EX.USUBJID" ,"Concatenation of STUDYID, DM.SITEID and DM.SUBJID" , - "MT.EX.VISITDY" ,"TV.VISITDY" , - "MT.SUPPAE.IDVARVAL" ,"Value of AESEQ of corresponding parent record" , - "MT.SUPPAE.QNAM.TRTEMFL" ,"see SAP" , - "MT.SUPPAE.QVAL" ,"see value level metadata" , - "MT.SUPPAE.USUBJID" ,"Concatenation of STUDYID, DM.SITEID and DM.SUBJID" , - "MT.SUPPDM.QNAM.COMPLT16" ,"see SAP" , - "MT.SUPPDM.QNAM.COMPLT24" ,"see SAP" , - "MT.SUPPDM.QNAM.COMPLT8" ,"see SAP" , - "MT.SUPPDM.QNAM.EFFICACY" ,"see SAP" , - "MT.SUPPDM.QNAM.ITT" ,"see SAP" , - "MT.SUPPDM.QNAM.SAFETY" ,"see SAP" , - "MT.SUPPDM.QVAL" ,"see value level metadata" , - "MT.SUPPDM.USUBJID" ,"Concatenation of STUDYID, DM.SITEID and DM.SUBJID" , - ) - - # Read from define - def_derivation <- xml_to_derivations(define) %>% - arrange(derivation_id) %>% - mutate(derivation = str_replace_all(derivation, '\\"', "\\'")) - - # Read from spec - - ref_deriv <- spec$Methods %>% - select(derivation_id = ID, - derivation = Description) %>% - mutate(derivation_id = paste0("MT.", derivation_id)) - - ref_deriv <- spec$Variables %>% - filter(Origin %in% c("Assigned")) %>% - left_join(select(spec$Comments, ID, Description), by = c("Comment" = "ID")) %>% - mutate(derivation_id = paste0("MT.", Dataset, ".", Variable), - derivation = Description) %>% - select(starts_with("derivation")) %>% - bind_rows(ref_deriv, .) %>% - arrange(derivation_id) %>% - distinct() - - spec_derivation <- spec_type_to_derivations(spec) %>% - arrange(derivation_id) %>% - mutate(derivation_id = paste0("MT.", derivation_id)) - - # Tests - expect_equal(def_derivation, ref_derivation) - expect_equal(spec_derivation, ref_deriv) + # Create reference derivation tibble + ref_derivation <- tibble::tribble( + ~derivation_id, ~derivation, + "COM.DM.AGEU", "AGEU='YEARS'", + "COM.DM.ARM", "According to randomization list", + "COM.DM.ARMCD", "According to randomization list", + "COM.SUPPAE.IDVAR", "IDVAR='AESEQ'", + "COM.SUPPAE.RDOMAIN", "RDOMAIN='AE'", + "COM.SUPPDM.IDVAR", "IDVAR=' '", + "COM.SUPPDM.IDVARVAL", "IDVARVAL= ' '", + "COM.SUPPDM.RDOMAIN", "RDOMAIN='AE'", + "MT.AE.AEACN", "AEACN=Null (data on action taken concerning study treatment was not collected)", + "MT.AE.AEDTC", "Date of final visit (SV)", + "MT.AE.AESEQ", "Sequential number identifying records within each USUBJID", + "MT.AE.EPOCH", "Use AESTDTC to determine from the SDTM SE what EPOCH this record falls under.\nIf SE.SESTDTC <= AESTDTC < SE.SEENDTC, then use that EPOCH.", + "MT.AE.USUBJID", "Concatenation of STUDYID, DM.SITEID and DM.SUBJID", + "MT.COMPMETHOD.STUDY_DAY", "(date portion of --DTC) minus (date portion of RFSTDTC) , add 1 if -- DTC >= RFSTDC", + "MT.DM.ACTARM", "Derived from EX", + "MT.DM.ACTARMCD", "Derived from EX", + "MT.DM.AGE", "Subject's Age at start of study drug (RFSTDTC).", + "MT.DM.COUNTRY", "Derived from site information", + "MT.DM.DTHDTC", "If DS record exists with DSDECOD='DEATH' then DTHDTC=AEENDTC.", + "MT.DM.DTHFL", "If DS record exists with DSDECOD='DEATH' then DEATHFL=Y.", + "MT.DM.ETHNIC", "Derived from Origin entered on CRF: ETHINC='HISPANIC OR LATINO' if Origin='Hispanic'. Otherwise ETHNIC='NOT HISPANIC OR LATINO'", + "MT.DM.RFENDTC", "Date/time of last study drug treatment derived from EX", + "MT.DM.RFICDTC", "Date of informed consent was not entered in database (see annotated CRF)", + "MT.DM.RFPENDTC", "DSSTDTC of last disposition event", + "MT.DM.RFSTDTC", "Date/time of first study drug treatment derived from EX", + "MT.DM.RFXENDTC", "RFXENDTC=RFENDTC", + "MT.DM.RFXSTDTC", "RFXSTDTC=RFSTDTC", + "MT.DM.USUBJID", "Concatenation of STUDYID, DM.SITEID and DM.SUBJID", + "MT.EX.EPOCH", "Use EXDTC to determine from the SDTM SE what EPOCH this record falls under.\nIf SE.SESTDTC <= EXDTC < SE.SEENDTC, then use that EPOCH.", + "MT.EX.EXSEQ", "Sequential number identifying records within each USUBJID", + "MT.EX.USUBJID", "Concatenation of STUDYID, DM.SITEID and DM.SUBJID", + "MT.EX.VISITDY", "TV.VISITDY", + "MT.SUPPAE.IDVARVAL", "Value of AESEQ of corresponding parent record", + "MT.SUPPAE.QNAM.TRTEMFL", "see SAP", + "MT.SUPPAE.QVAL", "see value level metadata", + "MT.SUPPAE.USUBJID", "Concatenation of STUDYID, DM.SITEID and DM.SUBJID", + "MT.SUPPDM.QNAM.COMPLT16", "see SAP", + "MT.SUPPDM.QNAM.COMPLT24", "see SAP", + "MT.SUPPDM.QNAM.COMPLT8", "see SAP", + "MT.SUPPDM.QNAM.EFFICACY", "see SAP", + "MT.SUPPDM.QNAM.ITT", "see SAP", + "MT.SUPPDM.QNAM.SAFETY", "see SAP", + "MT.SUPPDM.QVAL", "see value level metadata", + "MT.SUPPDM.USUBJID", "Concatenation of STUDYID, DM.SITEID and DM.SUBJID", + ) + + # Read from define + def_derivation <- xml_to_derivations(define) %>% + arrange(derivation_id) %>% + mutate(derivation = str_replace_all(derivation, '\\"', "\\'")) + + # Read from spec + + ref_deriv <- spec$Methods %>% + select( + derivation_id = ID, + derivation = Description + ) %>% + mutate(derivation_id = paste0("MT.", derivation_id)) + + ref_deriv <- spec$Variables %>% + filter(Origin %in% c("Assigned")) %>% + left_join(select(spec$Comments, ID, Description), by = c("Comment" = "ID")) %>% + mutate( + derivation_id = paste0("MT.", Dataset, ".", Variable), + derivation = Description + ) %>% + select(starts_with("derivation")) %>% + bind_rows(ref_deriv, .) %>% + arrange(derivation_id) %>% + distinct() + + spec_derivation <- spec_type_to_derivations(spec) %>% + arrange(derivation_id) %>% + mutate(derivation_id = paste0("MT.", derivation_id)) + + # Tests + expect_equal(def_derivation, ref_derivation) + expect_equal(spec_derivation, ref_deriv) }) test_that("codelist reader tests", { - # Create reference derivation tibble - ref_codelist <- tibble::tribble( - ~code_id, ~name, ~codes, ~type, - "CL.AECAUS", "AECAUS", tibble(code = c("NONE", "POSSIBLE", "PROBABLE", "REMOTE"), decode = c("NONE", "POSSIBLE", "PROBABLE", "REMOTE")), "code_decode", - "CL.AEDICT", "ADVERSE EVENT DICTIONARY", tibble(dictionary = "MEDDRA", version = "8.0"), "external_library", - "CL.AGEU", "AGEU", tibble(code = "YEARS", decode = "YEARS"), "code_decode", - "CL.ARM", "ARM", tibble(code = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"), decode = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")), "code_decode", - "CL.ARMCD", "ARMCD", tibble(code = c("Scrnfail", "Pbo", "Xan_Lo", "Xan_Hi"), decode = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")), "code_decode", - "CL.COUNTRY", "COUNTRY", tibble(code = "USA", decode = "USA"), "code_decode", - "CL.DRUGDICT", "DRUG DICTIONARY", tibble(dictionary = "WHODRUG", version = "200604"), "external_library", - "CL.EPOCH", "EPOCH", tibble(code = c("SCREENING", "TREATMENT", "FOLLOW-UP"), decode = c("Screening", "Treatment", "Follow-Up")), "code_decode", - "CL.ETHNIC", "ETHNIC", tibble(code = c("NOT HISPANIC OR LATINO", "HISPANIC OR LATINO"), decode = c("NOT HISPANIC OR LATINO", "HISPANIC OR LATINO")), "code_decode", - "CL.EXDOSEU", "EXDOSEU", tibble(code = "mg", decode = "mg"), "code_decode", - "CL.EXDOSFRM", "EXDOSFRM", tibble(code = "PATCH", decode = "PATCH"), "code_decode", - "CL.EXFREQ", "EXFREQ", tibble(code = "QD", decode = "QD"), "code_decode", - "CL.EXROUTE", "EXROUTE", tibble(code = "TRANSDERMAL", decode = "TRANSDERMAL"), "code_decode", - "CL.EXTRT", "EXTRT", tibble(code = c("PLACEBO", "XANOMELINE"), decode = c("PLACEBO", "XANOMELINE")), "code_decode", - "CL.MHDICT", "MEDICAL HISTORY DICTIONARY", tibble(dictionary = "MEDDRA", version = "8.0"), "external_library", - "CL.OUT", "OUT", tibble(code = c("RECOVERED/RESOLVED", "NOT RECOVERED/NOT RESOLVED", "FATAL"), decode = c("RECOVERED/RESOLVED", "NOT RECOVERED/NOT RESOLVED", "FATAL")), "code_decode", - "CL.QEVAL", "QEVAL", tibble(code = "CLINICAL STUDY SPONSOR", decode = "CLINICAL STUDY SPONSOR"), "code_decode", - "CL.RACE", "RACE", tibble(code = c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "ASIAN"), decode = c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "ASIAN")), "code_decode", - "CL.SEV", "SEV", tibble(code = c("MILD", "MODERATE", "SEVERE"), decode = c("MILD", "MODERATE", "SEVERE")), "code_decode", - "CL.SEX", "SEX", tibble(code = c("F", "M", "U"), decode = c("Female", "Male", "Unknown")), "code_decode", - "CL.SUPPAE.QNAM", "SUPPAE.QNAM", tibble(code = "AETRTEM", decode = "TREAMENT EMERGENT FLAG"), "code_decode", - "CL.SUPPDM.QNAM", "SUPPDM.QNAM", tibble(code = c("COMPLT16", "COMPLT24", "COMPLT8", "EFFICACY", "ITT", "SAFETY"), decode = c("Completers of Week 16 Population Flag", "Completers of Week 24 Population Flag", "Completers of Week 8 Population Flag", "Efficacy Population Flag", "Intent to Treat Population Flag", "Safety Population Flag")), "code_decode", - "CL.VISIT", "VISIT", tibble(code = c("SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", - "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup"), decode = c("SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", - "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup")), "code_decode", - "CL.VISITNUM", "VISITNUM", tibble(code = c("1", "1.1", "1.2", "1.3", "2", "3", "3.1", "3.5", "4", "4.1", "4.2", "5", "5.1", "6", "6.1", "7", "7.1", "8", "8.1", "8.2", "9", "9.1", "9.2", "9.3", "10", "10.1", "10.2", "11", "11.1", "11.2", "12", "12.1", "13", "13.1", "101", "201", "501"), decode = c("SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", - "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup")), "code_decode", - #"CL.Y_BLANK", "Y_BLANK", tibble(code = "Y", decode = "Yes"), "code_decode", - "CL.YN", "YN", tibble(code = c("N", "Y"), decode = c("No", "Yes")), "code_decode" - ) - - # Read from define - def_codelist <- xml_to_codelist(define) %>% - filter(code_id != "CL.Y_BLANK") %>% - arrange(code_id) %>% - select(code_id, name, codes, type) - - # Read from spec - spec_codelist <- spec_type_to_codelist(spec, simplify = FALSE) %>% - mutate(code_id = paste0("CL.", code_id)) %>% - filter(code_id != "CL.Y_BLANK") %>% - arrange(code_id) %>% - select(code_id, name, codes, type) - - # Tests - expect_equal(def_codelist, ref_codelist) - expect_equal(spec_codelist, ref_codelist) + # Create reference derivation tibble + ref_codelist <- tibble::tribble( + ~code_id, ~name, ~codes, ~type, + "CL.AECAUS", "AECAUS", tibble(code = c("NONE", "POSSIBLE", "PROBABLE", "REMOTE"), decode = c("NONE", "POSSIBLE", "PROBABLE", "REMOTE")), "code_decode", + "CL.AEDICT", "ADVERSE EVENT DICTIONARY", tibble(dictionary = "MEDDRA", version = "8.0"), "external_library", + "CL.AGEU", "AGEU", tibble(code = "YEARS", decode = "YEARS"), "code_decode", + "CL.ARM", "ARM", tibble(code = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"), decode = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")), "code_decode", + "CL.ARMCD", "ARMCD", tibble(code = c("Scrnfail", "Pbo", "Xan_Lo", "Xan_Hi"), decode = c("Screen Failure", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")), "code_decode", + "CL.COUNTRY", "COUNTRY", tibble(code = "USA", decode = "USA"), "code_decode", + "CL.DRUGDICT", "DRUG DICTIONARY", tibble(dictionary = "WHODRUG", version = "200604"), "external_library", + "CL.EPOCH", "EPOCH", tibble(code = c("SCREENING", "TREATMENT", "FOLLOW-UP"), decode = c("Screening", "Treatment", "Follow-Up")), "code_decode", + "CL.ETHNIC", "ETHNIC", tibble(code = c("NOT HISPANIC OR LATINO", "HISPANIC OR LATINO"), decode = c("NOT HISPANIC OR LATINO", "HISPANIC OR LATINO")), "code_decode", + "CL.EXDOSEU", "EXDOSEU", tibble(code = "mg", decode = "mg"), "code_decode", + "CL.EXDOSFRM", "EXDOSFRM", tibble(code = "PATCH", decode = "PATCH"), "code_decode", + "CL.EXFREQ", "EXFREQ", tibble(code = "QD", decode = "QD"), "code_decode", + "CL.EXROUTE", "EXROUTE", tibble(code = "TRANSDERMAL", decode = "TRANSDERMAL"), "code_decode", + "CL.EXTRT", "EXTRT", tibble(code = c("PLACEBO", "XANOMELINE"), decode = c("PLACEBO", "XANOMELINE")), "code_decode", + "CL.MHDICT", "MEDICAL HISTORY DICTIONARY", tibble(dictionary = "MEDDRA", version = "8.0"), "external_library", + "CL.OUT", "OUT", tibble(code = c("RECOVERED/RESOLVED", "NOT RECOVERED/NOT RESOLVED", "FATAL"), decode = c("RECOVERED/RESOLVED", "NOT RECOVERED/NOT RESOLVED", "FATAL")), "code_decode", + "CL.QEVAL", "QEVAL", tibble(code = "CLINICAL STUDY SPONSOR", decode = "CLINICAL STUDY SPONSOR"), "code_decode", + "CL.RACE", "RACE", tibble(code = c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "ASIAN"), decode = c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "ASIAN")), "code_decode", + "CL.SEV", "SEV", tibble(code = c("MILD", "MODERATE", "SEVERE"), decode = c("MILD", "MODERATE", "SEVERE")), "code_decode", + "CL.SEX", "SEX", tibble(code = c("F", "M", "U"), decode = c("Female", "Male", "Unknown")), "code_decode", + "CL.SUPPAE.QNAM", "SUPPAE.QNAM", tibble(code = "AETRTEM", decode = "TREAMENT EMERGENT FLAG"), "code_decode", + "CL.SUPPDM.QNAM", "SUPPDM.QNAM", tibble(code = c("COMPLT16", "COMPLT24", "COMPLT8", "EFFICACY", "ITT", "SAFETY"), decode = c("Completers of Week 16 Population Flag", "Completers of Week 24 Population Flag", "Completers of Week 8 Population Flag", "Efficacy Population Flag", "Intent to Treat Population Flag", "Safety Population Flag")), "code_decode", + "CL.VISIT", "VISIT", tibble(code = c( + "SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", + "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup" + ), decode = c( + "SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", + "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup" + )), "code_decode", + "CL.VISITNUM", "VISITNUM", tibble(code = c("1", "1.1", "1.2", "1.3", "2", "3", "3.1", "3.5", "4", "4.1", "4.2", "5", "5.1", "6", "6.1", "7", "7.1", "8", "8.1", "8.2", "9", "9.1", "9.2", "9.3", "10", "10.1", "10.2", "11", "11.1", "11.2", "12", "12.1", "13", "13.1", "101", "201", "501"), decode = c( + "SCREENING 1", "UNSCHEDULED 1.1", "UNSCHEDULED 1.2", "UNSCHEDULED 1.3", "SCREENING 2", "BASELINE", "UNSCHEDULED 3.1", "AMBUL ECG PLACEMENT", "WEEK 2", "UNSCHEDULED 4.1", "UNSCHEDULED 4.2", "WEEK 4", "UNSCHEDULED 5.1", "AMBUL ECG REMOVAL", + "UNSCHEDULED 6.1", "WEEK 6", "UNSCHEDULED 7.1", "WEEK 8", "WEEK 10 (T)", "UNSCHEDULED 8.2", "WEEK 12", "WEEK 14 (T)", "UNSCHEDULED 9.2", "UNSCHEDULED 9.3", "WEEK 16", "WEEK 18 (T)", "UNSCHEDULED 10.2", "WEEK 20", "WEEK 22 (T)", "UNSCHEDULED 11.2", "WEEK 24", "UNSCHEDULED 12.1", "WEEK 26", "UNSCHEDULED 13.1", "AE FOLLOW-UP", "RETRIEVAL", "Rash followup" + )), "code_decode", + # "CL.Y_BLANK", "Y_BLANK", tibble(code = "Y", decode = "Yes"), "code_decode", + "CL.YN", "YN", tibble(code = c("N", "Y"), decode = c("No", "Yes")), "code_decode" + ) + + # Read from define + def_codelist <- xml_to_codelist(define) %>% + filter(code_id != "CL.Y_BLANK") %>% + arrange(code_id) %>% + select(code_id, name, codes, type) + + # Read from spec + spec_codelist <- spec_type_to_codelist(spec, simplify = FALSE) %>% + mutate(code_id = paste0("CL.", code_id)) %>% + filter(code_id != "CL.Y_BLANK") %>% + arrange(code_id) %>% + select(code_id, name, codes, type) + + # Tests + expect_equal(def_codelist, ref_codelist) + expect_equal(spec_codelist, ref_codelist) }) test_that("Specification Reader's errors and warnings", { - # Check the name-checks work for each - expect_error(spec_type_to_ds_spec(spec, cols = c("foo"))) - expect_error(spec_type_to_ds_spec(spec, cols = c("foo" = "foo"))) - expect_error(spec_type_to_ds_vars(spec, cols = c("foo"))) - expect_error(spec_type_to_ds_vars(spec, cols = c("foo" = "foo"))) - expect_error(spec_type_to_var_spec(spec, cols = c("foo"))) - expect_error(spec_type_to_var_spec(spec, cols = c("foo" = "foo"))) - expect_error(spec_type_to_value_spec(spec, cols = c("foo"))) - expect_error(spec_type_to_value_spec(spec, cols = c("foo" = "foo"))) - expect_error(spec_type_to_derivations(spec, cols = c("foo"))) - expect_error(spec_type_to_derivations(spec, cols = c("foo" = "foo"))) - expect_error(spec_type_to_codelist(spec, cols = c("foo"))) - expect_error(spec_type_to_codelist(spec, cols = c("foo" = "foo"))) - + # Check the name-checks work for each + expect_error(spec_type_to_ds_spec(spec, cols = c("foo"))) + expect_error(spec_type_to_ds_spec(spec, cols = c("foo" = "foo"))) + expect_error(spec_type_to_ds_vars(spec, cols = c("foo"))) + expect_error(spec_type_to_ds_vars(spec, cols = c("foo" = "foo"))) + expect_error(spec_type_to_var_spec(spec, cols = c("foo"))) + expect_error(spec_type_to_var_spec(spec, cols = c("foo" = "foo"))) + expect_error(spec_type_to_value_spec(spec, cols = c("foo"))) + expect_error(spec_type_to_value_spec(spec, cols = c("foo" = "foo"))) + expect_error(spec_type_to_derivations(spec, cols = c("foo"))) + expect_error(spec_type_to_derivations(spec, cols = c("foo" = "foo"))) + expect_error(spec_type_to_codelist(spec, cols = c("foo"))) + expect_error(spec_type_to_codelist(spec, cols = c("foo" = "foo"))) }) -test_that("Check no value reader",{ - empty_val_check <- spec_to_metacore("spec_no_val.xlsx", quiet = TRUE, - where_sep_sheet = FALSE) - auto_val <- empty_val_check$value_spec %>% - filter(where != "TRUE") - expect_equal(nrow(auto_val), 0L) +test_that("Check no value reader", { + empty_val_check <- spec_to_metacore("spec_no_val.xlsx", + quiet = TRUE, + where_sep_sheet = FALSE + ) + auto_val <- empty_val_check$value_spec %>% + filter(where != "TRUE") + expect_equal(nrow(auto_val), 0L) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 22d9b99..f2d62a1 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,25 +1,27 @@ test_df <- data.frame( - a = c("blah", NA, NA), - b = c("SpecificWord", "Incorrect", "Dummy") + a = c("blah", NA, NA), + b = c("SpecificWord", "Incorrect", "Dummy") ) test_that("check_structure function works", { # error when NA not permissible - expect_equal(check_structure(test_df, "a", is.character, FALSE, "test_df")$error, - "`a` from the `test_df` table contains missing values. Actual values are needed.") + expect_equal( + check_structure(test_df, "a", is.character, FALSE, "test_df")$error, + "`a` from the `test_df` table contains missing values. Actual values are needed." + ) - expect_equal(check_structure(test_df, "a", is.logical, TRUE, "test_df")$warning, - "test_df$a fails is.logical check" - ) + expect_equal( + check_structure(test_df, "a", is.logical, TRUE, "test_df")$warning, + "test_df$a fails is.logical check" + ) expect_null(check_structure(test_df, "b", check_words("SpecificWord", "Incorrect", "Dummy"), TRUE, "test_df")$warning) expect_warning(check_structure(test_df, "b", check_words("SpecificWord"), TRUE, "test_df")) - }) test_that("check_words creates function", { - expect_true(is.function(check_words("word"))) + expect_true(is.function(check_words("word"))) }) @@ -46,71 +48,98 @@ test_that("add labels adds NULL to missing labels", { }) test_that("metacore example returns file options", { - expect_equal(sort(metacore_example()), - sort(c("ADaM_define_CDISC_pilot3.xml", "mock_spec.xlsx", "p21_mock.xlsx", "pilot_ADaM.rda", - "pilot_SDTM.rda", "SDTM_define.xml", "SDTM_spec_CDISC_pilot.xlsx"))) + expect_equal( + sort(metacore_example()), + sort(c( + "ADaM_define_CDISC_pilot3.xml", "mock_spec.xlsx", "p21_mock.xlsx", "pilot_ADaM.rda", + "pilot_SDTM.rda", "SDTM_define.xml", "SDTM_spec_CDISC_pilot.xlsx" + )) + ) }) test_that("quiet_if_true returns expression result when quiet = FALSE", { - result <- quiet_if_true({ 1 + 1 }, quiet = FALSE) - expect_equal(result, 2) + result <- quiet_if_true( + { + 1 + 1 + }, + quiet = FALSE + ) + expect_equal(result, 2) }) test_that("quiet_if_true suppresses messages when quiet = TRUE", { - expect_silent( - quiet_if_true({ - message("this should not print") - 10 - }, quiet = TRUE) - ) + expect_silent( + quiet_if_true( + { + message("this should not print") + 10 + }, + quiet = TRUE + ) + ) }) test_that("quiet_if_true suppresses warnings when quiet = TRUE", { - expect_silent( - quiet_if_true({ - warning("this should not print") - 5 - }, quiet = TRUE) - ) + expect_silent( + quiet_if_true( + { + warning("this should not print") + 5 + }, + quiet = TRUE + ) + ) }) test_that("quiet_if_true suppresses cli output when quiet = TRUE", { - skip_if_not_installed("cli") - - expect_silent( - quiet_if_true({ - cli::cli_alert_info("cli output should not print") - cli::cli_rule("Suppressed rule") - cli::cli_bullets(c("• Bullet should be suppressed")) - 42 - }, quiet = TRUE) - ) + skip_if_not_installed("cli") + + expect_silent( + quiet_if_true( + { + cli::cli_alert_info("cli output should not print") + cli::cli_rule("Suppressed rule") + cli::cli_bullets(c("• Bullet should be suppressed")) + 42 + }, + quiet = TRUE + ) + ) }) test_that("quiet_if_true does not suppress errors when quiet = TRUE", { - expect_error( - quiet_if_true({ - stop("this error must propagate") - }, quiet = TRUE), - "this error must propagate" - ) + expect_error( + quiet_if_true( + { + stop("this error must propagate") + }, + quiet = TRUE + ), + "this error must propagate" + ) }) test_that("quiet_if_true still evaluates side-effect code when quiet = TRUE", { - env <- new.env(parent = emptyenv()) - env$x <- 0 + env <- new.env(parent = emptyenv()) + env$x <- 0 - quiet_if_true({ + quiet_if_true( + { env$x <- 99 - }, quiet = TRUE) + }, + quiet = TRUE + ) - expect_equal(env$x, 99) + expect_equal(env$x, 99) }) test_that("quiet_if_true evaluates expr normally when quiet = FALSE", { - x <- quiet_if_true({ + x <- quiet_if_true( + { message("this should print normally") 123 - }, quiet = FALSE) + }, + quiet = FALSE + ) - expect_equal(x, 123) + expect_equal(x, 123) }) diff --git a/tests/testthat/test-validators.R b/tests/testthat/test-validators.R index e597d95..80057a1 100644 --- a/tests/testthat/test-validators.R +++ b/tests/testthat/test-validators.R @@ -1,8 +1,8 @@ # where should this function go empty_df <- function(nms, fill) { - df <- as.data.frame(matrix(fill,1,length(nms))) - names(df) <- nms - return(df) + df <- as.data.frame(matrix(fill, 1, length(nms))) + names(df) <- nms + return(df) } # both of these functions only work @@ -10,121 +10,128 @@ empty_df <- function(nms, fill) { # and i think its checking the wrong thing test_that("specific words and primitive columns fail when character", { - - dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) - - expect_warning(do.call(check_columns, dfs)) + dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) + + expect_warning(do.call(check_columns, dfs)) }) test_that("NA columns fail", { - - dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) - - expect_error(do.call(check_columns, dfs)) + dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>% + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) + + expect_error(do.call(check_columns, dfs)) }) test_that("NA columns fail", { - - dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) - - dfs$ds_spec$label <- NA - - expect_warning(do.call(check_columns, dfs)) + dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) + + dfs$ds_spec$label <- NA + + expect_warning(do.call(check_columns, dfs)) }) test_that("all_message dataframe contains 6 datasets", { - expect_equal(all_message() %>% - distinct(dataset) %>% - nrow(), 7) + expect_equal(all_message() %>% + distinct(dataset) %>% + nrow(), 7) }) test_that("check cross-reference tests", { - dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) - - dfs$var_spec <- dfs$var_spec %>% - mutate(variable = "B") - dfs$derivations <- dfs$derivations %>% - mutate(derivation_id = "C") - dfs$codelist <- dfs$codelist %>% - mutate(code_id = "D") - expect_warning(do.call(metacore, dfs[1:7])) + dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>% + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) + + dfs$var_spec <- dfs$var_spec %>% + mutate(variable = "B") + dfs$derivations <- dfs$derivations %>% + mutate(derivation_id = "C") + dfs$codelist <- dfs$codelist %>% + mutate(code_id = "D") + expect_warning(do.call(metacore, dfs[1:7])) }) test_that("test for incorrect column names", { - dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>% - setNames(c("ds_spec", - "ds_vars", - "var_spec", - "value_spec", - "derivations", - "codelist", - "supp")) - - dfs$codelist <- dfs$codelist %>% - mutate(codelist2 = "A") - expect_warning(do.call(metacore, dfs[1:7])) + dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>% + setNames(c( + "ds_spec", + "ds_vars", + "var_spec", + "value_spec", + "derivations", + "codelist", + "supp" + )) + + dfs$codelist <- dfs$codelist %>% + mutate(codelist2 = "A") + expect_warning(do.call(metacore, dfs[1:7])) }) test_that("check object works", { - load(metacore_example("pilot_ADaM.rda")) - metacore %>% - is_metacore() %>% - expect_equal(TRUE) + load(metacore_example("pilot_ADaM.rda")) + metacore %>% + is_metacore() %>% + expect_equal(TRUE) - is_metacore("THIS IS NOT A THING") %>% - expect_equal(FALSE) + is_metacore("THIS IS NOT A THING") %>% + expect_equal(FALSE) }) # Test that is_DatasetMeta works as intended for various class types load(metacore_example("pilot_ADaM.rda")) test_that("is_DatasetMeta returns FALSE if a non-DatasetMeta object is supplied", { - expect_false(is_DatasetMeta(metacore)) + expect_false(is_DatasetMeta(metacore)) }) dataset_meta <- select_dataset(metacore, "ADSL", quiet = TRUE) test_that("is_DatasetMeta returns TRUE if a DatasetMeta object is supplied", { - expect_true(is_DatasetMeta(dataset_meta)) + expect_true(is_DatasetMeta(dataset_meta)) }) # Test that internal function check_DatasetMeta works as intended for various class types test_that("check_DatasetMeta throws an error if a non-Metacore object is supplied", { - expect_error(verify_DatasetMeta("DUMMY")) + expect_error(verify_DatasetMeta("DUMMY")) }) test_that("is_DatasetMeta throws an error if a non-DatasetMeta object is supplied", { - expect_error(verify_DatasetMeta(metacore)) + expect_error(verify_DatasetMeta(metacore)) }) test_that("is_DatasetMeta returns TRUE if a DatasetMeta object is supplied", { - expect_true(verify_DatasetMeta(dataset_meta)) + expect_true(verify_DatasetMeta(dataset_meta)) }) diff --git a/vignettes/Building_Specification_Readers.Rmd b/vignettes/Building_Specification_Readers.Rmd index a000ed9..37754f0 100644 --- a/vignettes/Building_Specification_Readers.Rmd +++ b/vignettes/Building_Specification_Readers.Rmd @@ -11,8 +11,8 @@ editor_options: ```{r, include = FALSE} knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" + collapse = TRUE, + comment = "#>" ) ``` @@ -66,7 +66,6 @@ But, before we start any of that, we need to read in our document using the `rea ```{r} doc <- read_all_sheets(metacore_example("mock_spec.xlsx")) doc %>% map(head) - ``` Let's start with making the ds_spec (dataset specification) table using `spec_type_to_ds_spec`. The ds_spec table is made of 3 columns: the dataset name, the dataset structure, and the dataset label. If we look at our specification document, it looks like all this information is in the Domains tab. Now we know what we need, we can start building the table by trying the `spec_type_to_ds_spec` function. @@ -88,10 +87,13 @@ If we look at the default input for cols, `"label" = "[L|l]abel|[D|d]escription" We only need the Domain `Name`, `Label`, and `Data Structure` columns. So we can update the expressions to be more specific. ```{r} -ds_spec <- spec_type_to_ds_spec(doc, - cols = c("dataset" = "Name", - "structure" = "Data Structure", - "label" = "Label")) +ds_spec <- spec_type_to_ds_spec(doc, + cols = c( + "dataset" = "Name", + "structure" = "Data Structure", + "label" = "Label" + ) +) head(ds_spec) ``` @@ -124,13 +126,19 @@ This error means it is trying to match the sheet entitled Variable, the variable ```{r} doc$Variables %>% head() -ds_vars<- spec_type_to_ds_vars(doc, cols = c("dataset" = "Domain", - "variable" = "[V|v]ariable [N|n]ame", - "order" = "[V|v]ariable [O|o]rder", - "mandatory" = "[M|m]andatory"), - key_seq_cols = c("dataset" = "Domain Name", - "key_seq" = "Key"), - sheet = "[V|v]ar|Domains") +ds_vars <- spec_type_to_ds_vars(doc, + cols = c( + "dataset" = "Domain", + "variable" = "[V|v]ariable [N|n]ame", + "order" = "[V|v]ariable [O|o]rder", + "mandatory" = "[M|m]andatory" + ), + key_seq_cols = c( + "dataset" = "Domain Name", + "key_seq" = "Key" + ), + sheet = "[V|v]ar|Domains" +) head(ds_vars) ``` @@ -152,20 +160,22 @@ The next table we have is var_spec, the table of variable level metadata. var_sp Looking back at our specification we see this will also be built using the Variable tab. So, we know we need to edit the regular expression for the variable to make it more specific. Additionally, if you look at the default for `cols` you see there is a dataset input. This is because some standards aren't 100% consistent, some variables (e.g. visit) have different lengths depending on the dataset. **So to accommodate this some of the variables in var_spec are in the ds.variable format**. These builders will do this conversion for you , but the dataset is needed. The other thing the builders can automatically deal with is the common variable. If given a dataset column, the builder function will automatically figure out which variables are common to all dataset. This is good because we don't have a common variable in our specs. ```{r} -var_spec <- spec_type_to_var_spec(doc, cols = c("variable" = "Variable Name", - "length" = "[L|l]ength", - "label" = "[L|l]abel", - "type" = "[T|t]ype", - "dataset" = "[D|d]ataset|[D|d]omain", - "format" = "Format")) +var_spec <- spec_type_to_var_spec(doc, cols = c( + "variable" = "Variable Name", + "length" = "[L|l]ength", + "label" = "[L|l]abel", + "type" = "[T|t]ype", + "dataset" = "[D|d]ataset|[D|d]omain", + "format" = "Format" +)) head(var_spec) ``` There is one issue here: the format column is also the codelist names. This is because the information came from the "Controlled Term or Format" column of my spec document. So the final step of preparing var_spec table is to remove the controlled terms. It is easy here because all the formats end in a full stop (.), but the controlled terms don't. ```{r} -var_spec <- var_spec %>% - mutate(format = if_else(str_detect(format, "\\."), format, "")) +var_spec <- var_spec %>% + mutate(format = if_else(str_detect(format, "\\."), format, "")) ``` The next dataset is value_spec, which contains the value level metadata. It is made up of 8 columns: @@ -189,28 +199,38 @@ The next dataset is value_spec, which contains the value level metadata. It is m By default, `spec_type_to_value_spec` is set up to have the where information on a different sheet because that is the format of a P21 spec, but in our spec we don't have that. In fact, we don't have any value level metadata in our spec. But, that is fine - the default builders will just pull what information it can from the variable tab. Additionally this spec doesn't have a predecessor column, so we can just use the method column. ```{r} -value_spec <- spec_type_to_value_spec(doc, cols = c("dataset" = "VLM Name|Domain", - "variable" = "VLM Name|Variable Name", - "origin" = "[O|o]rigin", - "type" = "[T|t]ype", - "code_id" = "Controlled Term", - "where" = "Parameter Code", - "derivation_id" = "Method", - "predecessor" = "Method"), - where_sep_sheet = FALSE) +value_spec <- spec_type_to_value_spec(doc, + cols = c( + "dataset" = "VLM Name|Domain", + "variable" = "VLM Name|Variable Name", + "origin" = "[O|o]rigin", + "type" = "[T|t]ype", + "code_id" = "Controlled Term", + "where" = "Parameter Code", + "derivation_id" = "Method", + "predecessor" = "Method" + ), + where_sep_sheet = FALSE +) head(value_spec) ``` The derivation table is relatively simple by comparison. It just has two columns, the derivation id and the derivation. But, the derivation comes from the supplied derivation, predecessor, or comment column depending on the origin. In this mock we don't have a predecessor column so we can set to comment as well. ```{r} -derivation <- spec_type_to_derivations(doc, cols = c("derivation_id" = "Name", - "derivation" = "[D|d]efinition|[D|d]escription"), - var_cols = c("dataset" = "Domain Name", - "variable" = "Variable Name|VLM", - "origin" = "[O|o]rigin", - "predecessor" = "Comment", - "comment" = "Comment")) +derivation <- spec_type_to_derivations(doc, + cols = c( + "derivation_id" = "Name", + "derivation" = "[D|d]efinition|[D|d]escription" + ), + var_cols = c( + "dataset" = "Domain Name", + "variable" = "Variable Name|VLM", + "origin" = "[O|o]rigin", + "predecessor" = "Comment", + "comment" = "Comment" + ) +) head(derivation) ``` @@ -219,20 +239,26 @@ The final table is codelist. This table contains all the code/decode pairs, all By default the `spec_type_to_codelist` function expects codelists and external dictionaries. But, in the specification we only have codelist so `dict_cols` needs to be set to null. ```{r} -codelist <- spec_type_to_codelist(doc, codelist_cols = c("code_id" = "Codelist Code", - "name" = "Codelist Name", - "code" = "Coded Value", - "decode" = "Decoded Value"), - simplify = TRUE, - dict_cols = NULL) +codelist <- spec_type_to_codelist(doc, + codelist_cols = c( + "code_id" = "Codelist Code", + "name" = "Codelist Name", + "code" = "Coded Value", + "decode" = "Decoded Value" + ), + simplify = TRUE, + dict_cols = NULL +) head(codelist) ``` Now we have all the tables we need we can make the metacore object ```{r} -metacore(ds_spec, ds_vars, var_spec, value_spec, - derivation, codelist) +metacore( + ds_spec, ds_vars, var_spec, value_spec, + derivation, codelist +) ``` And we're good to go! diff --git a/vignettes/Example.Rmd b/vignettes/Example.Rmd index 5bf6fa2..42be0b8 100644 --- a/vignettes/Example.Rmd +++ b/vignettes/Example.Rmd @@ -55,7 +55,7 @@ Something to note about a metacore object is that it inherently holds all data f subset <- test %>% select_dataset("DM") subset$ds_spec -# a simplified dataframe +# a simplified dataframe subset_t <- test %>% select_dataset("DM", simplify = TRUE) ```