diff --git a/.github/workflows/R_CMD_check_Hades.yml b/.github/workflows/R_CMD_check_Hades.yml index 266cc52a7..52f905c20 100644 --- a/.github/workflows/R_CMD_check_Hades.yml +++ b/.github/workflows/R_CMD_check_Hades.yml @@ -3,10 +3,11 @@ on: push: branches: - - '**' + - develop + - main pull_request: branches: - - '**' + - main name: R-CMD-check diff --git a/.github/workflows/R_CMD_check_Hades_minor.yml b/.github/workflows/R_CMD_check_Hades_minor.yml new file mode 100644 index 000000000..ab750dc70 --- /dev/null +++ b/.github/workflows/R_CMD_check_Hades_minor.yml @@ -0,0 +1,128 @@ +#Designed to be a fast github actions check - longer running actions to only run on releases +on: + pull_request: + branches: + - '**' + - '!main' + +name: R-CMD-check-minor + +jobs: + R-CMD-check-minor: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + max-parallel: 1 + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} # Does not appear to have Java 32-bit, hence the --no-multiarch + - {os: macOS-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GH_TOKEN }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + #CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }} + #CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }} + #CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }} + #CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }} + #CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }} + CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }} + CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }} + CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }} + CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }} + CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }} + #CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }} + #CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }} + #CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} + #CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} + #CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} + #CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }} + #CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} + #CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} + #CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} + #CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v1 + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-tinytex@v1 + + - uses: r-lib/actions/setup-pandoc@v1 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + run: | + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') + + - name: Install libssh + if: runner.os == 'Linux' + run: | + sudo apt-get install libssh-dev + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE, INSTALL_opts=c("--no-multiarch")) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Install covr + if: runner.os == 'macOS' + run: | + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Remove check folder if exists + if: runner.os == 'macOS' + run: unlink("check", recursive = TRUE) + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-multiarch"), build_args = c("--no-manual"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@v2 + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check + + - name: Upload source package + if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v2 + with: + name: package_tarball + path: check/*.tar.gz + + - name: Test coverage + if: runner.os == 'macOS' + run: covr::codecov() + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index b6714de50..a64d41011 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: CohortDiagnostics Type: Package Title: Diagnostics for OHDSI Cohorts -Version: 3.1.1 -Date: 2022-07-20 +Version: 3.1.2 +Date: 2022-12-19 Authors@R: c( person("Gowtham", "Rao", email = "rao@ohdsi.org", role = c("aut", "cre")), person("Martijn", "Schuemie", email = "schuemie@ohdsi.org", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index 30681ca75..7c5d05a72 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +CohortDiagnostics 3.1.2 +======================= +Bug Fixes: + +1. Removed package dependency snapshot capture as it was breaking on newer versions of R + + CohortDiagnostics 3.1.1 ======================= Changes: diff --git a/R/CohortCharacterizationDiagnostics.R b/R/CohortCharacterizationDiagnostics.R index e55132e98..690c81905 100644 --- a/R/CohortCharacterizationDiagnostics.R +++ b/R/CohortCharacterizationDiagnostics.R @@ -48,12 +48,15 @@ getCohortCharacteristics <- function(connectionDetails = NULL, covariateSettings = covariateSettings, aggregated = TRUE ) - }) + } + ) populationSize <- attr(x = featureExtractionOutput, which = "metaData")$populationSize populationSize <- - dplyr::tibble(cohortId = names(populationSize) %>% as.numeric(), - populationSize = populationSize) + dplyr::tibble( + cohortId = names(populationSize) %>% as.numeric(), + populationSize = populationSize + ) if (!"analysisRef" %in% names(results)) { results$analysisRef <- featureExtractionOutput$analysisRef @@ -82,8 +85,8 @@ getCohortCharacteristics <- function(connectionDetails = NULL, dplyr::mutate(p = sumValue / populationSize) if (nrow(covariates %>% - dplyr::filter(p > 1) %>% - dplyr::collect()) > 0) { + dplyr::filter(p > 1) %>% + dplyr::collect()) > 0) { stop( paste0( "During characterization, population size (denominator) was found to be smaller than features Value (numerator).", @@ -98,37 +101,37 @@ getCohortCharacteristics <- function(connectionDetails = NULL, dplyr::rename(mean = averageValue) %>% dplyr::select(-populationSize) - if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { - covariates <- covariates %>% - dplyr::select( - cohortId, - timeId, - covariateId, - sumValue, - mean, - sd - ) - if (length(is.na(covariates$timeId)) > 0) { - covariates[is.na(covariates$timeId),]$timeId <- -1 - } - } else { - covariates <- covariates %>% - dplyr::mutate(timeId = 0) %>% - dplyr::select( - cohortId, - timeId, - covariateId, - sumValue, - mean, - sd - ) - } - if ("covariates" %in% names(results)) { - Andromeda::appendToTable(results$covariates, covariates) - } else { - results$covariates <- covariates + if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) { + covariates <- covariates %>% + dplyr::select( + cohortId, + timeId, + covariateId, + sumValue, + mean, + sd + ) + if (length(is.na(covariates$timeId)) > 0) { + covariates[is.na(covariates$timeId), ]$timeId <- -1 } + } else { + covariates <- covariates %>% + dplyr::mutate(timeId = 0) %>% + dplyr::select( + cohortId, + timeId, + covariateId, + sumValue, + mean, + sd + ) } + if ("covariates" %in% names(results)) { + Andromeda::appendToTable(results$covariates, covariates) + } else { + results$covariates <- covariates + } + } if ("covariatesContinuous" %in% names(featureExtractionOutput) && dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) { @@ -151,12 +154,14 @@ getCohortCharacteristics <- function(connectionDetails = NULL, sd ) if (length(is.na(covariates$timeId)) > 0) { - covariates[is.na(covariates$timeId),]$timeId <- -1 + covariates[is.na(covariates$timeId), ]$timeId <- -1 } } else { covariates <- covariates %>% - dplyr::mutate(sumValue = -1, - timeId = 0) %>% + dplyr::mutate( + sumValue = -1, + timeId = 0 + ) %>% dplyr::select( cohortId, timeId, @@ -179,10 +184,12 @@ getCohortCharacteristics <- function(connectionDetails = NULL, } delta <- Sys.time() - startTime - ParallelLogger::logInfo("Cohort characterization took ", - signif(delta, 3), - " ", - attr(delta, "units")) + ParallelLogger::logInfo( + "Cohort characterization took ", + signif(delta, 3), + " ", + attr(delta, "units") + ) return(results) } @@ -221,9 +228,10 @@ executeCohortCharacterization <- function(connection, ) if (!incremental) { - for (outputFile in c(covariateValueFileName, covariateValueContFileName, - covariateRefFileName, analysisRefFileName, timeRefFileName)) { - + for (outputFile in c( + covariateValueFileName, covariateValueContFileName, + covariateRefFileName, analysisRefFileName, timeRefFileName + )) { if (file.exists(outputFile)) { ParallelLogger::logInfo("Not in incremental mode - Removing file", outputFile, " and replacing") unlink(outputFile) @@ -309,10 +317,12 @@ executeCohortCharacterization <- function(connection, } } delta <- Sys.time() - startCohortCharacterization - ParallelLogger::logInfo("Running ", - jobName, - " took", - signif(delta, 3), - " ", - attr(delta, "units")) + ParallelLogger::logInfo( + "Running ", + jobName, + " took", + signif(delta, 3), + " ", + attr(delta, "units") + ) } diff --git a/R/CohortRelationship.R b/R/CohortRelationship.R index 00d10af4c..9ee7e252e 100644 --- a/R/CohortRelationship.R +++ b/R/CohortRelationship.R @@ -122,7 +122,7 @@ runCohortRelationshipDiagnostics <- package = utils::packageName() ) ) - + DatabaseConnector::renderTranslateExecuteSql( connection = connection, tempEmulationSchema = tempEmulationSchema, @@ -135,7 +135,7 @@ runCohortRelationshipDiagnostics <- cohort_database_schema = cohortDatabaseSchema, cohort_table = cohortTable ) - + DatabaseConnector::renderTranslateQuerySqlToAndromeda( connection = connection, tempEmulationSchema = tempEmulationSchema, @@ -210,13 +210,17 @@ executeCohortRelationshipDiagnostics <- function(connection, allCohortIds <- cohortDefinitionSet %>% dplyr::select(cohortId, checksum) %>% - dplyr::rename(targetCohortId = cohortId, - targetChecksum = checksum) %>% + dplyr::rename( + targetCohortId = cohortId, + targetChecksum = checksum + ) %>% dplyr::distinct() combinationsOfPossibleCohortRelationships <- allCohortIds %>% tidyr::crossing(allCohortIds %>% - dplyr::rename(comparatorCohortId = targetCohortId, - comparatorChecksum = targetChecksum)) %>% + dplyr::rename( + comparatorCohortId = targetCohortId, + comparatorChecksum = targetChecksum + )) %>% dplyr::filter(targetCohortId != comparatorCohortId) %>% dplyr::arrange(targetCohortId, comparatorCohortId) %>% dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum)) @@ -230,7 +234,7 @@ executeCohortRelationshipDiagnostics <- function(connection, if (nrow(subset) > 0) { if (incremental && - (nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) { + (nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) { ParallelLogger::logInfo( sprintf( " - Skipping %s target cohorts in incremental mode because the relationships has already been computed with other cohorts.", @@ -240,12 +244,12 @@ executeCohortRelationshipDiagnostics <- function(connection, } if (incremental && - (nrow(combinationsOfPossibleCohortRelationships) - ( - nrow( - combinationsOfPossibleCohortRelationships %>% + (nrow(combinationsOfPossibleCohortRelationships) - ( + nrow( + combinationsOfPossibleCohortRelationships %>% dplyr::filter(targetCohortId %in% c(subset$targetCohortId)) - ) - )) > 0) { + ) + )) > 0) { ParallelLogger::logInfo( sprintf( " - Skipping %s combinations in incremental mode because these were previously computed.", @@ -333,7 +337,7 @@ executeCohortRelationshipDiagnostics <- function(connection, timeExecution( exportFolder, "runCohortRelationshipDiagnostics", - c(subset[start:end,]$targetCohortId %>% unique(), subset[start:end,]$comparatorCohortId %>% unique()), + c(subset[start:end, ]$targetCohortId %>% unique(), subset[start:end, ]$comparatorCohortId %>% unique()), parent = "executeCohortRelationshipDiagnostics", expr = { output <- @@ -342,10 +346,12 @@ executeCohortRelationshipDiagnostics <- function(connection, cohortDatabaseSchema = cohortDatabaseSchema, tempEmulationSchema = tempEmulationSchema, cohortTable = cohortTable, - targetCohortIds = subset[start:end,]$targetCohortId %>% unique(), - comparatorCohortIds = subset[start:end,]$comparatorCohortId %>% unique(), - relationshipDays = dplyr::tibble(startDay = temporalStartDays, - endDay = temporalEndDays) + targetCohortIds = subset[start:end, ]$targetCohortId %>% unique(), + comparatorCohortIds = subset[start:end, ]$comparatorCohortId %>% unique(), + relationshipDays = dplyr::tibble( + startDay = temporalStartDays, + endDay = temporalEndDays + ) ) } ) @@ -364,26 +370,28 @@ executeCohortRelationshipDiagnostics <- function(connection, ) recordTasksDone( - cohortId = subset[start:end,]$targetCohortId, - comparatorId = subset[start:end,]$comparatorCohortId, - targetChecksum = subset[start:end,]$targetChecksum, - comparatorChecksum = subset[start:end,]$comparatorChecksum, + cohortId = subset[start:end, ]$targetCohortId, + comparatorId = subset[start:end, ]$comparatorCohortId, + targetChecksum = subset[start:end, ]$targetChecksum, + comparatorChecksum = subset[start:end, ]$comparatorChecksum, task = "runCohortRelationship", - checksum = subset[start:end,]$checksum, + checksum = subset[start:end, ]$checksum, recordKeepingFile = recordKeepingFile, incremental = incremental ) deltaIteration <- Sys.time() - startCohortRelationship - ParallelLogger::logInfo(" - Running Cohort Relationship iteration with batchsize ", - batchSize, - " from row number ", - start, - " to ", - end, - " took ", - signif(deltaIteration, 3), - " ", - attr(deltaIteration, "units")) + ParallelLogger::logInfo( + " - Running Cohort Relationship iteration with batchsize ", + batchSize, + " from row number ", + start, + " to ", + end, + " took ", + signif(deltaIteration, 3), + " ", + attr(deltaIteration, "units") + ) } } else { ParallelLogger::logInfo(" - Skipping in incremental mode.") diff --git a/R/ConceptSets.R b/R/ConceptSets.R index 10c42f91f..36d63fae9 100644 --- a/R/ConceptSets.R +++ b/R/ConceptSets.R @@ -125,7 +125,7 @@ combineConceptSetsFromCohorts <- function(cohorts) { conceptSetCounter <- 0 for (i in (1:nrow(cohorts))) { - cohort <- cohorts[i,] + cohort <- cohorts[i, ] sql <- extractConceptSetsSqlFromCohortSql(cohortSql = cohort$sql) json <- @@ -197,13 +197,13 @@ mergeTempTables <- ) sql <- SqlRender::translate(sql, - targetDialect = connection@dbms, - tempEmulationSchema = tempEmulationSchema + targetDialect = connection@dbms, + tempEmulationSchema = tempEmulationSchema ) DatabaseConnector::executeSql(connection, - sql, - progressBar = FALSE, - reportOverallTime = FALSE + sql, + progressBar = FALSE, + reportOverallTime = FALSE ) # Drop temp tables: @@ -212,13 +212,13 @@ mergeTempTables <- sprintf("TRUNCATE TABLE %s;\nDROP TABLE %s;", tempTable, tempTable) sql <- SqlRender::translate(sql, - targetDialect = connection@dbms, - tempEmulationSchema = tempEmulationSchema + targetDialect = connection@dbms, + tempEmulationSchema = tempEmulationSchema ) DatabaseConnector::executeSql(connection, - sql, - progressBar = FALSE, - reportOverallTime = FALSE + sql, + progressBar = FALSE, + reportOverallTime = FALSE ) } } @@ -261,13 +261,13 @@ instantiateUniqueConceptSets <- function(uniqueConceptSets, sqlSubset <- SqlRender::render(sqlSubset, vocabulary_database_schema = vocabularyDatabaseSchema) sqlSubset <- SqlRender::translate(sqlSubset, - targetDialect = connection@dbms, - tempEmulationSchema = tempEmulationSchema + targetDialect = connection@dbms, + tempEmulationSchema = tempEmulationSchema ) DatabaseConnector::executeSql(connection, - sqlSubset, - progressBar = FALSE, - reportOverallTime = FALSE + sqlSubset, + progressBar = FALSE, + reportOverallTime = FALSE ) } utils::setTxtProgressBar(pb, 1) @@ -298,7 +298,7 @@ getCodeSetIds <- function(criterionList) { return(NULL) } else { return(dplyr::tibble(domain = names(criterionList), codeSetIds = codeSetIds) - %>% filter(!is.na(codeSetIds))) + %>% filter(!is.na(codeSetIds))) } } @@ -386,8 +386,8 @@ runConceptSetDiagnostics <- function(connection, ) uniqueConceptSets <- - conceptSets[!duplicated(conceptSets$uniqueConceptSetId),] %>% - dplyr::select(-cohortId, -conceptSetId) + conceptSets[!duplicated(conceptSets$uniqueConceptSetId), ] %>% + dplyr::select(-cohortId, -conceptSetId) timeExecution( exportFolder, @@ -403,11 +403,11 @@ runConceptSetDiagnostics <- function(connection, tempEmulationSchema = tempEmulationSchema, conceptSetsTable = "#inst_concept_sets" ) - }) + } + ) if ((runIncludedSourceConcepts && nrow(subsetIncluded) > 0) || (runOrphanConcepts && nrow(subsetOrphans) > 0)) { - timeExecution( exportFolder, taskName = "createConceptCountsTable", @@ -422,7 +422,8 @@ runConceptSetDiagnostics <- function(connection, conceptCountsTable = conceptCountsTable, conceptCountsTableIsTemp = conceptCountsTableIsTemp ) - }) + } + ) } if (runIncludedSourceConcepts) { timeExecution( @@ -431,7 +432,6 @@ runConceptSetDiagnostics <- function(connection, cohortIds = NULL, parent = "runConceptSetDiagnostics", expr = { - # Included concepts ------------------------------------------------------------------ ParallelLogger::logInfo("Fetching included source concepts") # TODO: Disregard empty cohorts in tally: @@ -465,7 +465,7 @@ runConceptSetDiagnostics <- function(connection, tempEmulationSchema = tempEmulationSchema, snakeCaseToCamelCase = TRUE ) %>% - tidyr::tibble() + tidyr::tibble() counts <- counts %>% dplyr::rename(uniqueConceptSetId = conceptSetId) %>% @@ -560,7 +560,8 @@ runConceptSetDiagnostics <- function(connection, )) } } - }) + } + ) } if (runBreakdownIndexEvents) { @@ -595,7 +596,6 @@ runConceptSetDiagnostics <- function(connection, cohortIds = cohort$cohortId, parent = "runConceptSetDiagnostics", expr = { - cohortDefinition <- RJSONIO::fromJSON(cohort$json, digits = 23) primaryCodesetIds <- @@ -603,7 +603,7 @@ runConceptSetDiagnostics <- function(connection, cohortDefinition$PrimaryCriteria$CriteriaList, getCodeSetIds ) %>% - dplyr::bind_rows() + dplyr::bind_rows() if (nrow(primaryCodesetIds) == 0) { warning( "No primary event criteria concept sets found for cohort id: ", @@ -612,13 +612,13 @@ runConceptSetDiagnostics <- function(connection, return(tidyr::tibble()) } primaryCodesetIds <- primaryCodesetIds %>% dplyr::filter(domain %in% - c(domains$domain %>% unique())) + c(domains$domain %>% unique())) if (nrow(primaryCodesetIds) == 0) { warning( "Primary event criteria concept sets found for cohort id: ", cohort$cohortId, " but,", "\nnone of the concept sets belong to the supported domains.", "\nThe supported domains are:\n", paste(domains$domain, - collapse = ", " + collapse = ", " ) ) return(tidyr::tibble()) @@ -643,7 +643,7 @@ runConceptSetDiagnostics <- function(connection, primaryCodesetIds <- dplyr::bind_rows(primaryCodesetIds) getCounts <- function(row) { - domain <- domains[domains$domain == row$domain,] + domain <- domains[domains$domain == row$domain, ] sql <- SqlRender::loadRenderTranslateSql( "CohortEntryBreakdown.sql", @@ -681,7 +681,7 @@ runConceptSetDiagnostics <- function(connection, store_table = "#breakdown", snakeCaseToCamelCase = TRUE ) %>% - tidyr::tibble() + tidyr::tibble() if (!is.null(conceptIdTable)) { sql <- "INSERT INTO @concept_id_table (concept_id) SELECT DISTINCT concept_id @@ -711,8 +711,8 @@ runConceptSetDiagnostics <- function(connection, counts <- lapply(split(primaryCodesetIds, 1:nrow(primaryCodesetIds)), getCounts) %>% - dplyr::bind_rows() %>% - dplyr::arrange(conceptCount) + dplyr::bind_rows() %>% + dplyr::arrange(conceptCount) if (nrow(counts) > 0) { counts$cohortId <- cohort$cohortId @@ -724,7 +724,8 @@ runConceptSetDiagnostics <- function(connection, return(dplyr::tibble()) } return(counts) - }) + } + ) } data <- @@ -794,7 +795,7 @@ runConceptSetDiagnostics <- function(connection, # [OPTIMIZATION idea] can we modify the sql to do this for all uniqueConceptSetId in one query using group by? data <- list() for (i in (1:nrow(uniqueConceptSets))) { - conceptSet <- uniqueConceptSets[i,] + conceptSet <- uniqueConceptSets[i, ] ParallelLogger::logInfo( "- Finding orphan concepts for concept set '", conceptSet$conceptSetName, @@ -807,7 +808,6 @@ runConceptSetDiagnostics <- function(connection, parent = "runConceptSetDiagnostics", cohortIds = paste("concept_set-", conceptSet$name), expr = { - data[[i]] <- .findOrphanConcepts( connection = connection, cdmDatabaseSchema = cdmDatabaseSchema, @@ -835,7 +835,8 @@ runConceptSetDiagnostics <- function(connection, reportOverallTime = FALSE ) } - }) + } + ) sql <- "TRUNCATE TABLE @orphan_concept_table;\nDROP TABLE @orphan_concept_table;" DatabaseConnector::renderTranslateExecuteSql( @@ -940,16 +941,16 @@ runConceptSetDiagnostics <- function(connection, tempEmulationSchema = tempEmulationSchema, snakeCaseToCamelCase = TRUE ) %>% - dplyr::tibble() %>% - dplyr::rename(uniqueConceptSetId = codesetId) %>% - dplyr::inner_join(conceptSets, - by = "uniqueConceptSetId" - ) %>% - dplyr::select( - cohortId, - conceptSetId, - conceptId - ) + dplyr::tibble() %>% + dplyr::rename(uniqueConceptSetId = codesetId) %>% + dplyr::inner_join(conceptSets, + by = "uniqueConceptSetId" + ) %>% + dplyr::select( + cohortId, + conceptSetId, + conceptId + ) resolvedConceptIds <- makeDataExportable( x = resolvedConceptIds, diff --git a/R/DataSourceInformation.R b/R/DataSourceInformation.R index 72f66db48..de9e2b785 100644 --- a/R/DataSourceInformation.R +++ b/R/DataSourceInformation.R @@ -49,9 +49,11 @@ getCdmDataSourceInformation <- } } - if (!DatabaseConnector::existsTable(connection = connection, - databaseSchema = cdmDatabaseSchema, - tableName = "cdm_source")) { + if (!DatabaseConnector::existsTable( + connection = connection, + databaseSchema = cdmDatabaseSchema, + tableName = "cdm_source" + )) { warning("CDM Source table not found in CDM. Metadata on CDM source will be limited.") return(NULL) } @@ -90,9 +92,10 @@ getCdmDataSourceInformation <- sourceReleaseDate <- as.Date(NA) if ("sourceReleaseDate" %in% colnames(cdmDataSource)) { if (class(cdmDataSource$sourceReleaseDate) != "Date") { - try(sourceReleaseDate <- - max(as.Date(cdmDataSource$sourceReleaseDate)), - silent = TRUE + try( + sourceReleaseDate <- + max(as.Date(cdmDataSource$sourceReleaseDate)), + silent = TRUE ) } else { sourceReleaseDate <- max(as.Date(cdmDataSource$sourceReleaseDate)) diff --git a/R/ExportCharacterization.R b/R/ExportCharacterization.R index ead0585b8..d4d344ed7 100644 --- a/R/ExportCharacterization.R +++ b/R/ExportCharacterization.R @@ -55,7 +55,8 @@ exportCharacterization <- function(characteristics, sd = round(sd, digits = 4) ) %>% dplyr::select(-cohortEntries, -cohortSubjects) %>% - dplyr::distinct() %>% makeDataExportable( + dplyr::distinct() %>% + makeDataExportable( tableName = "temporal_covariate_value", minCellCount = minCellCount, databaseId = databaseId @@ -74,7 +75,7 @@ exportCharacterization <- function(characteristics, covariateId = covariateRef$covariateId ) - analysisRef <- makeDataExportable( + analysisRef <- makeDataExportable( x = characteristics$analysisRef, tableName = "temporal_analysis_ref", minCellCount = minCellCount @@ -109,12 +110,12 @@ exportCharacterization <- function(characteristics, if (!"covariatesContinuous" %in% names(characteristics)) { ParallelLogger::logInfo("No continuous characterization output for submitted cohorts") } else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) { - characteristics$filteredCovariatesContinous <- makeDataExportable( - x = characteristics$covariatesContinuous, - tableName = "temporal_covariate_value_dist", - minCellCount = minCellCount, - databaseId = databaseId - ) + characteristics$filteredCovariatesContinous <- makeDataExportable( + x = characteristics$covariatesContinuous, + tableName = "temporal_covariate_value_dist", + minCellCount = minCellCount, + databaseId = databaseId + ) if (dplyr::pull(dplyr::count(characteristics$filteredCovariatesContinous)) > 0) { writeToCsv( diff --git a/R/InclusionRules.R b/R/InclusionRules.R index 21e08056e..5fb8dbea0 100644 --- a/R/InclusionRules.R +++ b/R/InclusionRules.R @@ -44,21 +44,23 @@ getInclusionStats <- function(connection, ParallelLogger::logInfo("Exporting inclusion rules with CohortGenerator") timeExecution(exportFolder, - "getInclusionStatsCohortGenerator", - parent = "getInclusionStats", - expr = - { - CohortGenerator::insertInclusionRuleNames( - connection = connection, - cohortDefinitionSet = subset, - cohortDatabaseSchema = cohortDatabaseSchema, - cohortInclusionTable = cohortTableNames$cohortInclusionTable - ) + "getInclusionStatsCohortGenerator", + parent = "getInclusionStats", + expr = { + CohortGenerator::insertInclusionRuleNames( + connection = connection, + cohortDefinitionSet = subset, + cohortDatabaseSchema = cohortDatabaseSchema, + cohortInclusionTable = cohortTableNames$cohortInclusionTable + ) - stats <- CohortGenerator::getCohortStats(connection = connection, - cohortTableNames = cohortTableNames, - cohortDatabaseSchema = cohortDatabaseSchema) - }) + stats <- CohortGenerator::getCohortStats( + connection = connection, + cohortTableNames = cohortTableNames, + cohortDatabaseSchema = cohortDatabaseSchema + ) + } + ) if (!is.null(stats)) { if ("cohortInclusionTable" %in% (names(stats))) { cohortInclusion <- makeDataExportable( diff --git a/R/Incremental.R b/R/Incremental.R index a54246030..34373f55b 100644 --- a/R/Incremental.R +++ b/R/Incremental.R @@ -179,7 +179,6 @@ writeToCsv.default <- function(data, fileName, incremental = FALSE, ...) { delim = "," ) } - } writeToCsv.tbl_Andromeda <- @@ -210,8 +209,8 @@ writeToCsv.tbl_Andromeda <- addChunk <- function(chunk) { if ("timeId" %in% colnames(chunk)) { - if (nrow(chunk[is.na(chunk$timeId),]) > 0) { - chunk[is.na(chunk$timeId),]$timeId <- 0 + if (nrow(chunk[is.na(chunk$timeId), ]) > 0) { + chunk[is.na(chunk$timeId), ]$timeId <- 0 } } else { chunk$timeId <- 0 @@ -266,7 +265,7 @@ saveIncremental <- function(data, fileName, ...) { lazy = FALSE ) if ((nrow(previousData)) > 0) { - if("database_id" %in% colnames(previousData)) { + if ("database_id" %in% colnames(previousData)) { previousData$database_id <- as.character(previousData$database_id) } diff --git a/R/Private.R b/R/Private.R index a1ed136e4..188b074ea 100644 --- a/R/Private.R +++ b/R/Private.R @@ -324,11 +324,13 @@ timeExecution <- function(exportFolder, eval(expr) execTime <- Sys.time() - start } - executionTimes <- data.frame(task = taskName, - startTime = start, - cohortIds = paste(cohortIds, collapse = ";"), - executionTime = execTime, - parent = paste(parent, collapse = "")) + executionTimes <- data.frame( + task = taskName, + startTime = start, + cohortIds = paste(cohortIds, collapse = ";"), + executionTime = execTime, + parent = paste(parent, collapse = "") + ) readr::write_csv(executionTimes, file = executionTimePath, append = file.exists(executionTimePath)) return(executionTimes) diff --git a/R/ResultsDataModel.R b/R/ResultsDataModel.R index 90b50118f..c908ab3ef 100644 --- a/R/ResultsDataModel.R +++ b/R/ResultsDataModel.R @@ -49,7 +49,7 @@ fixTableMetadataForBackwardCompatibility <- function(table, tableName) { if (!"metadata" %in% colnames(table)) { data <- list() for (i in (1:nrow(table))) { - data[[i]] <- table[i,] + data[[i]] <- table[i, ] colnamesDf <- colnames(data[[i]]) metaDataList <- list() for (j in (1:length(colnamesDf))) { @@ -99,7 +99,7 @@ checkFixColumnNames <- expectedNames <- tableSpecs %>% dplyr::select(columnName) %>% dplyr::anti_join(dplyr::filter(optionalNames, !columnName %in% observeredNames), - by = "columnName" + by = "columnName" ) %>% dplyr::arrange(columnName) %>% dplyr::pull() @@ -206,7 +206,7 @@ checkAndFixDuplicateRows <- specifications = getResultsDataModelSpecifications()) { primaryKeys <- specifications %>% dplyr::filter(tableName == !!tableName & - primaryKey == "Yes") %>% + primaryKey == "Yes") %>% dplyr::select(columnName) %>% dplyr::pull() duplicatedRows <- duplicated(table[, primaryKeys]) @@ -219,7 +219,7 @@ checkAndFixDuplicateRows <- sum(duplicatedRows) ) ) - return(table[!duplicatedRows,]) + return(table[!duplicatedRows, ]) } else { return(table) } @@ -233,7 +233,7 @@ appendNewRows <- if (nrow(data) > 0) { primaryKeys <- specifications %>% dplyr::filter(tableName == !!tableName & - primaryKey == "Yes") %>% + primaryKey == "Yes") %>% dplyr::select(columnName) %>% dplyr::pull() newData <- newData %>% @@ -245,14 +245,18 @@ appendNewRows <- # Private function for testing migrations in isolation .createDataModel <- function(connection, databaseSchema, tablePrefix) { sqlParams <- getPrefixedTableNames(tablePrefix) - sql <- do.call(SqlRender::loadRenderTranslateSql, - c(sqlParams, - list( - sqlFilename = "CreateResultsDataModel.sql", - packageName = utils::packageName(), - dbms = connection@dbms, - results_schema = databaseSchema - ))) + sql <- do.call( + SqlRender::loadRenderTranslateSql, + c( + sqlParams, + list( + sqlFilename = "CreateResultsDataModel.sql", + packageName = utils::packageName(), + dbms = connection@dbms, + results_schema = databaseSchema + ) + ) + ) DatabaseConnector::executeSql(connection, sql) } @@ -276,9 +280,11 @@ createResultsDataModel <- function(connectionDetails = NULL, on.exit(DatabaseConnector::disconnect(connection)) .createDataModel(connection, databaseSchema, tablePrefix) - migrateDataModel(connectionDetails = connectionDetails, - databaseSchema = databaseSchema, - tablePrefix = tablePrefix) + migrateDataModel( + connectionDetails = connectionDetails, + databaseSchema = databaseSchema, + tablePrefix = tablePrefix + ) } naToEmpty <- function(x) { @@ -360,7 +366,7 @@ uploadResults <- function(connectionDetails, primaryKey <- specifications %>% filter(tableName == !!tableName & - primaryKey == "Yes") %>% + primaryKey == "Yes") %>% select(columnName) %>% pull() @@ -459,8 +465,8 @@ uploadResults <- function(connectionDetails, if (!is.null(env$primaryKeyValuesInDb)) { primaryKeyValuesInChunk <- unique(chunk[env$primaryKey]) duplicates <- inner_join(env$primaryKeyValuesInDb, - primaryKeyValuesInChunk, - by = env$primaryKey + primaryKeyValuesInChunk, + by = env$primaryKey ) if (nrow(duplicates) != 0) { if ("database_id" %in% env$primaryKey || @@ -527,7 +533,6 @@ uploadResults <- function(connectionDetails, deleteFromServer <- function(connection, schema, tableName, keyValues, tablePrefix) { - createSqlStatement <- function(i) { sql <- paste0( "DELETE FROM ", @@ -537,7 +542,7 @@ deleteFromServer <- tableName, "\nWHERE ", paste(paste0( - colnames(keyValues), " = '", keyValues[i,], "'" + colnames(keyValues), " = '", keyValues[i, ], "'" ), collapse = " AND "), ";" ) @@ -591,9 +596,9 @@ deleteAllRecordsForDatabaseId <- function(connection, database_id = databaseId, ) DatabaseConnector::renderTranslateExecuteSql(connection, - sql, - progressBar = FALSE, - reportOverallTime = FALSE + sql, + progressBar = FALSE, + reportOverallTime = FALSE ) } } @@ -615,10 +620,11 @@ migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "" ParallelLogger::logInfo("Updating version number") updateVersionSql <- SqlRender::loadRenderTranslateSql("UpdateVersionNumber.sql", - packageName = utils::packageName(), - database_schema = databaseSchema, - table_prefix = tablePrefix, - dbms = connectionDetails$dbms) + packageName = utils::packageName(), + database_schema = databaseSchema, + table_prefix = tablePrefix, + dbms = connectionDetails$dbms + ) connection <- DatabaseConnector::connect(connectionDetails = connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) @@ -638,9 +644,11 @@ migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "" #' @returns Instance of ResultModelManager::DataMigrationManager that has interface for converting existing data models #' @export getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "") { - ResultModelManager::DataMigrationManager$new(connectionDetails = connectionDetails, - databaseSchema = databaseSchema, - tablePrefix = tablePrefix, - migrationPath = "migrations", - packageName = utils::packageName()) + ResultModelManager::DataMigrationManager$new( + connectionDetails = connectionDetails, + databaseSchema = databaseSchema, + tablePrefix = tablePrefix, + migrationPath = "migrations", + packageName = utils::packageName() + ) } diff --git a/R/RunDiagnostics.R b/R/RunDiagnostics.R index c2e2b1236..0746b4175 100644 --- a/R/RunDiagnostics.R +++ b/R/RunDiagnostics.R @@ -227,12 +227,7 @@ executeDiagnostics <- function(cohortDefinitionSet, incremental = callingArgs$incremental, temporalCovariateSettings = callingArgs$temporalCovariateSettings ) %>% - RJSONIO::toJSON(digits = 23, pretty = TRUE) - - # take package dependency snapshot - packageDependencySnapShotJson <- - takepackageDependencySnapshot() %>% - RJSONIO::toJSON(digits = 23, pretty = TRUE) + RJSONIO::toJSON(digits = 23, pretty = TRUE) exportFolder <- normalizePath(exportFolder, mustWork = FALSE) incrementalFolder <- normalizePath(incrementalFolder, mustWork = FALSE) @@ -253,25 +248,25 @@ executeDiagnostics <- function(cohortDefinitionSet, errorMessage <- checkmate::makeAssertCollection() checkmate::assertList(cohortTableNames, null.ok = FALSE, types = "character", add = errorMessage, names = "named") checkmate::assertNames(names(cohortTableNames), - must.include = c( - "cohortTable", - "cohortInclusionTable", - "cohortInclusionResultTable", - "cohortInclusionStatsTable", - "cohortSummaryStatsTable", - "cohortCensorStatsTable" - ), - add = errorMessage + must.include = c( + "cohortTable", + "cohortInclusionTable", + "cohortInclusionResultTable", + "cohortInclusionStatsTable", + "cohortSummaryStatsTable", + "cohortCensorStatsTable" + ), + add = errorMessage ) checkmate::assertDataFrame(cohortDefinitionSet, add = errorMessage) checkmate::assertNames(names(cohortDefinitionSet), - must.include = c( - "json", - "cohortId", - "cohortName", - "sql" - ), - add = errorMessage + must.include = c( + "json", + "cohortId", + "cohortName", + "sql" + ), + add = errorMessage ) cohortTable <- cohortTableNames$cohortTable @@ -362,26 +357,32 @@ executeDiagnostics <- function(cohortDefinitionSet, # All temporal covariate settings objects must be covariateSettings checkmate::assert_true(all(lapply(temporalCovariateSettings, class) == c("covariateSettings")), add = errorMessage) - requiredCharacterisationSettings <- c("DemographicsGender", "DemographicsAgeGroup", "DemographicsRace", - "DemographicsEthnicity", "DemographicsIndexYear", "DemographicsIndexMonth", - "ConditionEraGroupOverlap", "DrugEraGroupOverlap", "CharlsonIndex", - "Chads2", "Chads2Vasc") + requiredCharacterisationSettings <- c( + "DemographicsGender", "DemographicsAgeGroup", "DemographicsRace", + "DemographicsEthnicity", "DemographicsIndexYear", "DemographicsIndexMonth", + "ConditionEraGroupOverlap", "DrugEraGroupOverlap", "CharlsonIndex", + "Chads2", "Chads2Vasc" + ) presentSettings <- temporalCovariateSettings[[1]][requiredCharacterisationSettings] if (!all(unlist(presentSettings))) { warning( "For cohort charcterization to display standardized results the following covariates must be present in your temporalCovariateSettings: \n\n", - paste(requiredCharacterisationSettings, collapse = ", ")) + paste(requiredCharacterisationSettings, collapse = ", ") + ) } - requiredTimeDistributionSettings <- c("DemographicsPriorObservationTime", - "DemographicsPostObservationTime", - "DemographicsTimeInCohort") + requiredTimeDistributionSettings <- c( + "DemographicsPriorObservationTime", + "DemographicsPostObservationTime", + "DemographicsTimeInCohort" + ) presentSettings <- temporalCovariateSettings[[1]][requiredTimeDistributionSettings] if (!all(unlist(presentSettings))) { warning( "For time distributions diagnostics to display standardized results the following covariates must be present in your temporalCovariateSettings: \n\n", - paste(requiredTimeDistributionSettings, collapse = ", ")) + paste(requiredTimeDistributionSettings, collapse = ", ") + ) } # forcefully set ConditionEraGroupStart and drugEraGroupStart to NULL @@ -389,26 +390,32 @@ executeDiagnostics <- function(cohortDefinitionSet, temporalCovariateSettings[[1]]$ConditionEraGroupStart <- NULL temporalCovariateSettings[[1]]$DrugEraGroupStart <- NULL - checkmate::assert_integerish(x = temporalCovariateSettings[[1]]$temporalStartDays, - any.missing = FALSE, - min.len = 1, - add = errorMessage) - checkmate::assert_integerish(x = temporalCovariateSettings[[1]]$temporalEndDays, - any.missing = FALSE, - min.len = 1, - add = errorMessage) + checkmate::assert_integerish( + x = temporalCovariateSettings[[1]]$temporalStartDays, + any.missing = FALSE, + min.len = 1, + add = errorMessage + ) + checkmate::assert_integerish( + x = temporalCovariateSettings[[1]]$temporalEndDays, + any.missing = FALSE, + min.len = 1, + add = errorMessage + ) checkmate::reportAssertions(collection = errorMessage) # Adding required temporal windows required in results viewer requiredTemporalPairs <- - list(c(-365, 0), - c(-30, 0), - c(-365, -31), - c(-30, -1), - c(0, 0), - c(1, 30), - c(31, 365), - c(-9999, 9999)) + list( + c(-365, 0), + c(-30, 0), + c(-365, -31), + c(-30, -1), + c(0, 0), + c(1, 30), + c(31, 365), + c(-9999, 9999) + ) for (p1 in requiredTemporalPairs) { found <- FALSE for (i in 1:length(temporalCovariateSettings[[1]]$temporalStartDays)) { @@ -444,17 +451,17 @@ executeDiagnostics <- function(cohortDefinitionSet, sort() cohortTableColumnNamesExpected <- getResultsDataModelSpecifications() %>% - dplyr::filter(tableName == "cohort") %>% - dplyr::pull(columnName) %>% - SqlRender::snakeCaseToCamelCase() %>% - sort() + dplyr::filter(tableName == "cohort") %>% + dplyr::pull(columnName) %>% + SqlRender::snakeCaseToCamelCase() %>% + sort() cohortTableColumnNamesRequired <- getResultsDataModelSpecifications() %>% - dplyr::filter(tableName == "cohort") %>% - dplyr::filter(isRequired == "Yes") %>% - dplyr::pull(columnName) %>% - SqlRender::snakeCaseToCamelCase() %>% - sort() + dplyr::filter(tableName == "cohort") %>% + dplyr::filter(isRequired == "Yes") %>% + dplyr::pull(columnName) %>% + SqlRender::snakeCaseToCamelCase() %>% + sort() expectedButNotObsevered <- setdiff(x = cohortTableColumnNamesExpected, y = cohortTableColumnNamesObserved) @@ -534,8 +541,8 @@ executeDiagnostics <- function(cohortDefinitionSet, } } vocabularyVersion <- getVocabularyVersion(connection, vocabularyDatabaseSchema) - - }) + } + ) cohortDefinitionSet$checksum <- computeChecksum(cohortDefinitionSet$sql) @@ -570,7 +577,8 @@ executeDiagnostics <- function(cohortDefinitionSet, snakeCaseToCamelCase = TRUE, tempEmulationSchema = tempEmulationSchema ) - }) + } + ) # Database metadata --------------------------------------------- saveDatabaseMetaData( databaseId = databaseId, @@ -600,7 +608,8 @@ executeDiagnostics <- function(cohortDefinitionSet, minCellCount = minCellCount, databaseId = databaseId ) - }) + } + ) if (nrow(cohortCounts) > 0) { instantiatedCohorts <- cohortCounts %>% @@ -639,7 +648,8 @@ executeDiagnostics <- function(cohortDefinitionSet, minCellCount = minCellCount, recordKeepingFile = recordKeepingFile ) - }) + } + ) } # Concept set diagnostics ----------------------------------------------- @@ -674,7 +684,8 @@ executeDiagnostics <- function(cohortDefinitionSet, conceptIdTable = "#concept_ids", recordKeepingFile = recordKeepingFile ) - }) + } + ) } # Time series ---------------------------------------------------------------------- @@ -700,7 +711,8 @@ executeDiagnostics <- function(cohortDefinitionSet, recordKeepingFile = recordKeepingFile, observationPeriodDateRange = observationPeriodDateRange ) - }) + } + ) } @@ -727,7 +739,8 @@ executeDiagnostics <- function(cohortDefinitionSet, recordKeepingFile = recordKeepingFile, incremental = incremental ) - }) + } + ) } # Incidence rates -------------------------------------------------------------------------------------- @@ -752,7 +765,8 @@ executeDiagnostics <- function(cohortDefinitionSet, recordKeepingFile = recordKeepingFile, incremental = incremental ) - }) + } + ) } # Cohort relationship --------------------------------------------------------------------------------- @@ -777,7 +791,8 @@ executeDiagnostics <- function(cohortDefinitionSet, recordKeepingFile = recordKeepingFile, incremental = incremental ) - }) + } + ) } # Temporal Cohort characterization --------------------------------------------------------------- @@ -813,7 +828,8 @@ executeDiagnostics <- function(cohortDefinitionSet, timeRefFileName = file.path(exportFolder, "temporal_time_ref.csv"), minCharacterizationMean = minCharacterizationMean ) - }) + } + ) } # Store information from the vocabulary on the concepts used ------------------------- @@ -821,8 +837,7 @@ executeDiagnostics <- function(cohortDefinitionSet, exportFolder, "exportConceptInformation", parent = "executeDiagnostics", - expr = - { + expr = { exportConceptInformation( connection = connection, cdmDatabaseSchema = cdmDatabaseSchema, @@ -831,15 +846,15 @@ executeDiagnostics <- function(cohortDefinitionSet, incremental = incremental, exportFolder = exportFolder ) - }) + } + ) # Delete unique concept ID table --------------------------------- ParallelLogger::logTrace("Deleting concept ID table") timeExecution( exportFolder, "DeleteConceptIdTable", parent = "executeDiagnostics", - expr = - { + expr = { sql <- "TRUNCATE TABLE @table;\nDROP TABLE @table;" DatabaseConnector::renderTranslateExecuteSql( connection = connection, @@ -849,7 +864,8 @@ executeDiagnostics <- function(cohortDefinitionSet, progressBar = FALSE, reportOverallTime = FALSE ) - }) + } + ) # Writing metadata file ParallelLogger::logInfo("Retrieving metadata information and writing metadata") @@ -925,7 +941,7 @@ executeDiagnostics <- function(cohortDefinitionSet, # 2 as.character(attr(delta, "units")), # 3 - packageDependencySnapShotJson, + "{}", # 4 callingArgsJson, # 5 @@ -996,7 +1012,8 @@ executeDiagnostics <- function(cohortDefinitionSet, parent = "executeDiagnostics", expr = { writeResultsZip(exportFolder, databaseId) - }) + } + ) ParallelLogger::logInfo( "Computing all diagnostics took ", diff --git a/R/Shiny.R b/R/Shiny.R index fa4031824..d00937df1 100644 --- a/R/Shiny.R +++ b/R/Shiny.R @@ -58,7 +58,6 @@ launchDiagnosticsExplorer <- function(sqliteDbPath = "MergedCohortDiagnosticsDat port = 80, launch.browser = FALSE, enableAnnotation = TRUE) { - if (is.null(shinyConfigPath)) { if (is.null(connectionDetails)) { sqliteDbPath <- normalizePath(sqliteDbPath) @@ -110,34 +109,36 @@ launchDiagnosticsExplorer <- function(sqliteDbPath = "MergedCohortDiagnosticsDat on.exit(options("CD-shiny-config" = NULL)) } - ensure_installed(c("checkmate", - "DatabaseConnector", - "dplyr", - "plyr", - "ggplot2", - "ggiraph", - "gtable", - "htmltools", - "lubridate", - "pool", - "purrr", - "scales", - "shiny", - "shinydashboard", - "shinyWidgets", - "shinyjs", - "shinycssloaders", - "stringr", - "SqlRender", - "tidyr", - "CirceR", - "rmarkdown", - "reactable", - "markdownInput", - "markdown", - "jsonlite", - "ggh4x", - "yaml")) + ensure_installed(c( + "checkmate", + "DatabaseConnector", + "dplyr", + "plyr", + "ggplot2", + "ggiraph", + "gtable", + "htmltools", + "lubridate", + "pool", + "purrr", + "scales", + "shiny", + "shinydashboard", + "shinyWidgets", + "shinyjs", + "shinycssloaders", + "stringr", + "SqlRender", + "tidyr", + "CirceR", + "rmarkdown", + "reactable", + "markdownInput", + "markdown", + "jsonlite", + "ggh4x", + "yaml" + )) appDir <- system.file("shiny", "DiagnosticsExplorer", package = utils::packageName()) @@ -229,7 +230,7 @@ createMergedResultsFile <- createDiagnosticsExplorerZip <- function(outputZipfile = file.path(getwd(), "DiagnosticsExplorer.zip"), sqliteDbPath = "MergedCohortDiagnosticsData.sqlite", shinyDirectory = system.file(file.path("shiny", "DiagnosticsExplorer"), - package = "CohortDiagnostics" + package = "CohortDiagnostics" ), overwrite = FALSE) { outputZipfile <- normalizePath(outputZipfile, mustWork = FALSE) diff --git a/R/TakePackageDependencySnapshot.R b/R/TakePackageDependencySnapshot.R deleted file mode 100644 index 4288acb0c..000000000 --- a/R/TakePackageDependencySnapshot.R +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2022 Observational Health Data Sciences and Informatics -# -# This file is part of CohortDiagnostics -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#' Take a snapshot of the R environment -#' -#' @details -#' This function records all versions used in the R environment as used by runCohortDiagnostics. -#' This function was borrowed from OhdsiRTools -#' -#' @return -#' A data frame listing all the dependencies of the root package and their version numbers, in the -#' order in which they should be installed. -#' -takepackageDependencySnapshot <- function() { - splitPackageList <- function(packageList) { - if (is.null(packageList)) { - return(c()) - } else { - return(strsplit( - gsub( - "\\([^)]*\\)", "", gsub(" ", "", gsub("\n", "", packageList)) - ), - "," - )[[1]]) - } - } - - fetchDependencies <- - function(package, - recursive = TRUE, - level = 0) { - description <- utils::packageDescription(package) - packages <- splitPackageList(description$Depends) - packages <- c(packages, splitPackageList(description$Imports)) - packages <- - c(packages, splitPackageList(description$LinkingTo)) - # Note: if we want to include suggests, we'll need to consider circular references packages <- - # c(packages, splitPackageList(description$Suggests)) - packages <- packages[packages != "R"] - packages <- data.frame( - name = packages, - level = rep( - level, - length(packages) - ), - stringsAsFactors = FALSE - ) - if (recursive && nrow(packages) > 0) { - all <- - lapply(packages$name, - fetchDependencies, - recursive = TRUE, - level = level + 1 - ) - dependencies <- do.call("rbind", all) - if (nrow(dependencies) > 0) { - packages <- rbind(packages, dependencies) - packages <- aggregate(level ~ name, packages, max) - } - } - return(packages) - } - - packages <- - fetchDependencies("CohortDiagnostics", recursive = TRUE) - packages <- packages[order(-packages$level), ] - getVersion <- function(package) { - return(utils::packageDescription(package)$Version) - } - versions <- - sapply(c(packages$name, "CohortDiagnostics"), getVersion) - snapshot <- data.frame( - package = names(versions), - version = as.vector(versions), - stringsAsFactors = FALSE - ) - s <- utils::sessionInfo() - rVersion <- data.frame( - package = "R", - version = paste(s$R.version$major, s$R.version$minor, sep = "."), - stringsAsFactors = FALSE - ) - snapshot <- rbind(rVersion, snapshot) - return(snapshot) -} diff --git a/R/TimeSeries.R b/R/TimeSeries.R index f35c23e33..682442dd2 100644 --- a/R/TimeSeries.R +++ b/R/TimeSeries.R @@ -124,8 +124,8 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, by = clock::duration_months(3) ) ) %>% - dplyr::mutate(periodEnd = clock::add_months(x = periodBegin, n = 3) - 1) %>% - dplyr::mutate(calendarInterval = "q") + dplyr::mutate(periodEnd = clock::add_months(x = periodBegin, n = 3) - 1) %>% + dplyr::mutate(calendarInterval = "q") calendarMonth <- dplyr::tibble( @@ -135,8 +135,8 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, by = clock::duration_months(1) ) ) %>% - dplyr::mutate(periodEnd = clock::add_months(x = periodBegin, n = 1) - 1) %>% - dplyr::mutate(calendarInterval = "m") + dplyr::mutate(periodEnd = clock::add_months(x = periodBegin, n = 1) - 1) %>% + dplyr::mutate(calendarInterval = "m") calendarYear <- dplyr::tibble( @@ -146,8 +146,8 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, by = clock::duration_years(1) ) ) %>% - dplyr::mutate(periodEnd = clock::add_years(x = periodBegin, n = 1) - 1) %>% - dplyr::mutate(calendarInterval = "y") + dplyr::mutate(periodEnd = clock::add_years(x = periodBegin, n = 1) - 1) %>% + dplyr::mutate(calendarInterval = "y") timeSeriesDateRange <- dplyr::tibble( periodBegin = timeSeriesMinDate, @@ -162,9 +162,9 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, calendarYear, timeSeriesDateRange ) %>% # calendarWeek - dplyr::distinct() %>% - dplyr::arrange(periodBegin, periodEnd, calendarInterval) %>% - dplyr::mutate(timeId = dplyr::row_number()) + dplyr::distinct() %>% + dplyr::arrange(periodBegin, periodEnd, calendarInterval) %>% + dplyr::mutate(timeId = dplyr::row_number()) ParallelLogger::logTrace(" - Inserting calendar periods") DatabaseConnector::insertTable( @@ -428,7 +428,7 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, ) resultsInAndromeda$ageGroupGender <- resultsInAndromeda$ageGroupGender %>% - dplyr::mutate(seriesType = !!seriesId) + dplyr::mutate(seriesType = !!seriesId) Andromeda::appendToTable( resultsInAndromeda$allData, resultsInAndromeda$ageGroupGender @@ -452,7 +452,7 @@ runCohortTimeSeriesDiagnostics <- function(connectionDetails = NULL, dplyr::collect() %>% # temporal solution till fix of bug in andromeda on handling dates # periodBegin gets converted to integer dplyr::inner_join(resultsInAndromeda$calendarPeriods %>% dplyr::collect(), - by = c("timeId") + by = c("timeId") ) %>% dplyr::arrange( cohortId, @@ -520,7 +520,6 @@ executeTimeSeriesDiagnostics <- function(connection, recordKeepingFile, observationPeriodDateRange, batchSize = getOption("CohortDiagnostics-TimeSeries-batch-size", default = 20)) { - if (all(!runCohortTimeSeries, !runDataSourceTimeSeries)) { warning( "Both Datasource time series and cohort time series are set to FALSE. Skippping executeTimeSeriesDiagnostics." @@ -566,7 +565,7 @@ executeTimeSeriesDiagnostics <- function(connection, ) } - cohortIds <- subset[start:end,]$cohortId %>% unique() + cohortIds <- subset[start:end, ]$cohortId %>% unique() timeExecution( exportFolder, "runCohortTimeSeriesDiagnostics", @@ -598,12 +597,12 @@ executeTimeSeriesDiagnostics <- function(connection, data = data, fileName = outputFile, incremental = TRUE, - cohortId = subset[start:end,]$cohortId %>% unique() + cohortId = subset[start:end, ]$cohortId %>% unique() ) recordTasksDone( - cohortId = subset[start:end,]$cohortId %>% unique(), + cohortId = subset[start:end, ]$cohortId %>% unique(), task = "runCohortTimeSeries", - checksum = subset[start:end,]$checksum, + checksum = subset[start:end, ]$checksum, recordKeepingFile = recordKeepingFile, incremental = incremental ) @@ -623,8 +622,10 @@ executeTimeSeriesDiagnostics <- function(connection, recordKeepingFile = recordKeepingFile ) - if (all(nrow(subset) == 0, - incremental)) { + if (all( + nrow(subset) == 0, + incremental + )) { ParallelLogger::logInfo("Skipping Data Source Time Series in Incremental mode.") return(NULL) } diff --git a/docs/404.html b/docs/404.html index 653fa5c34..3dd8ed2c3 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@
diff --git a/docs/articles/CreatingAStudyPackage.html b/docs/articles/CreatingAStudyPackage.html index d7205370b..2950d8852 100644 --- a/docs/articles/CreatingAStudyPackage.html +++ b/docs/articles/CreatingAStudyPackage.html @@ -33,7 +33,7 @@ @@ -97,7 +97,7 @@vignettes/CreatingAStudyPackage.Rmd
CreatingAStudyPackage.Rmd
# please ensure you have the latest version of Hydra. As of 08/13/2021 - CohortDiagnostics support for Hydra is still in develop branch.
# please check hydra release notes and update hydra
remotes::install_github("OHDSI/Hydra", ref = "develop")
-outputFolder <- "d:/temp/output" # location where you study package will be created
+outputFolder <- "d:/temp/output" # location where you study package will be created
########## Please populate the information below #####################
@@ -208,62 +208,68 @@ option A: Using Hydra and ROhdsiW
baseUrl <- Sys.getenv("baseUrl")
# if you have security enabled, please authorize the use - example below
# ROhdsiWebApi::authorizeWebApi(baseUrl, 'windows')
-cohortIds <- c(22040,
- 22042,
- 22041,
- 22039,
- 22038,
- 22037,
- 22036,
- 22035,
- 22034,
- 22033,
- 22031,
- 22032,
- 22030,
- 22028,
- 22029)
+cohortIds <- c(
+ 22040,
+ 22042,
+ 22041,
+ 22039,
+ 22038,
+ 22037,
+ 22036,
+ 22035,
+ 22034,
+ 22033,
+ 22031,
+ 22032,
+ 22030,
+ 22028,
+ 22029
+)
################# end of user input ##############
webApiCohorts <- ROhdsiWebApi::getCohortDefinitionsMetaData(baseUrl = baseUrl)
-studyCohorts <- webApiCohorts %>%
- dplyr::filter(.data$id %in% cohortIds)
+studyCohorts <- webApiCohorts %>%
+ dplyr::filter(.data$id %in% cohortIds)
# compile them into a data table
cohortDefinitionsArray <- list()
for (i in (1:nrow(studyCohorts))) {
- cohortDefinition <-
- ROhdsiWebApi::getCohortDefinition(cohortId = studyCohorts$id[[i]],
- baseUrl = baseUrl)
- cohortDefinitionsArray[[i]] <- list(
- id = studyCohorts$id[[i]],
- createdDate = studyCohorts$createdDate[[i]],
- modifiedDate = studyCohorts$createdDate[[i]],
- logicDescription = studyCohorts$description[[i]],
- name = stringr::str_trim(stringr::str_squish(cohortDefinition$name)),
- expression = cohortDefinition$expression
- )
+ cohortDefinition <-
+ ROhdsiWebApi::getCohortDefinition(
+ cohortId = studyCohorts$id[[i]],
+ baseUrl = baseUrl
+ )
+ cohortDefinitionsArray[[i]] <- list(
+ id = studyCohorts$id[[i]],
+ createdDate = studyCohorts$createdDate[[i]],
+ modifiedDate = studyCohorts$createdDate[[i]],
+ logicDescription = studyCohorts$description[[i]],
+ name = stringr::str_trim(stringr::str_squish(cohortDefinition$name)),
+ expression = cohortDefinition$expression
+ )
}
tempFolder <- tempdir()
unlink(x = tempFolder, recursive = TRUE, force = TRUE)
dir.create(path = tempFolder, showWarnings = FALSE, recursive = TRUE)
-specifications <- list(id = 1,
- version = version,
- name = name,
- packageName = packageName,
- skeletonVersion = skeletonVersion,
- createdBy = createdBy,
- createdDate = createdDate,
- modifiedBy = modifiedBy,
- modifiedDate = modifiedDate,
- skeletonType = skeletonType,
- organizationName = organizationName,
- description = description,
- cohortDefinitions = cohortDefinitionsArray)
+specifications <- list(
+ id = 1,
+ version = version,
+ name = name,
+ packageName = packageName,
+ skeletonVersion = skeletonVersion,
+ createdBy = createdBy,
+ createdDate = createdDate,
+ modifiedBy = modifiedBy,
+ modifiedDate = modifiedDate,
+ skeletonType = skeletonType,
+ organizationName = organizationName,
+ description = description,
+ cohortDefinitions = cohortDefinitionsArray
+)
jsonFileName <- paste0(file.path(tempFolder, "CohortDiagnosticsSpecs.json"))
write(x = specifications %>% RJSONIO::toJSON(pretty = TRUE, digits = 23), file = jsonFileName)
@@ -280,13 +286,13 @@ option A: Using Hydra and ROhdsiW
#### get the skeleton from github
# download.file(url = "https://github.com/OHDSI/SkeletonCohortDiagnosticsStudy/archive/refs/heads/main.zip",
# destfile = file.path(tempFolder, 'skeleton.zip'))
-# unzip(zipfile = file.path(tempFolder, 'skeleton.zip'),
+# unzip(zipfile = file.path(tempFolder, 'skeleton.zip'),
# overwrite = TRUE,
# exdir = file.path(tempFolder, "skeleton")
# )
# fileList <- list.files(path = file.path(tempFolder, "skeleton"), full.names = TRUE, recursive = TRUE, all.files = TRUE)
-# DatabaseConnector::createZipFile(zipFile = file.path(tempFolder, 'skeleton.zip'),
-# files = fileList,
+# DatabaseConnector::createZipFile(zipFile = file.path(tempFolder, 'skeleton.zip'),
+# files = fileList,
# rootFolder = list.dirs(file.path(tempFolder, 'skeleton'), recursive = FALSE))
##############################################################
@@ -301,13 +307,14 @@ option A: Using Hydra and ROhdsiW
hydraSpecificationFromFile <- Hydra::loadSpecifications(fileName = jsonFileName)
unlink(x = outputFolder, recursive = TRUE)
dir.create(path = outputFolder, showWarnings = FALSE, recursive = TRUE)
-Hydra::hydrate(specifications = hydraSpecificationFromFile,
- outputFolder = outputFolder
+Hydra::hydrate(
+ specifications = hydraSpecificationFromFile,
+ outputFolder = outputFolder
)
# for advanced user using skeletons outside of Hydra
# Hydra::hydrate(specifications = hydraSpecificationFromFile,
-# outputFolder = outputFolder,
+# outputFolder = outputFolder,
# skeletonFileName = file.path(tempFolder, 'skeleton.zip')
# )
diff --git a/docs/articles/DatabaseModeInDiagnosticsExplorer.html b/docs/articles/DatabaseModeInDiagnosticsExplorer.html
index c0e4afcb6..409421e5e 100644
--- a/docs/articles/DatabaseModeInDiagnosticsExplorer.html
+++ b/docs/articles/DatabaseModeInDiagnosticsExplorer.html
@@ -33,7 +33,7 @@
@@ -97,7 +97,7 @@ Database mode in Diagnostics Explorer
Gowtham
Rao
- 2022-12-06
+ 2022-12-19
Source: vignettes/DatabaseModeInDiagnosticsExplorer.Rmd
DatabaseModeInDiagnosticsExplorer.Rmd
diff --git a/docs/articles/RunningCohortDiagnostics.html b/docs/articles/RunningCohortDiagnostics.html
index 19b970613..645569673 100644
--- a/docs/articles/RunningCohortDiagnostics.html
+++ b/docs/articles/RunningCohortDiagnostics.html
@@ -33,7 +33,7 @@
@@ -97,7 +97,7 @@ Running Cohort Diagnostics
Gowtham Rao and
James P. Gilbert
- 2022-12-06
+ 2022-12-19
Source: vignettes/RunningCohortDiagnostics.Rmd
RunningCohortDiagnostics.Rmd
@@ -122,8 +122,8 @@ pre-requisitesROhdsiWebApi
package to download cohort
definitions from an ATLAS instance:
-remotes::install_github('OHDSI/Eunomia')
-remotes::install_github('OHDSI/ROhdsiWebApi')
remotes::install_github("OHDSI/Eunomia")
+remotes::install_github("OHDSI/ROhdsiWebApi")
library(CohortDiagnostics)
-connectionDetails <- createConnectionDetails(dbms = "postgresql",
- server = "localhost/ohdsi",
- user = "joe",
- password = "supersecret")
For the purposes of this example, we will use the Eunomia test CDM package that is in an Sqlite database stored locally.
@@ -181,10 +183,12 @@Loading cohort references from the OMOP Common Data Model format.
+cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet( + settingsFileName = "Cohorts.csv", + jsonFolder = "cohorts", + sqlFolder = "sql/sql_server", + packageName = "CohortDiagnostics" +)library(CohortDiagnostics) -cohortDefinitionSet <- CohortGenerator::getCohortDefinitionSet(settingsFileName = "Cohorts.csv", - jsonFolder = "cohorts", - sqlFolder = "sql/sql_server", - packageName = "CohortDiagnostics")
Looking at this data.frame of Cohorts you will see the sql and json for these cohorts:
@@ -204,11 +208,13 @@Loading cohort references from We
# Set up url baseUrl <- "https://atlas.hosting.com/WebAPI" # list of cohort ids -cohortIds <- c(18345,18346) +cohortIds <- c(18345, 18346) -cohortDefinitionSet <- ROhdsiWebApi::exportCohortDefinitionSet(baseUrl = baseUrl, - cohortIds = cohortIds, - generateStats = TRUE)
Consult the ROhdsiWebApi documentation for details on authentication to your atlas instance. Please note that in order to generate inclusion rules statistics (a useful diagnostic tool) the parameter @@ -226,18 +232,22 @@
cohortTableNames <- CohortGenerator::getCohortTableNames(cohortTable = cohortTable)
# Next create the tables on the database
-CohortGenerator::createCohortTables(connectionDetails = connectionDetails,
- cohortTableNames = cohortTableNames,
- cohortDatabaseSchema = "main",
- incremental = FALSE)
+CohortGenerator::createCohortTables(
+ connectionDetails = connectionDetails,
+ cohortTableNames = cohortTableNames,
+ cohortDatabaseSchema = "main",
+ incremental = FALSE
+)
# Generate the cohort set
-CohortGenerator::generateCohortSet(connectionDetails= connectionDetails,
- cdmDatabaseSchema = cdmDatabaseSchema,
- cohortDatabaseSchema = cohortDatabaseSchema,
- cohortTableNames = cohortTableNames,
- cohortDefinitionSet = cohortDefinitionSet,
- incremental = FALSE)
+CohortGenerator::generateCohortSet(
+ connectionDetails = connectionDetails,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ cohortDatabaseSchema = cohortDatabaseSchema,
+ cohortTableNames = cohortTableNames,
+ cohortDefinitionSet = cohortDefinitionSet,
+ incremental = FALSE
+)
Note, that the above code will delete an existing
table. However, incremental mode can be used when setting the
parameter incremental = TRUE
.
executeDiagnostics(cohortDefinitionSet,
- connectionDetails = connectionDetails,
- cohortTable = cohortTable,
- cohortDatabaseSchema = cohortDatabaseSchema,
- cdmDatabaseSchema = cdmDatabaseSchema,
- exportFolder = exportFolder,
- databaseId = "MyCdm",
- minCellCount = 5)
+ connectionDetails = connectionDetails,
+ cohortTable = cohortTable,
+ cohortDatabaseSchema = cohortDatabaseSchema,
+ cdmDatabaseSchema = cdmDatabaseSchema,
+ exportFolder = exportFolder,
+ databaseId = "MyCdm",
+ minCellCount = 5
+)
-CohortGenerator::dropCohortStatsTables(connectionDetails = connectionDetails,
- cohortDatabaseSchema = cohortDatabaseSchema,
- cohortTableNames = cohortTableNames)
CohortGenerator::dropCohortStatsTables(
+ connectionDetails = connectionDetails,
+ cohortDatabaseSchema = cohortDatabaseSchema,
+ cohortTableNames = cohortTableNames
+)
vignettes/ViewingResultsUsingDiagnosticsExplorer.Rmd
ViewingResultsUsingDiagnosticsExplorer.Rmd
vignettes/WhatIsCohortDiagnostics.Rmd
WhatIsCohortDiagnostics.Rmd
NEWS.md
+ Bug Fixes:
+Changes:
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 227a7fe6e..0b8c7f096 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,5 +7,5 @@ articles: RunningCohortDiagnostics: RunningCohortDiagnostics.html ViewingResultsUsingDiagnosticsExplorer: ViewingResultsUsingDiagnosticsExplorer.html WhatIsCohortDiagnostics: WhatIsCohortDiagnostics.html -last_built: 2022-12-06T21:26Z +last_built: 2022-12-20T00:46Z diff --git a/docs/reference/CohortDiagnostics-package.html b/docs/reference/CohortDiagnostics-package.html index 117984ced..49e7e7bff 100644 --- a/docs/reference/CohortDiagnostics-package.html +++ b/docs/reference/CohortDiagnostics-package.html @@ -17,7 +17,7 @@runCohortTimeSeriesDiagnostics()
Given a set of instantiated cohorts get time series for the cohorts.
Take a snapshot of the R environment